[HUDI-3664] Fixing Column Stats Index composition (#5181)
Co-authored-by: Sagar Sumit <sagarsumit09@gmail.com>
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
package org.apache.hudi.avro;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.testutils.SchemaTestUtil;
|
||||
import org.apache.hudi.exception.SchemaCompatibilityException;
|
||||
|
||||
import org.apache.avro.JsonProperties;
|
||||
@@ -27,12 +28,14 @@ import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.math.BigDecimal;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldSchemaFromWriteSchema;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
@@ -88,6 +91,12 @@ public class TestHoodieAvroUtils {
|
||||
+ "{\"name\":\"decimal_col\",\"type\":[\"null\","
|
||||
+ "{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":8,\"scale\":4}],\"default\":null}]}";
|
||||
|
||||
private static String SCHEMA_WITH_NESTED_FIELD = "{\"name\":\"MyClass\",\"type\":\"record\",\"namespace\":\"com.acme.avro\",\"fields\":["
|
||||
+ "{\"name\":\"firstname\",\"type\":\"string\"},"
|
||||
+ "{\"name\":\"lastname\",\"type\":\"string\"},"
|
||||
+ "{\"name\":\"student\",\"type\":{\"name\":\"student\",\"type\":\"record\",\"fields\":["
|
||||
+ "{\"name\":\"firstname\",\"type\":[\"null\" ,\"string\"],\"default\": null},{\"name\":\"lastname\",\"type\":[\"null\" ,\"string\"],\"default\": null}]}}]}";
|
||||
|
||||
@Test
|
||||
public void testPropsPresent() {
|
||||
Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(EXAMPLE_SCHEMA));
|
||||
@@ -248,7 +257,7 @@ public class TestHoodieAvroUtils {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNestedFieldValWithDecimalFiled() {
|
||||
public void testGetNestedFieldValWithDecimalField() {
|
||||
GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(SCHEMA_WITH_DECIMAL_FIELD));
|
||||
rec.put("key_col", "key");
|
||||
BigDecimal bigDecimal = new BigDecimal("1234.5678");
|
||||
@@ -264,4 +273,36 @@ public class TestHoodieAvroUtils {
|
||||
assertEquals(0, buffer.position());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNestedFieldSchema() throws IOException {
|
||||
Schema schema = SchemaTestUtil.getEvolvedSchema();
|
||||
GenericRecord rec = new GenericData.Record(schema);
|
||||
rec.put("field1", "key1");
|
||||
rec.put("field2", "val1");
|
||||
rec.put("name", "val2");
|
||||
rec.put("favorite_number", 2);
|
||||
// test simple field schema
|
||||
assertEquals(Schema.create(Schema.Type.STRING), getNestedFieldSchemaFromWriteSchema(rec.getSchema(), "field1"));
|
||||
|
||||
GenericRecord rec2 = new GenericData.Record(schema);
|
||||
rec2.put("field1", "key1");
|
||||
rec2.put("field2", "val1");
|
||||
rec2.put("name", "val2");
|
||||
rec2.put("favorite_number", 12);
|
||||
// test comparison of non-string type
|
||||
assertEquals(-1, GenericData.get().compare(rec.get("favorite_number"), rec2.get("favorite_number"), getNestedFieldSchemaFromWriteSchema(rec.getSchema(), "favorite_number")));
|
||||
|
||||
// test nested field schema
|
||||
Schema nestedSchema = new Schema.Parser().parse(SCHEMA_WITH_NESTED_FIELD);
|
||||
GenericRecord rec3 = new GenericData.Record(nestedSchema);
|
||||
rec3.put("firstname", "person1");
|
||||
rec3.put("lastname", "person2");
|
||||
GenericRecord studentRecord = new GenericData.Record(rec3.getSchema().getField("student").schema());
|
||||
studentRecord.put("firstname", "person1");
|
||||
studentRecord.put("lastname", "person2");
|
||||
rec3.put("student", studentRecord);
|
||||
|
||||
assertEquals(Schema.create(Schema.Type.STRING), getNestedFieldSchemaFromWriteSchema(rec3.getSchema(), "student.firstname"));
|
||||
assertEquals(Schema.create(Schema.Type.STRING), getNestedFieldSchemaFromWriteSchema(nestedSchema, "student.firstname"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1749,40 +1749,39 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
HoodieLogFileReader reader = new HoodieLogFileReader(fs, new HoodieLogFile(writer.getLogFile().getPath(),
|
||||
fs.getFileStatus(writer.getLogFile().getPath()).getLen()), SchemaTestUtil.getSimpleSchema(),
|
||||
bufferSize, readBlocksLazily, true);
|
||||
HoodieLogFile logFile = new HoodieLogFile(writer.getLogFile().getPath(), fs.getFileStatus(writer.getLogFile().getPath()).getLen());
|
||||
try (HoodieLogFileReader reader = new HoodieLogFileReader(fs, logFile, SchemaTestUtil.getSimpleSchema(), bufferSize, readBlocksLazily, true)) {
|
||||
|
||||
assertTrue(reader.hasPrev(), "Last block should be available");
|
||||
HoodieLogBlock prevBlock = reader.prev();
|
||||
HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock;
|
||||
assertTrue(reader.hasPrev(), "Last block should be available");
|
||||
HoodieLogBlock prevBlock = reader.prev();
|
||||
HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock;
|
||||
|
||||
List<IndexedRecord> recordsRead1 = getRecords(dataBlockRead);
|
||||
assertEquals(copyOfRecords3.size(), recordsRead1.size(),
|
||||
"Third records size should be equal to the written records size");
|
||||
assertEquals(copyOfRecords3, recordsRead1,
|
||||
"Both records lists should be the same. (ordering guaranteed)");
|
||||
List<IndexedRecord> recordsRead1 = getRecords(dataBlockRead);
|
||||
assertEquals(copyOfRecords3.size(), recordsRead1.size(),
|
||||
"Third records size should be equal to the written records size");
|
||||
assertEquals(copyOfRecords3, recordsRead1,
|
||||
"Both records lists should be the same. (ordering guaranteed)");
|
||||
|
||||
assertTrue(reader.hasPrev(), "Second block should be available");
|
||||
prevBlock = reader.prev();
|
||||
dataBlockRead = (HoodieDataBlock) prevBlock;
|
||||
List<IndexedRecord> recordsRead2 = getRecords(dataBlockRead);
|
||||
assertEquals(copyOfRecords2.size(), recordsRead2.size(),
|
||||
"Read records size should be equal to the written records size");
|
||||
assertEquals(copyOfRecords2, recordsRead2,
|
||||
"Both records lists should be the same. (ordering guaranteed)");
|
||||
assertTrue(reader.hasPrev(), "Second block should be available");
|
||||
prevBlock = reader.prev();
|
||||
dataBlockRead = (HoodieDataBlock) prevBlock;
|
||||
List<IndexedRecord> recordsRead2 = getRecords(dataBlockRead);
|
||||
assertEquals(copyOfRecords2.size(), recordsRead2.size(),
|
||||
"Read records size should be equal to the written records size");
|
||||
assertEquals(copyOfRecords2, recordsRead2,
|
||||
"Both records lists should be the same. (ordering guaranteed)");
|
||||
|
||||
assertTrue(reader.hasPrev(), "First block should be available");
|
||||
prevBlock = reader.prev();
|
||||
dataBlockRead = (HoodieDataBlock) prevBlock;
|
||||
List<IndexedRecord> recordsRead3 = getRecords(dataBlockRead);
|
||||
assertEquals(copyOfRecords1.size(), recordsRead3.size(),
|
||||
"Read records size should be equal to the written records size");
|
||||
assertEquals(copyOfRecords1, recordsRead3,
|
||||
"Both records lists should be the same. (ordering guaranteed)");
|
||||
assertTrue(reader.hasPrev(), "First block should be available");
|
||||
prevBlock = reader.prev();
|
||||
dataBlockRead = (HoodieDataBlock) prevBlock;
|
||||
List<IndexedRecord> recordsRead3 = getRecords(dataBlockRead);
|
||||
assertEquals(copyOfRecords1.size(), recordsRead3.size(),
|
||||
"Read records size should be equal to the written records size");
|
||||
assertEquals(copyOfRecords1, recordsRead3,
|
||||
"Both records lists should be the same. (ordering guaranteed)");
|
||||
|
||||
assertFalse(reader.hasPrev());
|
||||
reader.close();
|
||||
assertFalse(reader.hasPrev());
|
||||
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -1830,19 +1829,20 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
writer.close();
|
||||
|
||||
// First round of reads - we should be able to read the first block and then EOF
|
||||
HoodieLogFileReader reader =
|
||||
new HoodieLogFileReader(fs, new HoodieLogFile(writer.getLogFile().getPath(),
|
||||
fs.getFileStatus(writer.getLogFile().getPath()).getLen()), schema, bufferSize, readBlocksLazily, true);
|
||||
HoodieLogFile logFile = new HoodieLogFile(writer.getLogFile().getPath(), fs.getFileStatus(writer.getLogFile().getPath()).getLen());
|
||||
|
||||
assertTrue(reader.hasPrev(), "Last block should be available");
|
||||
HoodieLogBlock block = reader.prev();
|
||||
assertTrue(block instanceof HoodieDataBlock, "Last block should be datablock");
|
||||
try (HoodieLogFileReader reader =
|
||||
new HoodieLogFileReader(fs, logFile, schema, bufferSize, readBlocksLazily, true)) {
|
||||
|
||||
assertTrue(reader.hasPrev(), "Last block should be available");
|
||||
assertThrows(CorruptedLogFileException.class, () -> {
|
||||
reader.prev();
|
||||
});
|
||||
reader.close();
|
||||
assertTrue(reader.hasPrev(), "Last block should be available");
|
||||
HoodieLogBlock block = reader.prev();
|
||||
assertTrue(block instanceof HoodieDataBlock, "Last block should be datablock");
|
||||
|
||||
assertTrue(reader.hasPrev(), "Last block should be available");
|
||||
assertThrows(CorruptedLogFileException.class, () -> {
|
||||
reader.prev();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -1882,28 +1882,28 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
HoodieLogFileReader reader = new HoodieLogFileReader(fs, new HoodieLogFile(writer.getLogFile().getPath(),
|
||||
fs.getFileStatus(writer.getLogFile().getPath()).getLen()), SchemaTestUtil.getSimpleSchema(),
|
||||
bufferSize, readBlocksLazily, true);
|
||||
HoodieLogFile logFile = new HoodieLogFile(writer.getLogFile().getPath(), fs.getFileStatus(writer.getLogFile().getPath()).getLen());
|
||||
try (HoodieLogFileReader reader =
|
||||
new HoodieLogFileReader(fs, logFile, SchemaTestUtil.getSimpleSchema(), bufferSize, readBlocksLazily, true)) {
|
||||
|
||||
assertTrue(reader.hasPrev(), "Third block should be available");
|
||||
reader.moveToPrev();
|
||||
assertTrue(reader.hasPrev(), "Third block should be available");
|
||||
reader.moveToPrev();
|
||||
|
||||
assertTrue(reader.hasPrev(), "Second block should be available");
|
||||
reader.moveToPrev();
|
||||
assertTrue(reader.hasPrev(), "Second block should be available");
|
||||
reader.moveToPrev();
|
||||
|
||||
// After moving twice, this last reader.prev() should read the First block written
|
||||
assertTrue(reader.hasPrev(), "First block should be available");
|
||||
HoodieLogBlock prevBlock = reader.prev();
|
||||
HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock;
|
||||
List<IndexedRecord> recordsRead = getRecords(dataBlockRead);
|
||||
assertEquals(copyOfRecords1.size(), recordsRead.size(),
|
||||
"Read records size should be equal to the written records size");
|
||||
assertEquals(copyOfRecords1, recordsRead,
|
||||
"Both records lists should be the same. (ordering guaranteed)");
|
||||
// After moving twice, this last reader.prev() should read the First block written
|
||||
assertTrue(reader.hasPrev(), "First block should be available");
|
||||
HoodieLogBlock prevBlock = reader.prev();
|
||||
HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock;
|
||||
List<IndexedRecord> recordsRead = getRecords(dataBlockRead);
|
||||
assertEquals(copyOfRecords1.size(), recordsRead.size(),
|
||||
"Read records size should be equal to the written records size");
|
||||
assertEquals(copyOfRecords1, recordsRead,
|
||||
"Both records lists should be the same. (ordering guaranteed)");
|
||||
|
||||
assertFalse(reader.hasPrev());
|
||||
reader.close();
|
||||
assertFalse(reader.hasPrev());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@@ -99,15 +99,6 @@ public class FileCreateUtils {
|
||||
return String.format("%s_%s_%s%s%s.%s", fileId, WRITE_TOKEN, instantTime, fileExtension, HoodieTableMetaClient.MARKER_EXTN, ioType);
|
||||
}
|
||||
|
||||
private static void createMetaFile(String basePath, String instantTime, String suffix) throws IOException {
|
||||
Path parentPath = Paths.get(basePath, HoodieTableMetaClient.METAFOLDER_NAME);
|
||||
Files.createDirectories(parentPath);
|
||||
Path metaFilePath = parentPath.resolve(instantTime + suffix);
|
||||
if (Files.notExists(metaFilePath)) {
|
||||
Files.createFile(metaFilePath);
|
||||
}
|
||||
}
|
||||
|
||||
private static void createMetaFile(String basePath, String instantTime, String suffix, FileSystem fs) throws IOException {
|
||||
org.apache.hadoop.fs.Path parentPath = new org.apache.hadoop.fs.Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME);
|
||||
if (!fs.exists(parentPath)) {
|
||||
@@ -119,12 +110,20 @@ public class FileCreateUtils {
|
||||
}
|
||||
}
|
||||
|
||||
private static void createMetaFile(String basePath, String instantTime, String suffix) throws IOException {
|
||||
createMetaFile(basePath, instantTime, suffix, "".getBytes());
|
||||
}
|
||||
|
||||
private static void createMetaFile(String basePath, String instantTime, String suffix, byte[] content) throws IOException {
|
||||
Path parentPath = Paths.get(basePath, HoodieTableMetaClient.METAFOLDER_NAME);
|
||||
Files.createDirectories(parentPath);
|
||||
Path metaFilePath = parentPath.resolve(instantTime + suffix);
|
||||
if (Files.notExists(metaFilePath)) {
|
||||
Files.write(metaFilePath, content);
|
||||
if (content.length == 0) {
|
||||
Files.createFile(metaFilePath);
|
||||
} else {
|
||||
Files.write(metaFilePath, content);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -117,8 +117,12 @@ import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING;
|
||||
|
||||
public class HoodieTestTable {
|
||||
|
||||
public static final String PHONY_TABLE_SCHEMA =
|
||||
"{\"namespace\": \"org.apache.hudi.avro.model\", \"type\": \"record\", \"name\": \"PhonyRecord\", \"fields\": []}";
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(HoodieTestTable.class);
|
||||
private static final Random RANDOM = new Random();
|
||||
|
||||
protected static HoodieTestTableState testTableState;
|
||||
private final List<String> inflightCommits = new ArrayList<>();
|
||||
|
||||
@@ -215,7 +219,7 @@ public class HoodieTestTable {
|
||||
writeStats.addAll(generateHoodieWriteStatForPartitionLogFiles(testTableState.getPartitionToLogFileInfoMap(commitTime), commitTime, bootstrap));
|
||||
}
|
||||
Map<String, String> extraMetadata = createImmutableMap("test", "test");
|
||||
return buildMetadata(writeStats, partitionToReplaceFileIds, Option.of(extraMetadata), operationType, EMPTY_STRING, action);
|
||||
return buildMetadata(writeStats, partitionToReplaceFileIds, Option.of(extraMetadata), operationType, PHONY_TABLE_SCHEMA, action);
|
||||
}
|
||||
|
||||
public HoodieTestTable moveInflightCommitToComplete(String instantTime, HoodieCommitMetadata metadata) throws IOException {
|
||||
@@ -779,7 +783,7 @@ public class HoodieTestTable {
|
||||
this.withBaseFilesInPartition(partition, testTableState.getPartitionToBaseFileInfoMap(commitTime).get(partition));
|
||||
}
|
||||
HoodieReplaceCommitMetadata replaceMetadata =
|
||||
(HoodieReplaceCommitMetadata) buildMetadata(writeStats, partitionToReplaceFileIds, Option.empty(), CLUSTER, EMPTY_STRING,
|
||||
(HoodieReplaceCommitMetadata) buildMetadata(writeStats, partitionToReplaceFileIds, Option.empty(), CLUSTER, PHONY_TABLE_SCHEMA,
|
||||
REPLACE_COMMIT_ACTION);
|
||||
addReplaceCommit(commitTime, Option.empty(), Option.empty(), replaceMetadata);
|
||||
return replaceMetadata;
|
||||
|
||||
Reference in New Issue
Block a user