[HUDI-2656] Generalize HoodieIndex for flexible record data type (#3893)
Co-authored-by: Raymond Xu <2701446+xushiyan@users.noreply.github.com>
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
|
||||
package org.apache.hudi.common.fs.inline;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieAvroRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.testutils.FileSystemTestUtils;
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
|
||||
@@ -144,7 +145,8 @@ public class TestParquetInLining {
|
||||
List<HoodieRecord> hoodieRecords = dataGenerator.generateInsertsWithHoodieAvroPayload(commitTime, 10);
|
||||
List<GenericRecord> toReturn = new ArrayList<>();
|
||||
for (HoodieRecord record : hoodieRecords) {
|
||||
- toReturn.add((GenericRecord) record.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get());
|
||||
+ toReturn.add((GenericRecord) ((HoodieAvroRecord) record).getData()
|
||||
+ .getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get());
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
@@ -18,18 +18,10 @@
|
||||
|
||||
package org.apache.hudi.common.functional;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.io.compress.Compression;
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieArchivedLogFile;
|
||||
import org.apache.hudi.common.model.HoodieAvroRecord;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
@@ -59,6 +51,16 @@ import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.ExternalSpillableMap;
|
||||
import org.apache.hudi.exception.CorruptedLogFileException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.io.compress.Compression;
|
||||
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
|
||||
import org.apache.parquet.hadoop.util.counters.BenchmarkCounter;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
@@ -585,7 +587,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
|
||||
List<IndexedRecord> scannedRecords = new ArrayList<>();
|
||||
for (HoodieRecord record : scanner) {
|
||||
- scannedRecords.add((IndexedRecord) record.getData().getInsertValue(schema).get());
|
||||
+ scannedRecords.add((IndexedRecord)
|
||||
+ ((HoodieAvroRecord) record).getData().getInsertValue(schema).get());
|
||||
}
|
||||
|
||||
assertEquals(scannedRecords.size(), allRecords.stream().mapToLong(Collection::size).sum(),
|
||||
|
||||
@@ -44,7 +44,7 @@ public class TestHoodieRecord {
|
||||
public void setUp() throws Exception {
|
||||
final List<IndexedRecord> indexedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1);
|
||||
final List<HoodieRecord> hoodieRecords =
|
||||
- indexedRecords.stream().map(r -> new HoodieRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
|
||||
+ indexedRecords.stream().map(r -> new HoodieAvroRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
|
||||
new AvroBinaryTestPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList());
|
||||
hoodieRecord = hoodieRecords.get(0);
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieAvroRecord;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
||||
@@ -510,7 +511,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
|
||||
populateKeysBySchema(schemaStr, currSize + i, kp);
|
||||
incrementNumExistingKeysBySchema(schemaStr);
|
||||
try {
|
||||
- return new HoodieRecord(key, generateRandomValueAsPerSchema(schemaStr, key, instantTime, isFlattened));
|
||||
+ return new HoodieAvroRecord(key, generateRandomValueAsPerSchema(schemaStr, key, instantTime, isFlattened));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(e.getMessage(), e);
|
||||
}
|
||||
@@ -541,7 +542,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
|
||||
List<HoodieRecord> copy = new ArrayList<>();
|
||||
for (HoodieRecord r : origin) {
|
||||
HoodieKey key = r.getKey();
|
||||
- HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, instantTime));
|
||||
+ HoodieRecord record = new HoodieAvroRecord(key, generateRandomValue(key, instantTime));
|
||||
copy.add(record);
|
||||
}
|
||||
return copy;
|
||||
@@ -553,7 +554,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
|
||||
for (int i = 0; i < limit; i++) {
|
||||
String partitionPath = partitionPaths[RAND.nextInt(partitionPaths.length)];
|
||||
HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), partitionPath);
|
||||
- HoodieRecord record = new HoodieRecord(key, generateAvroPayload(key, instantTime));
|
||||
+ HoodieRecord record = new HoodieAvroRecord(key, generateAvroPayload(key, instantTime));
|
||||
inserts.add(record);
|
||||
|
||||
KeyPartition kp = new KeyPartition();
|
||||
@@ -568,7 +569,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
|
||||
public List<HoodieRecord> generateUpdatesWithHoodieAvroPayload(String instantTime, List<HoodieRecord> baseRecords) {
|
||||
List<HoodieRecord> updates = new ArrayList<>();
|
||||
for (HoodieRecord baseRecord : baseRecords) {
|
||||
- HoodieRecord record = new HoodieRecord(baseRecord.getKey(), generateAvroPayload(baseRecord.getKey(), instantTime));
|
||||
+ HoodieRecord record = new HoodieAvroRecord(baseRecord.getKey(), generateAvroPayload(baseRecord.getKey(), instantTime));
|
||||
updates.add(record);
|
||||
}
|
||||
return updates;
|
||||
@@ -596,11 +597,11 @@ public class HoodieTestDataGenerator implements AutoCloseable {
|
||||
public HoodieRecord generateDeleteRecord(HoodieKey key) throws IOException {
|
||||
RawTripTestPayload payload =
|
||||
new RawTripTestPayload(Option.empty(), key.getRecordKey(), key.getPartitionPath(), null, true, 0L);
|
||||
- return new HoodieRecord(key, payload);
|
||||
+ return new HoodieAvroRecord(key, payload);
|
||||
}
|
||||
|
||||
public HoodieRecord generateUpdateRecord(HoodieKey key, String instantTime) throws IOException {
|
||||
- return new HoodieRecord(key, generateRandomValue(key, instantTime));
|
||||
+ return new HoodieAvroRecord(key, generateRandomValue(key, instantTime));
|
||||
}
|
||||
|
||||
public List<HoodieRecord> generateUpdates(String instantTime, List<HoodieRecord> baseRecords) throws IOException {
|
||||
@@ -615,7 +616,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
|
||||
public List<HoodieRecord> generateUpdatesWithTS(String instantTime, List<HoodieRecord> baseRecords, int ts) throws IOException {
|
||||
List<HoodieRecord> updates = new ArrayList<>();
|
||||
for (HoodieRecord baseRecord : baseRecords) {
|
||||
- HoodieRecord record = new HoodieRecord(baseRecord.getKey(),
|
||||
+ HoodieRecord record = new HoodieAvroRecord(baseRecord.getKey(),
|
||||
generateRandomValue(baseRecord.getKey(), instantTime, false, ts));
|
||||
updates.add(record);
|
||||
}
|
||||
@@ -735,7 +736,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
|
||||
logger.debug("key getting updated: " + kp.key.getRecordKey());
|
||||
used.add(kp);
|
||||
try {
|
||||
- return new HoodieRecord(kp.key, generateRandomValueAsPerSchema(schemaStr, kp.key, instantTime, false));
|
||||
+ return new HoodieAvroRecord(kp.key, generateRandomValueAsPerSchema(schemaStr, kp.key, instantTime, false));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(e.getMessage(), e);
|
||||
}
|
||||
@@ -801,7 +802,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
|
||||
numExistingKeys--;
|
||||
used.add(kp);
|
||||
try {
|
||||
- result.add(new HoodieRecord(kp.key, generateRandomDeleteValue(kp.key, instantTime)));
|
||||
+ result.add(new HoodieAvroRecord(kp.key, generateRandomDeleteValue(kp.key, instantTime)));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(e.getMessage(), e);
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ package org.apache.hudi.common.testutils;
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.avro.MercifulJsonConverter;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieAvroRecord;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
@@ -152,7 +153,7 @@ public final class SchemaTestUtil {
|
||||
}
|
||||
|
||||
private static HoodieRecord convertToHoodieRecords(IndexedRecord iRecord, String key, String partitionPath) {
|
||||
- return new HoodieRecord<>(new HoodieKey(key, partitionPath),
|
||||
+ return new HoodieAvroRecord<>(new HoodieKey(key, partitionPath),
|
||||
new HoodieAvroPayload(Option.of((GenericRecord) iRecord)));
|
||||
}
|
||||
|
||||
@@ -172,7 +173,7 @@ public final class SchemaTestUtil {
|
||||
throws IOException, URISyntaxException {
|
||||
|
||||
List<IndexedRecord> iRecords = generateTestRecords(from, limit);
|
||||
- return iRecords.stream().map(r -> new HoodieRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
|
||||
+ return iRecords.stream().map(r -> new HoodieAvroRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
|
||||
new HoodieAvroPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@@ -180,9 +181,9 @@ public final class SchemaTestUtil {
|
||||
Schema schema, String fieldNameToUpdate, String newValue) {
|
||||
return oldRecords.stream().map(r -> {
|
||||
try {
|
||||
- GenericRecord rec = (GenericRecord) r.getData().getInsertValue(schema).get();
|
||||
+ GenericRecord rec = (GenericRecord) ((HoodieAvroRecord) r).getData().getInsertValue(schema).get();
|
||||
rec.put(fieldNameToUpdate, newValue);
|
||||
- return new HoodieRecord<>(r.getKey(), new HoodieAvroPayload(Option.of(rec)));
|
||||
+ return new HoodieAvroRecord<>(r.getKey(), new HoodieAvroPayload(Option.of(rec)));
|
||||
} catch (IOException io) {
|
||||
throw new HoodieIOException("unable to get data from hoodie record", io);
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
package org.apache.hudi.common.testutils;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieAvroRecord;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordLocation;
|
||||
@@ -48,7 +49,7 @@ public class SpillableMapTestUtils {
|
||||
String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
|
||||
recordKeys.add(key);
|
||||
HoodieRecord record =
|
||||
- new HoodieRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r)));
|
||||
+ new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r)));
|
||||
record.unseal();
|
||||
record.setCurrentLocation(new HoodieRecordLocation("DUMMY_COMMIT_TIME", "DUMMY_FILE_ID"));
|
||||
record.seal();
|
||||
|
||||
@@ -20,6 +20,7 @@ package org.apache.hudi.common.util.collection;
|
||||
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieAvroRecord;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
@@ -186,7 +187,7 @@ public class TestBitCaskDiskMap extends HoodieCommonTestHarness {
|
||||
schema = SchemaTestUtil.getSimpleSchema();
|
||||
List<IndexedRecord> indexedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1);
|
||||
hoodieRecords =
|
||||
- indexedRecords.stream().map(r -> new HoodieRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
|
||||
+ indexedRecords.stream().map(r -> new HoodieAvroRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
|
||||
new AvroBinaryTestPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList());
|
||||
payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0), new HoodieRecordSizeEstimator(schema));
|
||||
assertTrue(payloadSize > 0);
|
||||
@@ -195,7 +196,7 @@ public class TestBitCaskDiskMap extends HoodieCommonTestHarness {
|
||||
final Schema simpleSchemaWithMetadata = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
|
||||
indexedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1);
|
||||
hoodieRecords = indexedRecords.stream()
|
||||
- .map(r -> new HoodieRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
|
||||
+ .map(r -> new HoodieAvroRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
|
||||
new AvroBinaryTestPayload(
|
||||
Option.of(HoodieAvroUtils.rewriteRecord((GenericRecord) r, simpleSchemaWithMetadata)))))
|
||||
.collect(Collectors.toList());
|
||||
@@ -212,7 +213,7 @@ public class TestBitCaskDiskMap extends HoodieCommonTestHarness {
|
||||
iRecords.forEach(r -> {
|
||||
String key = ((GenericRecord) r).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
|
||||
- HoodieRecord value = new HoodieRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r)));
|
||||
+ HoodieRecord value = new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r)));
|
||||
recordMap.put(key, value);
|
||||
});
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ package org.apache.hudi.common.util.collection;
|
||||
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieAvroRecord;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
@@ -135,7 +136,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
|
||||
updatedRecords.forEach(record -> {
|
||||
HoodieRecord rec = records.get(((GenericRecord) record).get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
|
||||
try {
|
||||
- assertEquals(rec.getData().getInsertValue(schema).get(), record);
|
||||
+ assertEquals(((HoodieAvroRecord) rec).getData().getInsertValue(schema).get(), record);
|
||||
} catch (IOException io) {
|
||||
throw new UncheckedIOException(io);
|
||||
}
|
||||
@@ -159,13 +160,13 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
|
||||
IndexedRecord inMemoryRecord = iRecords.get(0);
|
||||
String ikey = ((GenericRecord) inMemoryRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
String iPartitionPath = ((GenericRecord) inMemoryRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
|
||||
- HoodieRecord inMemoryHoodieRecord = new HoodieRecord<>(new HoodieKey(ikey, iPartitionPath),
|
||||
+ HoodieRecord inMemoryHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(ikey, iPartitionPath),
|
||||
new HoodieAvroPayload(Option.of((GenericRecord) inMemoryRecord)));
|
||||
|
||||
IndexedRecord onDiskRecord = iRecords.get(99);
|
||||
String dkey = ((GenericRecord) onDiskRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
String dPartitionPath = ((GenericRecord) onDiskRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
|
||||
- HoodieRecord onDiskHoodieRecord = new HoodieRecord<>(new HoodieKey(dkey, dPartitionPath),
|
||||
+ HoodieRecord onDiskHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(dkey, dPartitionPath),
|
||||
new HoodieAvroPayload(Option.of((GenericRecord) onDiskRecord)));
|
||||
// assert size
|
||||
assert records.size() == 100;
|
||||
@@ -241,7 +242,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
|
||||
|
||||
// Get a record from the in-Memory map
|
||||
String key = recordKeys.get(0);
|
||||
- HoodieRecord record = records.get(key);
|
||||
+ HoodieAvroRecord record = (HoodieAvroRecord) records.get(key);
|
||||
List<IndexedRecord> recordsToUpdate = new ArrayList<>();
|
||||
recordsToUpdate.add((IndexedRecord) record.getData().getInsertValue(schema).get());
|
||||
|
||||
@@ -259,7 +260,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
|
||||
|
||||
// Get a record from the disk based map
|
||||
key = recordKeys.get(recordKeys.size() - 1);
|
||||
- record = records.get(key);
|
||||
+ record = (HoodieAvroRecord) records.get(key);
|
||||
recordsToUpdate = new ArrayList<>();
|
||||
recordsToUpdate.add((IndexedRecord) record.getData().getInsertValue(schema).get());
|
||||
|
||||
|
||||
@@ -18,12 +18,9 @@
|
||||
|
||||
package org.apache.hudi.common.util.collection;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieAvroRecord;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
@@ -33,6 +30,9 @@ import org.apache.hudi.common.testutils.SchemaTestUtil;
|
||||
import org.apache.hudi.common.testutils.SpillableMapTestUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@@ -166,7 +166,7 @@ public class TestRocksDbDiskMap extends HoodieCommonTestHarness {
|
||||
iRecords.forEach(r -> {
|
||||
String key = ((GenericRecord) r).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
|
||||
- HoodieRecord value = new HoodieRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r)));
|
||||
+ HoodieRecord value = new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r)));
|
||||
recordMap.put(key, value);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user