1
0

[HUDI-2902] Fixing populate meta fields with HFile writers and disabling virtual keys by default for metadata table (#4194)

This commit is contained in:
Sivabalan Narayanan
2021-12-03 07:20:21 -05:00
committed by GitHub
parent ca427240c0
commit e483f7c776
11 changed files with 287 additions and 28 deletions

View File

@@ -27,6 +27,7 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.keygen.KeyGenUtils;
import org.apache.hudi.table.HoodieTable;
import org.apache.avro.generic.GenericRecord;
@@ -72,7 +73,7 @@ public class HoodieSortedMergeHandle<T extends HoodieRecordPayload, I, K, O> ext
*/
@Override
public void write(GenericRecord oldRecord) {
String key = oldRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
String key = KeyGenUtils.getRecordKeyFromGenericRecord(oldRecord, keyGeneratorOpt);
// To maintain overall sorted order across updates and inserts, write any new inserts whose keys are less than
// the oldRecord's key.

View File

@@ -89,7 +89,7 @@ public class HoodieFileWriterFactory {
config.getHFileCompressionAlgorithm(), config.getHFileBlockSize(), config.getHFileMaxFileSize(),
PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR);
return new HoodieHFileWriter<>(instantTime, path, hfileConfig, schema, taskContextSupplier);
return new HoodieHFileWriter<>(instantTime, path, hfileConfig, schema, taskContextSupplier, config.populateMetaFields());
}
private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieFileWriter<R> newOrcFileWriter(

View File

@@ -62,6 +62,7 @@ public class HoodieHFileWriter<T extends HoodieRecordPayload, R extends IndexedR
private final long maxFileSize;
private final String instantTime;
private final TaskContextSupplier taskContextSupplier;
private final boolean populateMetaFields;
private HFile.Writer writer;
private String minRecordKey;
private String maxRecordKey;
@@ -70,7 +71,7 @@ public class HoodieHFileWriter<T extends HoodieRecordPayload, R extends IndexedR
private static String DROP_BEHIND_CACHE_COMPACTION_KEY = "hbase.hfile.drop.behind.compaction";
public HoodieHFileWriter(String instantTime, Path file, HoodieHFileConfig hfileConfig, Schema schema,
TaskContextSupplier taskContextSupplier) throws IOException {
TaskContextSupplier taskContextSupplier, boolean populateMetaFields) throws IOException {
Configuration conf = FSUtils.registerFileSystem(file, hfileConfig.getHadoopConf());
this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, conf);
@@ -84,6 +85,7 @@ public class HoodieHFileWriter<T extends HoodieRecordPayload, R extends IndexedR
this.maxFileSize = hfileConfig.getMaxFileSize();
this.instantTime = instantTime;
this.taskContextSupplier = taskContextSupplier;
this.populateMetaFields = populateMetaFields;
HFileContext context = new HFileContextBuilder().withBlockSize(hfileConfig.getBlockSize())
.withCompression(hfileConfig.getCompressionAlgorithm())
@@ -104,9 +106,13 @@ public class HoodieHFileWriter<T extends HoodieRecordPayload, R extends IndexedR
@Override
public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException {
prepRecordWithMetadata(avroRecord, record, instantTime,
taskContextSupplier.getPartitionIdSupplier().get(), recordIndex, file.getName());
writeAvro(record.getRecordKey(), (IndexedRecord) avroRecord);
if (populateMetaFields) {
prepRecordWithMetadata(avroRecord, record, instantTime,
taskContextSupplier.getPartitionIdSupplier().get(), recordIndex, file.getName());
writeAvro(record.getRecordKey(), (IndexedRecord) avroRecord);
} else {
writeAvro(record.getRecordKey(), (IndexedRecord) avroRecord);
}
}
@Override