1
0

[HUDI-3213] Making commit preserve metadata to true for compaction (#4811)

* Making commit preserve metadata to true

* Fixing integ tests

* Fixing preserve commit metadata for metadata table

* fixed bootstrap tests

* temp diff

* Fixing merge handle

* renaming fallback record

* fixing build issue

* Fixing test failures
This commit is contained in:
Sivabalan Narayanan
2022-03-07 07:32:05 -05:00
committed by GitHub
parent 6f57bbfac4
commit 3539578ccb
15 changed files with 88 additions and 48 deletions

View File

@@ -250,7 +250,7 @@ public class HoodieCompactionConfig extends HoodieConfig {
public static final ConfigProperty<Boolean> PRESERVE_COMMIT_METADATA = ConfigProperty
.key("hoodie.compaction.preserve.commit.metadata")
.defaultValue(false)
.defaultValue(true)
.sinceVersion("0.11.0")
.withDocumentation("When rewriting data, preserves existing hoodie_commit_time");

View File

@@ -1072,8 +1072,7 @@ public class HoodieWriteConfig extends HoodieConfig {
}
public boolean populateMetaFields() {
return Boolean.parseBoolean(getStringOrDefault(HoodieTableConfig.POPULATE_META_FIELDS,
HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()));
return getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS);
}
/**

View File

@@ -61,6 +61,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.apache.hudi.common.model.HoodieRecord.FILENAME_METADATA_FIELD_POS;
@SuppressWarnings("Duplicates")
/**
* Handle to merge incoming records to those in storage.
@@ -262,7 +264,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
isDelete = HoodieOperation.isDelete(hoodieRecord.getOperation());
}
}
return writeRecord(hoodieRecord, indexedRecord, isDelete);
return writeRecord(hoodieRecord, indexedRecord, isDelete, oldRecord);
}
protected void writeInsertRecord(HoodieRecord<T> hoodieRecord) throws IOException {
@@ -272,16 +274,16 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
if (insertRecord.isPresent() && insertRecord.get().equals(IGNORE_RECORD)) {
return;
}
if (writeRecord(hoodieRecord, insertRecord, HoodieOperation.isDelete(hoodieRecord.getOperation()))) {
if (writeRecord(hoodieRecord, insertRecord, HoodieOperation.isDelete(hoodieRecord.getOperation()), null)) {
insertRecordsWritten++;
}
}
protected boolean writeRecord(HoodieRecord<T> hoodieRecord, Option<IndexedRecord> indexedRecord) {
return writeRecord(hoodieRecord, indexedRecord, false);
return writeRecord(hoodieRecord, indexedRecord, false, null);
}
protected boolean writeRecord(HoodieRecord<T> hoodieRecord, Option<IndexedRecord> indexedRecord, boolean isDelete) {
protected boolean writeRecord(HoodieRecord<T> hoodieRecord, Option<IndexedRecord> indexedRecord, boolean isDelete, GenericRecord oldRecord) {
Option recordMetadata = hoodieRecord.getData().getMetadata();
if (!partitionPath.equals(hoodieRecord.getPartitionPath())) {
HoodieUpsertException failureEx = new HoodieUpsertException("mismatched partition path, record partition: "
@@ -292,8 +294,10 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
try {
if (indexedRecord.isPresent() && !isDelete) {
// Convert GenericRecord to GenericRecord with hoodie commit metadata in schema
IndexedRecord recordWithMetadataInSchema = rewriteRecord((GenericRecord) indexedRecord.get());
if (preserveMetadata) {
IndexedRecord recordWithMetadataInSchema = rewriteRecord((GenericRecord) indexedRecord.get(), preserveMetadata, oldRecord);
if (preserveMetadata && useWriterSchema) { // useWriteSchema will be true only incase of compaction.
// do not preserve FILENAME_METADATA_FIELD
recordWithMetadataInSchema.put(FILENAME_METADATA_FIELD_POS, newFilePath.getName());
fileWriter.writeAvro(hoodieRecord.getRecordKey(), recordWithMetadataInSchema);
} else {
fileWriter.writeAvroWithMetadata(recordWithMetadataInSchema, hoodieRecord);

View File

@@ -227,6 +227,10 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload, I, K, O>
return HoodieAvroUtils.rewriteRecord(record, writeSchemaWithMetaFields);
}
protected GenericRecord rewriteRecord(GenericRecord record, boolean copyOverMetaFields, GenericRecord fallbackRecord) {
return HoodieAvroUtils.rewriteRecord(record, writeSchemaWithMetaFields, copyOverMetaFields, fallbackRecord);
}
public abstract List<WriteStatus> close();
public List<WriteStatus> writeStatuses() {