[HUDI-44] Adding support to preserve commit metadata for compaction (#4428)
This commit is contained in:
committed by
GitHub
parent
50fa5a6aa7
commit
b6891d253f
@@ -224,6 +224,12 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
.withDocumentation("Used by org.apache.hudi.io.compact.strategy.DayBasedCompactionStrategy to denote the number of "
|
||||
+ "latest partitions to compact during a compaction run.");
|
||||
|
||||
/**
 * Boolean config property registered under the key
 * {@code hoodie.compaction.preserve.commit.metadata}.
 *
 * <p>Defaults to {@code false} and is marked as available since version 0.11.0.
 * Per its own documentation string, enabling it preserves the existing
 * hoodie_commit_time when data is rewritten.
 */
public static final ConfigProperty<Boolean> PRESERVE_COMMIT_METADATA = ConfigProperty
|
||||
.key("hoodie.compaction.preserve.commit.metadata")
|
||||
.defaultValue(false)
|
||||
.sinceVersion("0.11.0")
|
||||
.withDocumentation("When rewriting data, preserves existing hoodie_commit_time");
|
||||
|
||||
/**
|
||||
* Configs related to specific table types.
|
||||
*/
|
||||
@@ -621,6 +627,11 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Stores the given flag under {@code PRESERVE_COMMIT_METADATA} in the compaction config
 * being built.
 *
 * @param preserveCommitMetadata value to record; it is stored in its string form
 * @return this builder, so calls can be chained
 */
public Builder withPreserveCommitMetadata(boolean preserveCommitMetadata) {
|
||||
// The value is written via String.valueOf, matching the other builder setters shown in this class.
compactionConfig.setValue(PRESERVE_COMMIT_METADATA, String.valueOf(preserveCommitMetadata));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCommitsArchivalBatchSize(int batchSize) {
|
||||
compactionConfig.setValue(COMMITS_ARCHIVAL_BATCH_SIZE, String.valueOf(batchSize));
|
||||
return this;
|
||||
|
||||
@@ -1163,10 +1163,14 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return getBoolean(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE);
|
||||
}
|
||||
|
||||
public boolean isPreserveHoodieCommitMetadata() {
|
||||
/**
 * Reads the clustering-side preserve-commit-metadata flag.
 *
 * @return the boolean value of {@code HoodieClusteringConfig.PRESERVE_COMMIT_METADATA}
 */
public boolean isPreserveHoodieCommitMetadataForClustering() {
|
||||
return getBoolean(HoodieClusteringConfig.PRESERVE_COMMIT_METADATA);
|
||||
}
|
||||
|
||||
/**
 * Reads the compaction-side preserve-commit-metadata flag.
 *
 * @return the boolean value of {@code HoodieCompactionConfig.PRESERVE_COMMIT_METADATA}
 */
public boolean isPreserveHoodieCommitMetadataForCompaction() {
|
||||
return getBoolean(HoodieCompactionConfig.PRESERVE_COMMIT_METADATA);
|
||||
}
|
||||
|
||||
public boolean isClusteringEnabled() {
|
||||
// TODO: future support async clustering
|
||||
return inlineClusteringEnabled() || isAsyncClusteringEnabled();
|
||||
|
||||
@@ -97,6 +97,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
|
||||
protected Map<String, HoodieRecord<T>> keyToNewRecords;
|
||||
protected Set<String> writtenRecordKeys;
|
||||
protected HoodieFileWriter<IndexedRecord> fileWriter;
|
||||
// When true, merged records are written with writeAvro(recordKey, record) instead of
// writeAvroWithMetadata(record, hoodieRecord). Starts as false; the constructor shown in this
// diff assigns it from config.isPreserveHoodieCommitMetadataForCompaction().
private boolean preserveMetadata = false;
|
||||
|
||||
protected Path newFilePath;
|
||||
protected Path oldFilePath;
|
||||
@@ -133,6 +134,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
|
||||
super(config, instantTime, partitionPath, fileId, hoodieTable, taskContextSupplier);
|
||||
this.keyToNewRecords = keyToNewRecords;
|
||||
this.useWriterSchema = true;
|
||||
this.preserveMetadata = config.isPreserveHoodieCommitMetadataForCompaction();
|
||||
init(fileId, this.partitionPath, dataFileToBeMerged);
|
||||
validateAndSetAndKeyGenProps(keyGeneratorOpt, config.populateMetaFields());
|
||||
}
|
||||
@@ -291,7 +293,11 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
|
||||
if (indexedRecord.isPresent() && !isDelete) {
|
||||
// Convert GenericRecord to GenericRecord with hoodie commit metadata in schema
|
||||
IndexedRecord recordWithMetadataInSchema = rewriteRecord((GenericRecord) indexedRecord.get());
|
||||
fileWriter.writeAvroWithMetadata(recordWithMetadataInSchema, hoodieRecord);
|
||||
if (preserveMetadata) {
|
||||
fileWriter.writeAvro(hoodieRecord.getRecordKey(), recordWithMetadataInSchema);
|
||||
} else {
|
||||
fileWriter.writeAvroWithMetadata(recordWithMetadataInSchema, hoodieRecord);
|
||||
}
|
||||
recordsWritten++;
|
||||
} else {
|
||||
recordsDeleted++;
|
||||
|
||||
@@ -105,7 +105,7 @@ public abstract class PartitionAwareClusteringPlanStrategy<T extends HoodieRecor
|
||||
.setInputGroups(clusteringGroups)
|
||||
.setExtraMetadata(getExtraMetadata())
|
||||
.setVersion(getPlanVersion())
|
||||
.setPreserveHoodieMetadata(getWriteConfig().isPreserveHoodieCommitMetadata())
|
||||
.setPreserveHoodieMetadata(getWriteConfig().isPreserveHoodieCommitMetadataForClustering())
|
||||
.build());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user