[HUDI-1771] Propagate CDC format for hoodie (#3285)
This commit is contained in:
@@ -373,6 +373,13 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
.withDocumentation("Whether to allow generation of empty commits, even if no data was written in the commit. "
|
||||
+ "It's useful in cases where extra metadata needs to be published regardless e.g tracking source offsets when ingesting data");
|
||||
|
||||
public static final ConfigProperty<Boolean> ALLOW_OPERATION_METADATA_FIELD = ConfigProperty
|
||||
.key("hoodie.allow.operation.metadata.field")
|
||||
.defaultValue(false)
|
||||
.sinceVersion("0.9")
|
||||
.withDocumentation("Whether to include '_hoodie_operation' in the metadata fields. "
|
||||
+ "Once enabled, all the changes of a record are persisted to the delta log directly without merge");
|
||||
|
||||
private ConsistencyGuardConfig consistencyGuardConfig;
|
||||
|
||||
// Hoodie Write Client transparently rewrites File System View config when embedded mode is enabled
|
||||
@@ -1309,6 +1316,10 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return getBooleanOrDefault(ALLOW_EMPTY_COMMIT);
|
||||
}
|
||||
|
||||
public boolean allowOperationMetadataField() {
|
||||
return getBooleanOrDefault(ALLOW_OPERATION_METADATA_FIELD);
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
protected final HoodieWriteConfig writeConfig = new HoodieWriteConfig();
|
||||
@@ -1615,6 +1626,11 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withAllowOperationMetadataField(boolean allowOperationMetadataField) {
|
||||
writeConfig.setValue(ALLOW_OPERATION_METADATA_FIELD, Boolean.toString(allowOperationMetadataField));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withProperties(Properties properties) {
|
||||
this.writeConfig.getProps().putAll(properties);
|
||||
return this;
|
||||
|
||||
@@ -27,6 +27,7 @@ import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDeltaWriteStat;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieOperation;
|
||||
import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordLocation;
|
||||
@@ -197,20 +198,26 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
|
||||
// Pass the isUpdateRecord to the props for HoodieRecordPayload to judge
|
||||
// whether it is an update or an insert record.
|
||||
boolean isUpdateRecord = isUpdateRecord(hoodieRecord);
|
||||
// If the format cannot record the operation field, nullify the DELETE payload manually.
|
||||
boolean nullifyPayload = HoodieOperation.isDelete(hoodieRecord.getOperation()) && !config.allowOperationMetadataField();
|
||||
recordProperties.put(HoodiePayloadProps.PAYLOAD_IS_UPDATE_RECORD_FOR_MOR, String.valueOf(isUpdateRecord));
|
||||
Option<IndexedRecord> avroRecord = hoodieRecord.getData().getInsertValue(tableSchema, recordProperties);
|
||||
Option<IndexedRecord> avroRecord = nullifyPayload ? Option.empty() : hoodieRecord.getData().getInsertValue(tableSchema, recordProperties);
|
||||
if (avroRecord.isPresent()) {
|
||||
if (avroRecord.get().equals(IGNORE_RECORD)) {
|
||||
return avroRecord;
|
||||
}
|
||||
// Convert GenericRecord to GenericRecord with hoodie commit metadata in schema
|
||||
avroRecord = Option.of(rewriteRecord((GenericRecord) avroRecord.get()));
|
||||
GenericRecord rewriteRecord = rewriteRecord((GenericRecord) avroRecord.get());
|
||||
avroRecord = Option.of(rewriteRecord);
|
||||
String seqId =
|
||||
HoodieRecord.generateSequenceId(instantTime, getPartitionId(), RECORD_COUNTER.getAndIncrement());
|
||||
if (config.populateMetaFields()) {
|
||||
HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord.get(), hoodieRecord.getRecordKey(),
|
||||
HoodieAvroUtils.addHoodieKeyToRecord(rewriteRecord, hoodieRecord.getRecordKey(),
|
||||
hoodieRecord.getPartitionPath(), fileId);
|
||||
HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord.get(), instantTime, seqId);
|
||||
HoodieAvroUtils.addCommitMetadataToRecord(rewriteRecord, instantTime, seqId);
|
||||
}
|
||||
if (config.allowOperationMetadataField()) {
|
||||
HoodieAvroUtils.addOperationToRecord(rewriteRecord, hoodieRecord.getOperation());
|
||||
}
|
||||
if (isUpdateRecord(hoodieRecord)) {
|
||||
updatedRecordsWritten++;
|
||||
|
||||
@@ -22,6 +22,7 @@ import org.apache.avro.Schema;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.engine.TaskContextSupplier;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieOperation;
|
||||
import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordLocation;
|
||||
@@ -127,6 +128,9 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload, I, K, O> extends
|
||||
@Override
|
||||
public void write(HoodieRecord record, Option<IndexedRecord> avroRecord) {
|
||||
Option recordMetadata = record.getData().getMetadata();
|
||||
if (HoodieOperation.isDelete(record.getOperation())) {
|
||||
avroRecord = Option.empty();
|
||||
}
|
||||
try {
|
||||
if (avroRecord.isPresent()) {
|
||||
if (avroRecord.get().equals(IGNORE_RECORD)) {
|
||||
|
||||
@@ -22,6 +22,7 @@ import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.engine.TaskContextSupplier;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieOperation;
|
||||
import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordLocation;
|
||||
@@ -264,6 +265,9 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
|
||||
writeStatus.markFailure(hoodieRecord, failureEx, recordMetadata);
|
||||
return false;
|
||||
}
|
||||
if (HoodieOperation.isDelete(hoodieRecord.getOperation())) {
|
||||
indexedRecord = Option.empty();
|
||||
}
|
||||
try {
|
||||
if (indexedRecord.isPresent()) {
|
||||
// Convert GenericRecord to GenericRecord with hoodie commit metadata in schema
|
||||
|
||||
@@ -112,9 +112,9 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload, I, K, O>
|
||||
this.partitionPath = partitionPath;
|
||||
this.fileId = fileId;
|
||||
this.tableSchema = overriddenSchema.orElseGet(() -> getSpecifiedTableSchema(config));
|
||||
this.tableSchemaWithMetaFields = HoodieAvroUtils.addMetadataFields(tableSchema);
|
||||
this.tableSchemaWithMetaFields = HoodieAvroUtils.addMetadataFields(tableSchema, config.allowOperationMetadataField());
|
||||
this.writeSchema = overriddenSchema.orElseGet(() -> getWriteSchema(config));
|
||||
this.writeSchemaWithMetaFields = HoodieAvroUtils.addMetadataFields(writeSchema);
|
||||
this.writeSchemaWithMetaFields = HoodieAvroUtils.addMetadataFields(writeSchema, config.allowOperationMetadataField());
|
||||
this.timer = new HoodieTimer().startTimer();
|
||||
this.writeStatus = (WriteStatus) ReflectionUtils.loadClass(config.getWriteStatusClassName(),
|
||||
!hoodieTable.getIndex().isImplicitWithStorage(), config.getWriteStatusFailureFraction());
|
||||
|
||||
Reference in New Issue
Block a user