1
0

[HUDI-1771] Propagate CDC format for hoodie (#3285)

This commit is contained in:
swuferhong
2021-08-10 20:23:23 +08:00
committed by GitHub
parent b4441abcf7
commit 21db6d7a84
50 changed files with 1081 additions and 199 deletions

View File

@@ -373,6 +373,13 @@ public class HoodieWriteConfig extends HoodieConfig {
.withDocumentation("Whether to allow generation of empty commits, even if no data was written in the commit. "
+ "It's useful in cases where extra metadata needs to be published regardless e.g tracking source offsets when ingesting data");
/**
 * Config property {@code hoodie.allow.operation.metadata.field} (default {@code false}).
 * Controls whether '_hoodie_operation' is included in the metadata fields; per its own
 * documentation string, enabling it persists all changes of a record to the delta log
 * directly without merge.
 */
public static final ConfigProperty<Boolean> ALLOW_OPERATION_METADATA_FIELD = ConfigProperty
.key("hoodie.allow.operation.metadata.field")
.defaultValue(false)
.sinceVersion("0.9")
.withDocumentation("Whether to include '_hoodie_operation' in the metadata fields. "
+ "Once enabled, all the changes of a record are persisted to the delta log directly without merge");
private ConsistencyGuardConfig consistencyGuardConfig;
// Hoodie Write Client transparently rewrites File System View config when embedded mode is enabled
@@ -1309,6 +1316,10 @@ public class HoodieWriteConfig extends HoodieConfig {
return getBooleanOrDefault(ALLOW_EMPTY_COMMIT);
}
/**
 * Tells whether the '_hoodie_operation' metadata field is enabled for this write config.
 *
 * @return the value of {@link #ALLOW_OPERATION_METADATA_FIELD}, resolved through
 *         {@code getBooleanOrDefault}
 */
public boolean allowOperationMetadataField() {
  final boolean operationFieldAllowed = getBooleanOrDefault(ALLOW_OPERATION_METADATA_FIELD);
  return operationFieldAllowed;
}
public static class Builder {
protected final HoodieWriteConfig writeConfig = new HoodieWriteConfig();
@@ -1615,6 +1626,11 @@ public class HoodieWriteConfig extends HoodieConfig {
return this;
}
/**
 * Sets {@code ALLOW_OPERATION_METADATA_FIELD} on the write config being built.
 * The flag is stored in its string form ("true" / "false").
 *
 * @param allowOperationMetadataField the flag value to store
 * @return this builder, for call chaining
 */
public Builder withAllowOperationMetadataField(boolean allowOperationMetadataField) {
  final String flagAsText = String.valueOf(allowOperationMetadataField);
  this.writeConfig.setValue(ALLOW_OPERATION_METADATA_FIELD, flagAsText);
  return this;
}
public Builder withProperties(Properties properties) {
this.writeConfig.getProps().putAll(properties);
return this;

View File

@@ -27,6 +27,7 @@ import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieDeltaWriteStat;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
@@ -197,20 +198,26 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
// Pass isUpdateRecord to the props so that HoodieRecordPayload can judge
// whether it is an update or an insert record.
boolean isUpdateRecord = isUpdateRecord(hoodieRecord);
// If the format can not record the operation field, nullify the DELETE payload manually.
boolean nullifyPayload = HoodieOperation.isDelete(hoodieRecord.getOperation()) && !config.allowOperationMetadataField();
recordProperties.put(HoodiePayloadProps.PAYLOAD_IS_UPDATE_RECORD_FOR_MOR, String.valueOf(isUpdateRecord));
Option<IndexedRecord> avroRecord = hoodieRecord.getData().getInsertValue(tableSchema, recordProperties);
Option<IndexedRecord> avroRecord = nullifyPayload ? Option.empty() : hoodieRecord.getData().getInsertValue(tableSchema, recordProperties);
if (avroRecord.isPresent()) {
if (avroRecord.get().equals(IGNORE_RECORD)) {
return avroRecord;
}
// Convert GenericRecord to GenericRecord with hoodie commit metadata in schema
avroRecord = Option.of(rewriteRecord((GenericRecord) avroRecord.get()));
GenericRecord rewriteRecord = rewriteRecord((GenericRecord) avroRecord.get());
avroRecord = Option.of(rewriteRecord);
String seqId =
HoodieRecord.generateSequenceId(instantTime, getPartitionId(), RECORD_COUNTER.getAndIncrement());
if (config.populateMetaFields()) {
HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord.get(), hoodieRecord.getRecordKey(),
HoodieAvroUtils.addHoodieKeyToRecord(rewriteRecord, hoodieRecord.getRecordKey(),
hoodieRecord.getPartitionPath(), fileId);
HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord.get(), instantTime, seqId);
HoodieAvroUtils.addCommitMetadataToRecord(rewriteRecord, instantTime, seqId);
}
if (config.allowOperationMetadataField()) {
HoodieAvroUtils.addOperationToRecord(rewriteRecord, hoodieRecord.getOperation());
}
if (isUpdateRecord(hoodieRecord)) {
updatedRecordsWritten++;

View File

@@ -22,6 +22,7 @@ import org.apache.avro.Schema;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
@@ -127,6 +128,9 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload, I, K, O> extends
@Override
public void write(HoodieRecord record, Option<IndexedRecord> avroRecord) {
Option recordMetadata = record.getData().getMetadata();
if (HoodieOperation.isDelete(record.getOperation())) {
avroRecord = Option.empty();
}
try {
if (avroRecord.isPresent()) {
if (avroRecord.get().equals(IGNORE_RECORD)) {

View File

@@ -22,6 +22,7 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
@@ -264,6 +265,9 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
writeStatus.markFailure(hoodieRecord, failureEx, recordMetadata);
return false;
}
if (HoodieOperation.isDelete(hoodieRecord.getOperation())) {
indexedRecord = Option.empty();
}
try {
if (indexedRecord.isPresent()) {
// Convert GenericRecord to GenericRecord with hoodie commit metadata in schema

View File

@@ -112,9 +112,9 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload, I, K, O>
this.partitionPath = partitionPath;
this.fileId = fileId;
this.tableSchema = overriddenSchema.orElseGet(() -> getSpecifiedTableSchema(config));
this.tableSchemaWithMetaFields = HoodieAvroUtils.addMetadataFields(tableSchema);
this.tableSchemaWithMetaFields = HoodieAvroUtils.addMetadataFields(tableSchema, config.allowOperationMetadataField());
this.writeSchema = overriddenSchema.orElseGet(() -> getWriteSchema(config));
this.writeSchemaWithMetaFields = HoodieAvroUtils.addMetadataFields(writeSchema);
this.writeSchemaWithMetaFields = HoodieAvroUtils.addMetadataFields(writeSchema, config.allowOperationMetadataField());
this.timer = new HoodieTimer().startTimer();
this.writeStatus = (WriteStatus) ReflectionUtils.loadClass(config.getWriteStatusClassName(),
!hoodieTable.getIndex().isImplicitWithStorage(), config.getWriteStatusFailureFraction());