[HUDI-3365] Make sure Metadata Table records are updated appropriately on HDFS (#4739)
- This change makes sure Metadata Table (MT) records are updated appropriately on HDFS: previously, after log-file append operations, MT records were updated with only the size of the delta appended to the original file. This was found to cause issues with rollbacks, which instead update the MT with records bearing the full file size. - To hedge against similar issues going forward, this PR removes that discrepancy and streamlines the flow so that the MT always ingests records bearing full file sizes.
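For context, the invariant the description above points at is that MT entries for a log file should always carry the file's absolute size on storage after an append, never just the size of the block that was appended (which is what rollback already reports). Below is a minimal, self-contained Java sketch of that rule using only Hadoop FileSystem APIs; the class, method, and path names are illustrative and are not part of this patch.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper, not from this patch: shows which size should be handed
// to the Metadata Table after a block is appended to an existing log file.
public class FullFileSizeSketch {

  // The value to record for a log file is its absolute length on storage, not
  // the delta that was just written, so that append and rollback paths report
  // the same kind of number to the Metadata Table.
  static long sizeToRecord(FileSystem fs, Path logFile) throws IOException {
    return fs.getFileStatus(logFile).getLen();
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path logFile = new Path("/tmp/full-file-size-sketch.log");

    long delta = 64;
    try (FSDataOutputStream out = fs.create(logFile, true)) {
      out.write(new byte[128]);        // content already present in the log file
      out.write(new byte[(int) delta]); // the newly appended block (on HDFS this would be a real append)
    }

    // Prints delta=64, fullSize=192: the full size is what the MT should ingest.
    System.out.println("delta=" + delta + ", fullSize=" + sizeToRecord(fs, logFile));
  }
}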
@@ -18,6 +18,8 @@
 
 package org.apache.hudi.client;
 
+import com.codahale.metrics.Timer;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hudi.async.AsyncCleanerService;
 import org.apache.hudi.client.common.HoodieFlinkEngineContext;
 import org.apache.hudi.common.data.HoodieList;
@@ -62,9 +64,6 @@ import org.apache.hudi.table.marker.WriteMarkersFactory;
 import org.apache.hudi.table.upgrade.FlinkUpgradeDowngradeHelper;
 import org.apache.hudi.table.upgrade.UpgradeDowngrade;
 import org.apache.hudi.util.FlinkClientUtil;
-
-import com.codahale.metrics.Timer;
-import org.apache.hadoop.conf.Configuration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -93,7 +92,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   private Option<HoodieBackedTableMetadataWriter> metadataWriterOption = Option.empty();
 
   public HoodieFlinkWriteClient(HoodieEngineContext context, HoodieWriteConfig writeConfig) {
-    super(context, writeConfig);
+    super(context, writeConfig, FlinkUpgradeDowngradeHelper.getInstance());
     this.bucketToHandles = new HashMap<>();
   }
 
@@ -136,7 +135,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   @Override
   public List<WriteStatus> upsert(List<HoodieRecord<T>> records, String instantTime) {
     HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
-        getTableAndInitCtx(WriteOperationType.UPSERT, instantTime);
+        initTable(WriteOperationType.UPSERT, Option.ofNullable(instantTime));
     table.validateUpsertSchema();
     preWrite(instantTime, WriteOperationType.UPSERT, table.getMetaClient());
     final HoodieWriteHandle<?, ?, ?, ?> writeHandle = getOrCreateWriteHandle(records.get(0), getConfig(),
@@ -152,7 +151,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   public List<WriteStatus> upsertPreppedRecords(List<HoodieRecord<T>> preppedRecords, String instantTime) {
     // only used for metadata table, the upsert happens in single thread
     HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
-        getTableAndInitCtx(WriteOperationType.UPSERT, instantTime);
+        initTable(WriteOperationType.UPSERT, Option.ofNullable(instantTime));
     table.validateUpsertSchema();
     preWrite(instantTime, WriteOperationType.UPSERT_PREPPED, table.getMetaClient());
     final HoodieWriteHandle<?, ?, ?, ?> writeHandle = getOrCreateWriteHandle(preppedRecords.get(0), getConfig(),
@@ -164,7 +163,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   @Override
   public List<WriteStatus> insert(List<HoodieRecord<T>> records, String instantTime) {
     HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
-        getTableAndInitCtx(WriteOperationType.INSERT, instantTime);
+        initTable(WriteOperationType.INSERT, Option.ofNullable(instantTime));
     table.validateUpsertSchema();
     preWrite(instantTime, WriteOperationType.INSERT, table.getMetaClient());
     // create the write handle if not exists
@@ -187,7 +186,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   public List<WriteStatus> insertOverwrite(
       List<HoodieRecord<T>> records, final String instantTime) {
     HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
-        getTableAndInitCtx(WriteOperationType.INSERT_OVERWRITE, instantTime);
+        initTable(WriteOperationType.INSERT_OVERWRITE, Option.ofNullable(instantTime));
     table.validateInsertSchema();
     preWrite(instantTime, WriteOperationType.INSERT_OVERWRITE, table.getMetaClient());
     // create the write handle if not exists
@@ -206,7 +205,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
    */
  public List<WriteStatus> insertOverwriteTable(
      List<HoodieRecord<T>> records, final String instantTime) {
-    HoodieTable table = getTableAndInitCtx(WriteOperationType.INSERT_OVERWRITE_TABLE, instantTime);
+    HoodieTable table = initTable(WriteOperationType.INSERT_OVERWRITE_TABLE, Option.ofNullable(instantTime));
     table.validateInsertSchema();
     preWrite(instantTime, WriteOperationType.INSERT_OVERWRITE_TABLE, table.getMetaClient());
     // create the write handle if not exists
@@ -239,7 +238,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   @Override
   public List<WriteStatus> delete(List<HoodieKey> keys, String instantTime) {
     HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
-        getTableAndInitCtx(WriteOperationType.DELETE, instantTime);
+        initTable(WriteOperationType.DELETE, Option.ofNullable(instantTime));
     preWrite(instantTime, WriteOperationType.DELETE, table.getMetaClient());
     HoodieWriteMetadata<List<WriteStatus>> result = table.delete(context, instantTime, keys);
     return postWrite(result, instantTime, table);
@@ -397,11 +396,9 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   }
 
   @Override
-  protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> getTableAndInitCtx(WriteOperationType operationType, String instantTime) {
-    HoodieTableMetaClient metaClient = createMetaClient(true);
-    new UpgradeDowngrade(metaClient, config, context, FlinkUpgradeDowngradeHelper.getInstance())
-        .run(HoodieTableVersion.current(), instantTime);
-    return getTableAndInitCtx(metaClient, operationType);
+  protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> doInitTable(HoodieTableMetaClient metaClient, Option<String> instantTime) {
+    // Create a Hoodie table which encapsulated the commits and files visible
+    return getHoodieTable();
   }
 
   /**
@@ -488,20 +485,6 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
     return writeHandle;
   }
 
-  private HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> getTableAndInitCtx(HoodieTableMetaClient metaClient, WriteOperationType operationType) {
-    if (operationType == WriteOperationType.DELETE) {
-      setWriteSchemaForDeletes(metaClient);
-    }
-    // Create a Hoodie table which encapsulated the commits and files visible
-    HoodieFlinkTable<T> table = getHoodieTable();
-    if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
-      writeTimer = metrics.getCommitCtx();
-    } else {
-      writeTimer = metrics.getDeltaCommitCtx();
-    }
-    return table;
-  }
-
   public HoodieFlinkTable<T> getHoodieTable() {
     return HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
   }