1
0

[HUDI-2474] Refreshing timeline for every operation in Hudi when metadata is enabled (#3698)

This commit is contained in:
Sivabalan Narayanan
2021-09-28 05:16:52 -04:00
committed by GitHub
parent 9067657a5f
commit f0585facd6
7 changed files with 52 additions and 21 deletions

View File

@@ -126,8 +126,9 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
@Override
protected HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> createTable(HoodieWriteConfig config,
Configuration hadoopConf) {
return HoodieSparkTable.create(config, context);
Configuration hadoopConf,
boolean refreshTimeline) {
return HoodieSparkTable.create(config, context, refreshTimeline);
}
@Override
@@ -319,7 +320,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
@Override
protected JavaRDD<WriteStatus> compact(String compactionInstantTime, boolean shouldComplete) {
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context);
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient());
HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
@@ -338,7 +339,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
@Override
public HoodieWriteMetadata<JavaRDD<WriteStatus>> cluster(String clusteringInstant, boolean shouldComplete) {
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context);
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
preWrite(clusteringInstant, WriteOperationType.CLUSTER, table.getMetaClient());
HoodieTimeline pendingClusteringTimeline = table.getActiveTimeline().filterPendingReplaceTimeline();
HoodieInstant inflightInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringInstant);
@@ -438,7 +439,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
setWriteSchemaForDeletes(metaClient);
}
// Create a Hoodie table which encapsulated the commits and files visible
HoodieSparkTable<T> table = HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient);
HoodieSparkTable<T> table = HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, config.isMetadataTableEnabled());
if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
writeTimer = metrics.getCommitCtx();
} else {

View File

@@ -42,24 +42,43 @@ public abstract class HoodieSparkTable<T extends HoodieRecordPayload>
}
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context) {
return create(config, context, false);
}
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context,
boolean refreshTimeline) {
HoodieTableMetaClient metaClient =
HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath())
.setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig())
.setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))).build();
return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient);
return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, refreshTimeline);
}
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
HoodieSparkEngineContext context,
HoodieTableMetaClient metaClient) {
return create(config, context, metaClient, false);
}
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
HoodieSparkEngineContext context,
HoodieTableMetaClient metaClient,
boolean refreshTimeline) {
HoodieSparkTable hoodieSparkTable;
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return new HoodieSparkCopyOnWriteTable<>(config, context, metaClient);
hoodieSparkTable = new HoodieSparkCopyOnWriteTable<>(config, context, metaClient);
break;
case MERGE_ON_READ:
return new HoodieSparkMergeOnReadTable<>(config, context, metaClient);
hoodieSparkTable = new HoodieSparkMergeOnReadTable<>(config, context, metaClient);
break;
default:
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
}
if (refreshTimeline) {
hoodieSparkTable.getHoodieView().sync();
}
return hoodieSparkTable;
}
@Override