[HUDI-2474] Refreshing timeline for every operation in Hudi when metadata is enabled (#3698)
commit f0585facd6 (parent 9067657a5f), committed via GitHub
@@ -220,7 +220,11 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
         Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
   }
 
-  protected abstract HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf);
+  protected HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf) {
+    return createTable(config, hadoopConf, false);
+  }
+
+  protected abstract HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf, boolean refreshTimeline);
 
   void emitCommitMetrics(String instantTime, HoodieCommitMetadata metadata, String actionType) {
     try {
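This first hunk sets up the API for the whole change: the old two-argument createTable survives as a concrete delegator that passes refreshTimeline = false, and the three-argument variant becomes the new abstract extension point, so existing call sites compile unchanged while each engine gains a refresh hook. A minimal self-contained sketch of that overload-with-default pattern (the names here are illustrative stand-ins, not Hudi's real classes):

```java
// Illustrative sketch only: WriteClientSketch/TableSketch are stand-ins,
// not Hudi classes.
abstract class WriteClientSketch {

  static final class TableSketch {
    void syncView() {
      System.out.println("file-system view synced with latest timeline");
    }
  }

  // Old entry point survives as a delegator: legacy callers get no refresh.
  protected TableSketch createTable(String basePath) {
    return createTable(basePath, false);
  }

  // New abstract extension point that each engine implements.
  protected abstract TableSketch createTable(String basePath, boolean refreshTimeline);

  public static void main(String[] args) {
    WriteClientSketch client = new WriteClientSketch() {
      @Override
      protected TableSketch createTable(String basePath, boolean refreshTimeline) {
        TableSketch table = new TableSketch();
        if (refreshTimeline) {
          table.syncView();
        }
        return table;
      }
    };
    client.createTable("/tmp/tbl");       // legacy path, no sync
    client.createTable("/tmp/tbl", true); // refreshes before use
  }
}
```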
@@ -272,7 +276,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
    */
   public void rollbackFailedBootstrap() {
     LOG.info("Rolling back pending bootstrap if present");
-    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
+    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
     HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
     Option<String> instant = Option.fromJavaOptional(
         inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp).findFirst());
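This is the first of several call sites that all change the same way — rollback, both savepoint overloads, deleteSavepoint, restoreToSavepoint, restore, and the scheduling switch below: the decision to refresh is simply whether the metadata table is enabled. A stand-in sketch of the gate (the real call is createTable(config, hadoopConf, config.isMetadataTableEnabled()) as in the hunk above):

```java
// Stand-in names only; illustrates the recurring refresh gate.
final class RefreshGateSketch {
  static boolean metadataTableEnabled = true; // stand-in for the config flag

  static String createTable(boolean refreshTimeline) {
    return refreshTimeline ? "table (view synced)" : "table (cached view)";
  }

  public static void main(String[] args) {
    // With the metadata table on, every operation starts from a freshly
    // synced timeline; otherwise the cached view is reused.
    System.out.println(createTable(metadataTableEnabled));
  }
}
```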
@@ -451,6 +455,9 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
 
   protected void runTableServicesInline(HoodieTable<T, I, K, O> table, HoodieCommitMetadata metadata, Option<Map<String, String>> extraMetadata) {
     if (config.inlineTableServices()) {
+      if (config.isMetadataTableEnabled()) {
+        table.getHoodieView().sync();
+      }
       // Do an inline compaction if enabled
       if (config.inlineCompactionEnabled()) {
         runAnyPendingCompactions(table);
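Before any inline table service runs, the table's file-system view is synced when the metadata table is on, so compaction and clustering plan against the latest completed instants. A minimal sketch of that ordering (stand-in types, not Hudi's):

```java
// Stand-in types; mirrors the ordering in runTableServicesInline above:
// sync first, then schedule services against the fresh view.
final class InlineServicesSketch {

  interface View {
    void sync();
  }

  static void runServicesInline(View view, boolean metadataEnabled, boolean inlineCompaction) {
    if (metadataEnabled) {
      view.sync(); // refresh before planning anything
    }
    if (inlineCompaction) {
      System.out.println("scheduling compaction against synced view");
    }
  }

  public static void main(String[] args) {
    runServicesInline(() -> System.out.println("view synced"), true, true);
  }
}
```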
@@ -515,7 +522,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
    * @param comment - Comment for the savepoint
    */
   public void savepoint(String user, String comment) {
-    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
+    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
     if (table.getCompletedCommitsTimeline().empty()) {
       throw new HoodieSavepointException("Could not savepoint. Commit timeline is empty");
     }
@@ -539,7 +546,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
    * @param comment - Comment for the savepoint
    */
   public void savepoint(String instantTime, String user, String comment) {
-    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
+    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
     table.savepoint(context, instantTime, user, comment);
   }
 
@@ -551,7 +558,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
    * @return true if the savepoint was deleted successfully
    */
   public void deleteSavepoint(String savepointTime) {
-    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
+    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
     SavepointHelpers.deleteSavepoint(table, savepointTime);
   }
 
@@ -566,7 +573,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
    * @return true if the savepoint was restored to successfully
    */
   public void restoreToSavepoint(String savepointTime) {
-    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
+    HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
     SavepointHelpers.validateSavepointPresence(table, savepointTime);
     restoreToInstant(savepointTime);
     SavepointHelpers.validateSavepointRestore(table, savepointTime);
@@ -624,7 +631,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
     final String restoreInstantTime = HoodieActiveTimeline.createNewInstantTime();
     Timer.Context timerContext = metrics.getRollbackCtx();
     try {
-      HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
+      HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
       HoodieRestoreMetadata restoreMetadata = table.restore(context, restoreInstantTime, instantTime);
       if (timerContext != null) {
         final long durationInMs = metrics.getDurationInMs(timerContext.stop());
@@ -957,17 +964,17 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
     switch (tableServiceType) {
       case CLUSTER:
        LOG.info("Scheduling clustering at instant time :" + instantTime);
-        Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf)
+        Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
            .scheduleClustering(context, instantTime, extraMetadata);
         return clusteringPlan.isPresent() ? Option.of(instantTime) : Option.empty();
       case COMPACT:
         LOG.info("Scheduling compaction at instant time :" + instantTime);
-        Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf)
+        Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
            .scheduleCompaction(context, instantTime, extraMetadata);
         return compactionPlan.isPresent() ? Option.of(instantTime) : Option.empty();
       case CLEAN:
         LOG.info("Scheduling cleaning at instant time :" + instantTime);
-        Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf)
+        Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
            .scheduleCleaning(context, instantTime, extraMetadata);
         return cleanerPlan.isPresent() ? Option.of(instantTime) : Option.empty();
       default:
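Each branch of the scheduling switch now builds the table through the refresh gate before asking it for a plan, and maps "plan present" to "instant scheduled". A compact stand-in sketch of that shape:

```java
// Stand-in sketch of the scheduling switch: plan present => instant returned.
import java.util.Optional;

final class ScheduleSketch {
  enum ServiceType { CLUSTER, COMPACT, CLEAN }

  static Optional<String> schedule(ServiceType type, String instantTime, boolean planExists) {
    switch (type) {
      case CLUSTER:
      case COMPACT:
      case CLEAN:
        // In the real code a freshly created (and possibly view-synced)
        // table produces the plan; here a boolean stands in for it.
        return planExists ? Optional.of(instantTime) : Optional.empty();
      default:
        throw new IllegalArgumentException("Unsupported service " + type);
    }
  }

  public static void main(String[] args) {
    System.out.println(schedule(ServiceType.COMPACT, "20211001120000", true));  // Optional[20211001120000]
    System.out.println(schedule(ServiceType.CLEAN, "20211001120000", false));   // Optional.empty
  }
}
```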
@@ -121,7 +121,8 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   }
 
   @Override
-  protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config, Configuration hadoopConf) {
+  protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config, Configuration hadoopConf,
+                                                                                                  boolean refreshTimeline) {
     return HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
   }
 
@@ -89,7 +89,8 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
 
   @Override
   protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config,
-                                                                                                  Configuration hadoopConf) {
+                                                                                                  Configuration hadoopConf,
+                                                                                                  boolean refreshTimeline) {
     return HoodieJavaTable.create(config, context);
   }
 
@@ -126,8 +126,9 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
 
   @Override
   protected HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> createTable(HoodieWriteConfig config,
-                                                                                                           Configuration hadoopConf) {
-    return HoodieSparkTable.create(config, context);
+                                                                                                           Configuration hadoopConf,
+                                                                                                           boolean refreshTimeline) {
+    return HoodieSparkTable.create(config, context, refreshTimeline);
   }
 
   @Override
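Note the asymmetry across engines in the three overrides above: the Flink and Java clients accept the new refreshTimeline parameter but do not use it yet, while the Spark client threads it through to HoodieSparkTable.create. A stand-in sketch of that contrast:

```java
// Stand-in sketch of the per-engine overrides; only the Spark-like engine
// honors the flag, mirroring the hunks above.
abstract class EngineClientSketch {
  protected abstract String createTable(boolean refreshTimeline);

  static final class FlinkLike extends EngineClientSketch {
    @Override
    protected String createTable(boolean refreshTimeline) {
      return "flink table"; // parameter accepted, not yet used
    }
  }

  static final class SparkLike extends EngineClientSketch {
    @Override
    protected String createTable(boolean refreshTimeline) {
      return refreshTimeline ? "spark table (view synced)" : "spark table";
    }
  }

  public static void main(String[] args) {
    System.out.println(new FlinkLike().createTable(true));
    System.out.println(new SparkLike().createTable(true));
  }
}
```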
@@ -319,7 +320,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
 
   @Override
   protected JavaRDD<WriteStatus> compact(String compactionInstantTime, boolean shouldComplete) {
-    HoodieSparkTable<T> table = HoodieSparkTable.create(config, context);
+    HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
     preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient());
     HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
     HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
@@ -338,7 +339,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
 
   @Override
   public HoodieWriteMetadata<JavaRDD<WriteStatus>> cluster(String clusteringInstant, boolean shouldComplete) {
-    HoodieSparkTable<T> table = HoodieSparkTable.create(config, context);
+    HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
     preWrite(clusteringInstant, WriteOperationType.CLUSTER, table.getMetaClient());
     HoodieTimeline pendingClusteringTimeline = table.getActiveTimeline().filterPendingReplaceTimeline();
     HoodieInstant inflightInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringInstant);
@@ -438,7 +439,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
       setWriteSchemaForDeletes(metaClient);
     }
     // Create a Hoodie table which encapsulated the commits and files visible
-    HoodieSparkTable<T> table = HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient);
+    HoodieSparkTable<T> table = HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, config.isMetadataTableEnabled());
     if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
       writeTimer = metrics.getCommitCtx();
     } else {
@@ -42,24 +42,43 @@ public abstract class HoodieSparkTable<T extends HoodieRecordPayload>
   }
 
   public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context) {
+    return create(config, context, false);
+  }
+
+  public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context,
+                                                                           boolean refreshTimeline) {
     HoodieTableMetaClient metaClient =
         HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath())
             .setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig())
             .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))).build();
-    return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient);
+    return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, refreshTimeline);
   }
 
   public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
                                                                            HoodieSparkEngineContext context,
                                                                            HoodieTableMetaClient metaClient) {
+    return create(config, context, metaClient, false);
+  }
+
+  public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
+                                                                           HoodieSparkEngineContext context,
+                                                                           HoodieTableMetaClient metaClient,
+                                                                           boolean refreshTimeline) {
+    HoodieSparkTable hoodieSparkTable;
     switch (metaClient.getTableType()) {
       case COPY_ON_WRITE:
-        return new HoodieSparkCopyOnWriteTable<>(config, context, metaClient);
+        hoodieSparkTable = new HoodieSparkCopyOnWriteTable<>(config, context, metaClient);
+        break;
       case MERGE_ON_READ:
-        return new HoodieSparkMergeOnReadTable<>(config, context, metaClient);
+        hoodieSparkTable = new HoodieSparkMergeOnReadTable<>(config, context, metaClient);
+        break;
       default:
         throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
     }
+    if (refreshTimeline) {
+      hoodieSparkTable.getHoodieView().sync();
+    }
+    return hoodieSparkTable;
   }
 
   @Override
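This is the heart of the change: the HoodieSparkTable factory no longer returns from inside the switch; it assigns the engine-specific instance, then optionally syncs the table's file-system view at a single choke point before handing it back. A self-contained sketch of that construct-then-refresh factory (illustrative names only, not Hudi's):

```java
// Illustrative factory sketch mirroring HoodieSparkTable.create above:
// build by table type, then optionally sync the view before returning.
final class TableFactorySketch {
  enum TableType { COPY_ON_WRITE, MERGE_ON_READ }

  static class SparkTableSketch {
    final String kind;
    SparkTableSketch(String kind) {
      this.kind = kind;
    }
    void syncView() {
      System.out.println(kind + ": view synced with latest timeline");
    }
  }

  static SparkTableSketch create(TableType type, boolean refreshTimeline) {
    SparkTableSketch table;
    switch (type) {
      case COPY_ON_WRITE:
        table = new SparkTableSketch("cow");
        break;
      case MERGE_ON_READ:
        table = new SparkTableSketch("mor");
        break;
      default:
        throw new IllegalArgumentException("Unsupported table type :" + type);
    }
    if (refreshTimeline) {
      table.syncView(); // single choke point for the refresh
    }
    return table;
  }

  public static void main(String[] args) {
    create(TableType.MERGE_ON_READ, true);  // syncs
    create(TableType.COPY_ON_WRITE, false); // skips sync
  }
}
```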
@@ -71,6 +71,7 @@ import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaRDD;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
@@ -547,6 +548,7 @@ public class TestHoodieBackedMetadata extends HoodieClientTestHarness {
    */
   @ParameterizedTest
   @EnumSource(HoodieTableType.class)
+  @Disabled
   public void testCleaningArchivingAndCompaction(HoodieTableType tableType) throws Exception {
     init(tableType);
     doWriteOperationsAndBootstrapMetadata(testTable);
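The test above is parameterized over table types and temporarily switched off with JUnit 5's @Disabled. For reference, a minimal JUnit 5 example of the same annotation combination (assumes junit-jupiter-params on the classpath; the enum and test body are illustrative):

```java
// Minimal JUnit 5 illustration; requires junit-jupiter-params on the classpath.
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

class DisabledParameterizedExample {
  enum TableType { COPY_ON_WRITE, MERGE_ON_READ }

  @ParameterizedTest
  @EnumSource(TableType.class)
  @Disabled // skipped for every enum value until re-enabled
  void runsOncePerTableType(TableType type) {
    // test body would exercise 'type' here
  }
}
```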
@@ -479,7 +479,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
 
   @Override
   public void sync() {
-    // noop
+    refresh();
   }
 
   @Override
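Finally, the remote view's sync() stops being a no-op and delegates to refresh(), so a client-side sync request actually reloads state instead of silently doing nothing. A stand-in sketch of the delegation:

```java
// Stand-in sketch: sync() was a no-op, now it delegates to refresh().
interface SyncableViewSketch {
  void sync();
  void refresh();
}

final class RemoteViewSketch implements SyncableViewSketch {
  @Override
  public void sync() {
    refresh(); // previously: // noop
  }

  @Override
  public void refresh() {
    System.out.println("remote file-system view reloaded");
  }

  public static void main(String[] args) {
    new RemoteViewSketch().sync();
  }
}
```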