1
0

[HUDI-2474] Refreshing timeline for every operation in Hudi when metadata is enabled (#3698)

This commit is contained in:
Sivabalan Narayanan
2021-09-28 05:16:52 -04:00
committed by GitHub
parent 9067657a5f
commit f0585facd6
7 changed files with 52 additions and 21 deletions

View File

@@ -220,7 +220,11 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
}
protected abstract HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf);
protected HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf) {
return createTable(config, hadoopConf, false);
}
protected abstract HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf, boolean refreshTimeline);
void emitCommitMetrics(String instantTime, HoodieCommitMetadata metadata, String actionType) {
try {
@@ -272,7 +276,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
*/
public void rollbackFailedBootstrap() {
LOG.info("Rolling back pending bootstrap if present");
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
Option<String> instant = Option.fromJavaOptional(
inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp).findFirst());
@@ -451,6 +455,9 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
protected void runTableServicesInline(HoodieTable<T, I, K, O> table, HoodieCommitMetadata metadata, Option<Map<String, String>> extraMetadata) {
if (config.inlineTableServices()) {
if (config.isMetadataTableEnabled()) {
table.getHoodieView().sync();
}
// Do an inline compaction if enabled
if (config.inlineCompactionEnabled()) {
runAnyPendingCompactions(table);
@@ -515,7 +522,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
* @param comment - Comment for the savepoint
*/
public void savepoint(String user, String comment) {
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
if (table.getCompletedCommitsTimeline().empty()) {
throw new HoodieSavepointException("Could not savepoint. Commit timeline is empty");
}
@@ -539,7 +546,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
* @param comment - Comment for the savepoint
*/
public void savepoint(String instantTime, String user, String comment) {
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
table.savepoint(context, instantTime, user, comment);
}
@@ -551,7 +558,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
* @return true if the savepoint was deleted successfully
*/
public void deleteSavepoint(String savepointTime) {
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
SavepointHelpers.deleteSavepoint(table, savepointTime);
}
@@ -566,7 +573,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
* @return true if the savepoint was restored to successfully
*/
public void restoreToSavepoint(String savepointTime) {
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
SavepointHelpers.validateSavepointPresence(table, savepointTime);
restoreToInstant(savepointTime);
SavepointHelpers.validateSavepointRestore(table, savepointTime);
@@ -624,7 +631,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
final String restoreInstantTime = HoodieActiveTimeline.createNewInstantTime();
Timer.Context timerContext = metrics.getRollbackCtx();
try {
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
HoodieRestoreMetadata restoreMetadata = table.restore(context, restoreInstantTime, instantTime);
if (timerContext != null) {
final long durationInMs = metrics.getDurationInMs(timerContext.stop());
@@ -957,17 +964,17 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
switch (tableServiceType) {
case CLUSTER:
LOG.info("Scheduling clustering at instant time :" + instantTime);
Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf)
Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
.scheduleClustering(context, instantTime, extraMetadata);
return clusteringPlan.isPresent() ? Option.of(instantTime) : Option.empty();
case COMPACT:
LOG.info("Scheduling compaction at instant time :" + instantTime);
Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf)
Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
.scheduleCompaction(context, instantTime, extraMetadata);
return compactionPlan.isPresent() ? Option.of(instantTime) : Option.empty();
case CLEAN:
LOG.info("Scheduling cleaning at instant time :" + instantTime);
Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf)
Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
.scheduleCleaning(context, instantTime, extraMetadata);
return cleanerPlan.isPresent() ? Option.of(instantTime) : Option.empty();
default:

View File

@@ -121,7 +121,8 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
}
@Override
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config, Configuration hadoopConf) {
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config, Configuration hadoopConf,
boolean refreshTimeline) {
return HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
}

View File

@@ -89,7 +89,8 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
@Override
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config,
Configuration hadoopConf) {
Configuration hadoopConf,
boolean refreshTimeline) {
return HoodieJavaTable.create(config, context);
}

View File

@@ -126,8 +126,9 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
@Override
protected HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> createTable(HoodieWriteConfig config,
Configuration hadoopConf) {
return HoodieSparkTable.create(config, context);
Configuration hadoopConf,
boolean refreshTimeline) {
return HoodieSparkTable.create(config, context, refreshTimeline);
}
@Override
@@ -319,7 +320,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
@Override
protected JavaRDD<WriteStatus> compact(String compactionInstantTime, boolean shouldComplete) {
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context);
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient());
HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
@@ -338,7 +339,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
@Override
public HoodieWriteMetadata<JavaRDD<WriteStatus>> cluster(String clusteringInstant, boolean shouldComplete) {
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context);
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
preWrite(clusteringInstant, WriteOperationType.CLUSTER, table.getMetaClient());
HoodieTimeline pendingClusteringTimeline = table.getActiveTimeline().filterPendingReplaceTimeline();
HoodieInstant inflightInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringInstant);
@@ -438,7 +439,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
setWriteSchemaForDeletes(metaClient);
}
// Create a Hoodie table which encapsulated the commits and files visible
HoodieSparkTable<T> table = HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient);
HoodieSparkTable<T> table = HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, config.isMetadataTableEnabled());
if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
writeTimer = metrics.getCommitCtx();
} else {

View File

@@ -42,24 +42,43 @@ public abstract class HoodieSparkTable<T extends HoodieRecordPayload>
}
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context) {
return create(config, context, false);
}
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context,
boolean refreshTimeline) {
HoodieTableMetaClient metaClient =
HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath())
.setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig())
.setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))).build();
return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient);
return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, refreshTimeline);
}
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
HoodieSparkEngineContext context,
HoodieTableMetaClient metaClient) {
return create(config, context, metaClient, false);
}
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
HoodieSparkEngineContext context,
HoodieTableMetaClient metaClient,
boolean refreshTimeline) {
HoodieSparkTable hoodieSparkTable;
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return new HoodieSparkCopyOnWriteTable<>(config, context, metaClient);
hoodieSparkTable = new HoodieSparkCopyOnWriteTable<>(config, context, metaClient);
break;
case MERGE_ON_READ:
return new HoodieSparkMergeOnReadTable<>(config, context, metaClient);
hoodieSparkTable = new HoodieSparkMergeOnReadTable<>(config, context, metaClient);
break;
default:
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
}
if (refreshTimeline) {
hoodieSparkTable.getHoodieView().sync();
}
return hoodieSparkTable;
}
@Override

View File

@@ -71,6 +71,7 @@ import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
@@ -547,6 +548,7 @@ public class TestHoodieBackedMetadata extends HoodieClientTestHarness {
*/
@ParameterizedTest
@EnumSource(HoodieTableType.class)
@Disabled
public void testCleaningArchivingAndCompaction(HoodieTableType tableType) throws Exception {
init(tableType);
doWriteOperationsAndBootstrapMetadata(testTable);

View File

@@ -479,7 +479,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
@Override
public void sync() {
// noop
refresh();
}
@Override