1
0

[HUDI-4167] Remove the timeline refresh with initializing hoodie table (#5716)

The timeline refresh on table initialization invokes the fs view #sync, which has two actions now:

1. reload the timeline of the fs view, so that the next fs view request is based on this timeline metadata
2. if this is a local fs view, clear all the local states; if this is a remote fs view, send request to sync the remote fs view

But, let's see the construction, the meta client is instantiated freshly so the timeline is already the latest,
the table is also constructed freshly, so the fs view has no local states, that means, the #sync is unnecessary totally.

In this patch, the metadata lifecycle and data set fs view are kept in sync, when the fs view is refreshed, the underneath metadata
is also refreshed synchronouly. The freshness of the metadata follows the same rules as data fs view:

1. if the fs view is local, the visibility is based on the client table metadata client's latest commit
2. if the fs view is remote, the timeline server would #sync the fs view and metadata together based on the lagging server local timeline

From the perspective of client, no need to care about the refresh action anymore no matter whether the metadata table is enabled or not.
That make the client logic more clear and less error-prone.

Removes the timeline refresh has another benefit: if avoids unncecessary #refresh of the remote fs view, if all the clients send request to #sync the
remote fs view, the server would encounter conflicts and the client encounters a response error.
This commit is contained in:
Danny Chan
2022-06-02 09:48:48 +08:00
committed by GitHub
parent 7276d0eaa6
commit 7f8630cc57
16 changed files with 59 additions and 67 deletions

View File

@@ -296,11 +296,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
}
}
protected HoodieTable createTable(HoodieWriteConfig config, Configuration hadoopConf) {
return createTable(config, hadoopConf, false);
}
protected abstract HoodieTable createTable(HoodieWriteConfig config, Configuration hadoopConf, boolean refreshTimeline);
protected abstract HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf);
void emitCommitMetrics(String instantTime, HoodieCommitMetadata metadata, String actionType) {
try {
@@ -365,7 +361,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
*/
protected void rollbackFailedBootstrap() {
LOG.info("Rolling back pending bootstrap if present");
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
Option<String> instant = Option.fromJavaOptional(
inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp).findFirst());
@@ -634,7 +630,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
* Run any pending compactions.
*/
public void runAnyPendingCompactions() {
runAnyPendingCompactions(createTable(config, hadoopConf, config.isMetadataTableEnabled()));
runAnyPendingCompactions(createTable(config, hadoopConf));
}
/**
@@ -644,7 +640,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
* @param comment - Comment for the savepoint
*/
public void savepoint(String user, String comment) {
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
if (table.getCompletedCommitsTimeline().empty()) {
throw new HoodieSavepointException("Could not savepoint. Commit timeline is empty");
}
@@ -668,7 +664,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
* @param comment - Comment for the savepoint
*/
public void savepoint(String instantTime, String user, String comment) {
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
table.savepoint(context, instantTime, user, comment);
}
@@ -680,7 +676,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
* @return true if the savepoint was deleted successfully
*/
public void deleteSavepoint(String savepointTime) {
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
SavepointHelpers.deleteSavepoint(table, savepointTime);
}
@@ -1012,7 +1008,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
*/
public Option<String> scheduleIndexing(List<MetadataPartitionType> partitionTypes) {
String instantTime = HoodieActiveTimeline.createNewInstantTime();
Option<HoodieIndexPlan> indexPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
Option<HoodieIndexPlan> indexPlan = createTable(config, hadoopConf)
.scheduleIndexing(context, instantTime, partitionTypes);
return indexPlan.isPresent() ? Option.of(instantTime) : Option.empty();
}
@@ -1024,7 +1020,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
* @return {@link Option<HoodieIndexCommitMetadata>} after successful indexing.
*/
public Option<HoodieIndexCommitMetadata> index(String indexInstantTime) {
return createTable(config, hadoopConf, config.isMetadataTableEnabled()).index(context, indexInstantTime);
return createTable(config, hadoopConf).index(context, indexInstantTime);
}
/**
@@ -1339,17 +1335,17 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
return Option.empty();
case CLUSTER:
LOG.info("Scheduling clustering at instant time :" + instantTime);
Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf)
.scheduleClustering(context, instantTime, extraMetadata);
return clusteringPlan.isPresent() ? Option.of(instantTime) : Option.empty();
case COMPACT:
LOG.info("Scheduling compaction at instant time :" + instantTime);
Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf)
.scheduleCompaction(context, instantTime, extraMetadata);
return compactionPlan.isPresent() ? Option.of(instantTime) : Option.empty();
case CLEAN:
LOG.info("Scheduling cleaning at instant time :" + instantTime);
Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf)
.scheduleCleaning(context, instantTime, extraMetadata);
return cleanerPlan.isPresent() ? Option.of(instantTime) : Option.empty();
default:
@@ -1702,6 +1698,6 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
// try to save history schemas
FileBasedInternalSchemaStorageManager schemasManager = new FileBasedInternalSchemaStorageManager(metaClient);
schemasManager.persistHistorySchemaStr(instantTime, SerDeHelper.inheritSchemas(newSchema, historySchemaStr));
commitStats(instantTime, Collections.EMPTY_LIST, Option.of(extraMeta), commitActionType);
commitStats(instantTime, Collections.emptyList(), Option.of(extraMeta), commitActionType);
}
}

View File

@@ -364,8 +364,8 @@ public class HoodieWriteConfig extends HoodieConfig {
public static final ConfigProperty<Boolean> REFRESH_TIMELINE_SERVER_BASED_ON_LATEST_COMMIT = ConfigProperty
.key("hoodie.refresh.timeline.server.based.on.latest.commit")
.defaultValue(false)
.withDocumentation("Refresh timeline in timeline server based on latest commit apart from timeline hash difference. By default (false), ");
.defaultValue(true)
.withDocumentation("Refresh timeline in timeline server based on latest commit apart from timeline hash difference. By default (true).");
public static final ConfigProperty<Long> INITIAL_CONSISTENCY_CHECK_INTERVAL_MS = ConfigProperty
.key("hoodie.consistency.check.initial_interval_ms")
@@ -2499,6 +2499,11 @@ public class HoodieWriteConfig extends HoodieConfig {
return this;
}
public Builder withRefreshTimelineServerBasedOnLatestCommit(boolean refreshTimelineServerBasedOnLatestCommit) {
writeConfig.setValue(REFRESH_TIMELINE_SERVER_BASED_ON_LATEST_COMMIT, Boolean.toString(refreshTimelineServerBasedOnLatestCommit));
return this;
}
protected void setDefaults() {
writeConfig.setDefaultValue(MARKERS_TYPE, getDefaultMarkersType(engineType));
// Check for mandatory properties