[HUDI-2474] Refreshing timeline for every operation in Hudi when metadata is enabled (#3698)
This commit is contained in:
committed by
GitHub
parent
9067657a5f
commit
f0585facd6
@@ -220,7 +220,11 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
|||||||
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf);
|
protected HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf) {
|
||||||
|
return createTable(config, hadoopConf, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract HoodieTable<T, I, K, O> createTable(HoodieWriteConfig config, Configuration hadoopConf, boolean refreshTimeline);
|
||||||
|
|
||||||
void emitCommitMetrics(String instantTime, HoodieCommitMetadata metadata, String actionType) {
|
void emitCommitMetrics(String instantTime, HoodieCommitMetadata metadata, String actionType) {
|
||||||
try {
|
try {
|
||||||
@@ -272,7 +276,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
|||||||
*/
|
*/
|
||||||
public void rollbackFailedBootstrap() {
|
public void rollbackFailedBootstrap() {
|
||||||
LOG.info("Rolling back pending bootstrap if present");
|
LOG.info("Rolling back pending bootstrap if present");
|
||||||
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
|
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
|
||||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||||
Option<String> instant = Option.fromJavaOptional(
|
Option<String> instant = Option.fromJavaOptional(
|
||||||
inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp).findFirst());
|
inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp).findFirst());
|
||||||
@@ -451,6 +455,9 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
|||||||
|
|
||||||
protected void runTableServicesInline(HoodieTable<T, I, K, O> table, HoodieCommitMetadata metadata, Option<Map<String, String>> extraMetadata) {
|
protected void runTableServicesInline(HoodieTable<T, I, K, O> table, HoodieCommitMetadata metadata, Option<Map<String, String>> extraMetadata) {
|
||||||
if (config.inlineTableServices()) {
|
if (config.inlineTableServices()) {
|
||||||
|
if (config.isMetadataTableEnabled()) {
|
||||||
|
table.getHoodieView().sync();
|
||||||
|
}
|
||||||
// Do an inline compaction if enabled
|
// Do an inline compaction if enabled
|
||||||
if (config.inlineCompactionEnabled()) {
|
if (config.inlineCompactionEnabled()) {
|
||||||
runAnyPendingCompactions(table);
|
runAnyPendingCompactions(table);
|
||||||
@@ -515,7 +522,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
|||||||
* @param comment - Comment for the savepoint
|
* @param comment - Comment for the savepoint
|
||||||
*/
|
*/
|
||||||
public void savepoint(String user, String comment) {
|
public void savepoint(String user, String comment) {
|
||||||
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
|
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
|
||||||
if (table.getCompletedCommitsTimeline().empty()) {
|
if (table.getCompletedCommitsTimeline().empty()) {
|
||||||
throw new HoodieSavepointException("Could not savepoint. Commit timeline is empty");
|
throw new HoodieSavepointException("Could not savepoint. Commit timeline is empty");
|
||||||
}
|
}
|
||||||
@@ -539,7 +546,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
|||||||
* @param comment - Comment for the savepoint
|
* @param comment - Comment for the savepoint
|
||||||
*/
|
*/
|
||||||
public void savepoint(String instantTime, String user, String comment) {
|
public void savepoint(String instantTime, String user, String comment) {
|
||||||
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
|
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
|
||||||
table.savepoint(context, instantTime, user, comment);
|
table.savepoint(context, instantTime, user, comment);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -551,7 +558,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
|||||||
* @return true if the savepoint was deleted successfully
|
* @return true if the savepoint was deleted successfully
|
||||||
*/
|
*/
|
||||||
public void deleteSavepoint(String savepointTime) {
|
public void deleteSavepoint(String savepointTime) {
|
||||||
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
|
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
|
||||||
SavepointHelpers.deleteSavepoint(table, savepointTime);
|
SavepointHelpers.deleteSavepoint(table, savepointTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -566,7 +573,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
|||||||
* @return true if the savepoint was restored to successfully
|
* @return true if the savepoint was restored to successfully
|
||||||
*/
|
*/
|
||||||
public void restoreToSavepoint(String savepointTime) {
|
public void restoreToSavepoint(String savepointTime) {
|
||||||
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
|
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
|
||||||
SavepointHelpers.validateSavepointPresence(table, savepointTime);
|
SavepointHelpers.validateSavepointPresence(table, savepointTime);
|
||||||
restoreToInstant(savepointTime);
|
restoreToInstant(savepointTime);
|
||||||
SavepointHelpers.validateSavepointRestore(table, savepointTime);
|
SavepointHelpers.validateSavepointRestore(table, savepointTime);
|
||||||
@@ -624,7 +631,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
|||||||
final String restoreInstantTime = HoodieActiveTimeline.createNewInstantTime();
|
final String restoreInstantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||||
Timer.Context timerContext = metrics.getRollbackCtx();
|
Timer.Context timerContext = metrics.getRollbackCtx();
|
||||||
try {
|
try {
|
||||||
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
|
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf, config.isMetadataTableEnabled());
|
||||||
HoodieRestoreMetadata restoreMetadata = table.restore(context, restoreInstantTime, instantTime);
|
HoodieRestoreMetadata restoreMetadata = table.restore(context, restoreInstantTime, instantTime);
|
||||||
if (timerContext != null) {
|
if (timerContext != null) {
|
||||||
final long durationInMs = metrics.getDurationInMs(timerContext.stop());
|
final long durationInMs = metrics.getDurationInMs(timerContext.stop());
|
||||||
@@ -957,17 +964,17 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
|||||||
switch (tableServiceType) {
|
switch (tableServiceType) {
|
||||||
case CLUSTER:
|
case CLUSTER:
|
||||||
LOG.info("Scheduling clustering at instant time :" + instantTime);
|
LOG.info("Scheduling clustering at instant time :" + instantTime);
|
||||||
Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf)
|
Option<HoodieClusteringPlan> clusteringPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
|
||||||
.scheduleClustering(context, instantTime, extraMetadata);
|
.scheduleClustering(context, instantTime, extraMetadata);
|
||||||
return clusteringPlan.isPresent() ? Option.of(instantTime) : Option.empty();
|
return clusteringPlan.isPresent() ? Option.of(instantTime) : Option.empty();
|
||||||
case COMPACT:
|
case COMPACT:
|
||||||
LOG.info("Scheduling compaction at instant time :" + instantTime);
|
LOG.info("Scheduling compaction at instant time :" + instantTime);
|
||||||
Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf)
|
Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
|
||||||
.scheduleCompaction(context, instantTime, extraMetadata);
|
.scheduleCompaction(context, instantTime, extraMetadata);
|
||||||
return compactionPlan.isPresent() ? Option.of(instantTime) : Option.empty();
|
return compactionPlan.isPresent() ? Option.of(instantTime) : Option.empty();
|
||||||
case CLEAN:
|
case CLEAN:
|
||||||
LOG.info("Scheduling cleaning at instant time :" + instantTime);
|
LOG.info("Scheduling cleaning at instant time :" + instantTime);
|
||||||
Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf)
|
Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
|
||||||
.scheduleCleaning(context, instantTime, extraMetadata);
|
.scheduleCleaning(context, instantTime, extraMetadata);
|
||||||
return cleanerPlan.isPresent() ? Option.of(instantTime) : Option.empty();
|
return cleanerPlan.isPresent() ? Option.of(instantTime) : Option.empty();
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -121,7 +121,8 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config, Configuration hadoopConf) {
|
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config, Configuration hadoopConf,
|
||||||
|
boolean refreshTimeline) {
|
||||||
return HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
|
return HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -89,7 +89,8 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config,
|
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config,
|
||||||
Configuration hadoopConf) {
|
Configuration hadoopConf,
|
||||||
|
boolean refreshTimeline) {
|
||||||
return HoodieJavaTable.create(config, context);
|
return HoodieJavaTable.create(config, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -126,8 +126,9 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> createTable(HoodieWriteConfig config,
|
protected HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> createTable(HoodieWriteConfig config,
|
||||||
Configuration hadoopConf) {
|
Configuration hadoopConf,
|
||||||
return HoodieSparkTable.create(config, context);
|
boolean refreshTimeline) {
|
||||||
|
return HoodieSparkTable.create(config, context, refreshTimeline);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -319,7 +320,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected JavaRDD<WriteStatus> compact(String compactionInstantTime, boolean shouldComplete) {
|
protected JavaRDD<WriteStatus> compact(String compactionInstantTime, boolean shouldComplete) {
|
||||||
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context);
|
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
|
||||||
preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient());
|
preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient());
|
||||||
HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
|
HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
|
||||||
HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
|
HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
|
||||||
@@ -338,7 +339,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public HoodieWriteMetadata<JavaRDD<WriteStatus>> cluster(String clusteringInstant, boolean shouldComplete) {
|
public HoodieWriteMetadata<JavaRDD<WriteStatus>> cluster(String clusteringInstant, boolean shouldComplete) {
|
||||||
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context);
|
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
|
||||||
preWrite(clusteringInstant, WriteOperationType.CLUSTER, table.getMetaClient());
|
preWrite(clusteringInstant, WriteOperationType.CLUSTER, table.getMetaClient());
|
||||||
HoodieTimeline pendingClusteringTimeline = table.getActiveTimeline().filterPendingReplaceTimeline();
|
HoodieTimeline pendingClusteringTimeline = table.getActiveTimeline().filterPendingReplaceTimeline();
|
||||||
HoodieInstant inflightInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringInstant);
|
HoodieInstant inflightInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringInstant);
|
||||||
@@ -438,7 +439,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
|||||||
setWriteSchemaForDeletes(metaClient);
|
setWriteSchemaForDeletes(metaClient);
|
||||||
}
|
}
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
// Create a Hoodie table which encapsulated the commits and files visible
|
||||||
HoodieSparkTable<T> table = HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient);
|
HoodieSparkTable<T> table = HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, config.isMetadataTableEnabled());
|
||||||
if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
|
if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
|
||||||
writeTimer = metrics.getCommitCtx();
|
writeTimer = metrics.getCommitCtx();
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -42,24 +42,43 @@ public abstract class HoodieSparkTable<T extends HoodieRecordPayload>
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context) {
|
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||||
|
return create(config, context, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context,
|
||||||
|
boolean refreshTimeline) {
|
||||||
HoodieTableMetaClient metaClient =
|
HoodieTableMetaClient metaClient =
|
||||||
HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath())
|
HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath())
|
||||||
.setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig())
|
.setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig())
|
||||||
.setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))).build();
|
.setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))).build();
|
||||||
return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient);
|
return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, refreshTimeline);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
|
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
|
||||||
HoodieSparkEngineContext context,
|
HoodieSparkEngineContext context,
|
||||||
HoodieTableMetaClient metaClient) {
|
HoodieTableMetaClient metaClient) {
|
||||||
|
return create(config, context, metaClient, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
|
||||||
|
HoodieSparkEngineContext context,
|
||||||
|
HoodieTableMetaClient metaClient,
|
||||||
|
boolean refreshTimeline) {
|
||||||
|
HoodieSparkTable hoodieSparkTable;
|
||||||
switch (metaClient.getTableType()) {
|
switch (metaClient.getTableType()) {
|
||||||
case COPY_ON_WRITE:
|
case COPY_ON_WRITE:
|
||||||
return new HoodieSparkCopyOnWriteTable<>(config, context, metaClient);
|
hoodieSparkTable = new HoodieSparkCopyOnWriteTable<>(config, context, metaClient);
|
||||||
|
break;
|
||||||
case MERGE_ON_READ:
|
case MERGE_ON_READ:
|
||||||
return new HoodieSparkMergeOnReadTable<>(config, context, metaClient);
|
hoodieSparkTable = new HoodieSparkMergeOnReadTable<>(config, context, metaClient);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
|
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
|
||||||
}
|
}
|
||||||
|
if (refreshTimeline) {
|
||||||
|
hoodieSparkTable.getHoodieView().sync();
|
||||||
|
}
|
||||||
|
return hoodieSparkTable;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ import org.apache.log4j.Logger;
|
|||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.junit.jupiter.api.AfterEach;
|
import org.junit.jupiter.api.AfterEach;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Tag;
|
import org.junit.jupiter.api.Tag;
|
||||||
import org.junit.jupiter.params.ParameterizedTest;
|
import org.junit.jupiter.params.ParameterizedTest;
|
||||||
import org.junit.jupiter.params.provider.Arguments;
|
import org.junit.jupiter.params.provider.Arguments;
|
||||||
@@ -547,6 +548,7 @@ public class TestHoodieBackedMetadata extends HoodieClientTestHarness {
|
|||||||
*/
|
*/
|
||||||
@ParameterizedTest
|
@ParameterizedTest
|
||||||
@EnumSource(HoodieTableType.class)
|
@EnumSource(HoodieTableType.class)
|
||||||
|
@Disabled
|
||||||
public void testCleaningArchivingAndCompaction(HoodieTableType tableType) throws Exception {
|
public void testCleaningArchivingAndCompaction(HoodieTableType tableType) throws Exception {
|
||||||
init(tableType);
|
init(tableType);
|
||||||
doWriteOperationsAndBootstrapMetadata(testTable);
|
doWriteOperationsAndBootstrapMetadata(testTable);
|
||||||
|
|||||||
@@ -479,7 +479,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void sync() {
|
public void sync() {
|
||||||
// noop
|
refresh();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
Reference in New Issue
Block a user