1
0

[HUDI-2796] Metadata table support for Restore action to first commit (#4039)

- Adding support for the metadata table to restore to first commit and
   take proper action for the bootstrap on subequent commits.
This commit is contained in:
Manoj Govindassamy
2021-11-19 17:02:57 -08:00
committed by GitHub
parent c8617d9390
commit 0230d40b74
3 changed files with 59 additions and 18 deletions

View File

@@ -353,21 +353,11 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
return false;
}
boolean isRollbackAction = false;
List<String> rollbackedTimestamps = Collections.emptyList();
if (actionMetadata.isPresent() && actionMetadata.get() instanceof HoodieRollbackMetadata) {
isRollbackAction = true;
List<HoodieInstantInfo> rollbackedInstants =
((HoodieRollbackMetadata) actionMetadata.get()).getInstantsRollback();
rollbackedTimestamps = rollbackedInstants.stream().map(instant -> {
return instant.getCommitTime().toString();
}).collect(Collectors.toList());
}
// Detect the commit gaps if any from the data and the metadata active timeline
if (dataMetaClient.getActiveTimeline().getAllCommitsTimeline().isBeforeTimelineStarts(
latestMetadataInstant.get().getTimestamp())
&& (!isRollbackAction || !rollbackedTimestamps.contains(latestMetadataInstantTimestamp))) {
LOG.warn("Metadata Table will need to be re-bootstrapped as un-synced instants have been archived."
&& !isCommitRevertedByInFlightAction(actionMetadata, latestMetadataInstantTimestamp)) {
LOG.error("Metadata Table will need to be re-bootstrapped as un-synced instants have been archived."
+ " latestMetadataInstant=" + latestMetadataInstant.get().getTimestamp()
+ ", latestDataInstant=" + dataMetaClient.getActiveTimeline().firstInstant().get().getTimestamp());
return true;
@@ -376,10 +366,59 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
return false;
}
/**
* Is the latest commit instant reverted by the in-flight instant action?
*
* @param actionMetadata - In-flight instant action metadata
* @param latestMetadataInstantTimestamp - Metadata table latest instant timestamp
* @param <T> - ActionMetadata type
* @return True if the latest instant action is reverted by the action
*/
private <T extends SpecificRecordBase> boolean isCommitRevertedByInFlightAction(Option<T> actionMetadata,
final String latestMetadataInstantTimestamp) {
if (!actionMetadata.isPresent()) {
return false;
}
final String INSTANT_ACTION = (actionMetadata.get() instanceof HoodieRollbackMetadata
? HoodieTimeline.ROLLBACK_ACTION
: (actionMetadata.get() instanceof HoodieRestoreMetadata ? HoodieTimeline.RESTORE_ACTION : ""));
List<String> affectedInstantTimestamps;
switch (INSTANT_ACTION) {
case HoodieTimeline.ROLLBACK_ACTION:
List<HoodieInstantInfo> rollbackedInstants =
((HoodieRollbackMetadata) actionMetadata.get()).getInstantsRollback();
affectedInstantTimestamps = rollbackedInstants.stream().map(instant -> {
return instant.getCommitTime().toString();
}).collect(Collectors.toList());
if (affectedInstantTimestamps.contains(latestMetadataInstantTimestamp)) {
return true;
}
break;
case HoodieTimeline.RESTORE_ACTION:
List<HoodieInstantInfo> restoredInstants =
((HoodieRestoreMetadata) actionMetadata.get()).getRestoreInstantInfo();
affectedInstantTimestamps = restoredInstants.stream().map(instant -> {
return instant.getCommitTime().toString();
}).collect(Collectors.toList());
if (affectedInstantTimestamps.contains(latestMetadataInstantTimestamp)) {
return true;
}
break;
default:
return false;
}
return false;
}
/**
* Initialize the Metadata Table by listing files and partitions from the file system.
*
* @param dataMetaClient {@code HoodieTableMetaClient} for the dataset.
* @param dataMetaClient {@code HoodieTableMetaClient} for the dataset.
* @param inflightInstantTimestamp
*/
private boolean bootstrapFromFilesystem(HoodieEngineContext engineContext, HoodieTableMetaClient dataMetaClient,

View File

@@ -81,6 +81,6 @@ public abstract class BaseActionExecutor<T extends HoodieRecordPayload, I, K, O,
* @param metadata restore metadata of interest.
*/
protected final void writeTableMetadata(HoodieRestoreMetadata metadata) {
table.getMetadataWriter().ifPresent(w -> w.update(metadata, instantTime));
table.getMetadataWriter(Option.of(metadata)).ifPresent(w -> w.update(metadata, instantTime));
}
}

View File

@@ -139,7 +139,8 @@ public class TestHoodieSparkMergeOnReadTableRollback extends SparkClientFunction
@ParameterizedTest
@ValueSource(booleans = {true, false})
void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) throws Exception {
HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false, rollbackUsingMarkers, HoodieIndex.IndexType.SIMPLE);
HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false, rollbackUsingMarkers, HoodieIndex.IndexType.SIMPLE)
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build());
addConfigsForPopulateMetaFields(cfgBuilder, true);
HoodieWriteConfig cfg = cfgBuilder.build();
@@ -294,7 +295,8 @@ public class TestHoodieSparkMergeOnReadTableRollback extends SparkClientFunction
@Test
void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception {
boolean populateMetaFields = true;
HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false).withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build());
HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build());
addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
HoodieWriteConfig cfg = cfgBuilder.build();
@@ -344,7 +346,7 @@ public class TestHoodieSparkMergeOnReadTableRollback extends SparkClientFunction
newCommitTime = "002";
// WriteClient with custom config (disable small file handling)
HoodieWriteConfig smallFileWriteConfig = getHoodieWriteConfigWithSmallFileHandlingOffBuilder(populateMetaFields)
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()).build();
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()).build();
try (SparkRDDWriteClient nClient = getHoodieWriteClient(smallFileWriteConfig)) {
nClient.startCommitWithTime(newCommitTime);