1
0

[HUDI-2119] Ensure the rolled-back instance was previously synced to the Metadata Table when syncing a Rollback Instant. (#3210)

* [HUDI-2119] Ensure the rolled-back instance was previously synced to the Metadata Table when syncing a Rollback Instant.

If the rolled-back instant was synced to the Metadata Table, a corresponding deltacommit with the same timestamp should have been created on the Metadata Table timeline. To ensure we can always perform this check, the Metadata Table instants should not be archived as long as their corresponding instants are still present in the dataset timeline. But ensuring this requires a large number of instants to be kept on the metadata table.

In this change, the metadata table will keep at least the number of instants that the main dataset is keeping. If the instant being rolled back was before the metadata table timeline, the code will throw an exception and the metadata table will have to be re-bootstrapped. This should be a very rare occurrence and should occur only when the dataset is being repaired by rolling back multiple commits or restoring to a much older time.

* Fixed checkstyle

* Improvements from review comments.

Fixed checkstyle
Replaced explicit null check with Option.ofNullable
Removed redundant function getSynedInstantTime

* Renamed getSyncedInstantTime and getSyncedInstantTimeForReader.

Sync is confusing so renamed to getUpdateTime() and getReaderTime().

* Removed getReaderTime, which was only used for testing; tests can access the same information in a different way without making it part of the public interface.

* Fix compilation error

* Reverting changes to HoodieMetadataFileSystemView

Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
Prashant Wason
2021-08-13 21:23:34 -07:00
committed by GitHub
parent 642b1b671d
commit 8eed440694
13 changed files with 295 additions and 129 deletions

View File

@@ -137,6 +137,9 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
private HoodieWriteConfig createMetadataWriteConfig(HoodieWriteConfig writeConfig) {
int parallelism = writeConfig.getMetadataInsertParallelism();
int minCommitsToKeep = Math.max(writeConfig.getMetadataMinCommitsToKeep(), writeConfig.getMinCommitsToKeep());
int maxCommitsToKeep = Math.max(writeConfig.getMetadataMaxCommitsToKeep(), writeConfig.getMaxCommitsToKeep());
// Create the write config for the metadata table by borrowing options from the main write config.
HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder()
.withTimelineLayoutVersion(TimelineLayoutVersion.CURR_VERSION)
@@ -162,7 +165,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
.retainCommits(writeConfig.getMetadataCleanerCommitsRetained())
.archiveCommitsWith(writeConfig.getMetadataMinCommitsToKeep(), writeConfig.getMetadataMaxCommitsToKeep())
.archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep)
// we will trigger compaction manually, to control the instant times
.withInlineCompaction(false)
.withMaxNumDeltaCommitsBeforeCompaction(writeConfig.getMetadataCompactDeltaCommitMax()).build())
@@ -416,7 +419,8 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
for (HoodieInstant instant : instantsToSync) {
LOG.info("Syncing instant " + instant + " to metadata table");
Option<List<HoodieRecord>> records = HoodieTableMetadataUtil.convertInstantToMetaRecords(datasetMetaClient, instant, getLatestSyncedInstantTime());
Option<List<HoodieRecord>> records = HoodieTableMetadataUtil.convertInstantToMetaRecords(datasetMetaClient,
metaClient.getActiveTimeline(), instant, metadata.getUpdateTime());
if (records.isPresent()) {
commit(records.get(), MetadataPartitionType.FILES.partitionPath(), instant.getTimestamp());
}
@@ -478,7 +482,8 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
@Override
public void update(HoodieRestoreMetadata restoreMetadata, String instantTime) {
if (enabled) {
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(restoreMetadata, instantTime, metadata.getSyncedInstantTime());
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(metaClient.getActiveTimeline(),
restoreMetadata, instantTime, metadata.getUpdateTime());
commit(records, MetadataPartitionType.FILES.partitionPath(), instantTime);
}
}
@@ -492,7 +497,8 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
@Override
public void update(HoodieRollbackMetadata rollbackMetadata, String instantTime) {
if (enabled) {
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(rollbackMetadata, instantTime, metadata.getSyncedInstantTime());
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(metaClient.getActiveTimeline(),
rollbackMetadata, instantTime, metadata.getUpdateTime());
commit(records, MetadataPartitionType.FILES.partitionPath(), instantTime);
}
}
@@ -504,6 +510,10 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
}
}
public HoodieBackedTableMetadata getMetadataReader() {
return metadata;
}
/**
* Commit the {@code HoodieRecord}s to Metadata Table as a new delta-commit.
*

View File

@@ -23,7 +23,6 @@ import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.util.Option;
import java.io.Serializable;
@@ -41,9 +40,4 @@ public interface HoodieTableMetadataWriter extends Serializable, AutoCloseable {
void update(HoodieRestoreMetadata restoreMetadata, String instantTime);
void update(HoodieRollbackMetadata rollbackMetadata, String instantTime);
/**
* Return the timestamp of the latest instant synced to the metadata table.
*/
Option<String> getLatestSyncedInstantTime();
}

View File

@@ -205,7 +205,7 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
if (config.isMetadataTableEnabled()) {
try (HoodieTableMetadata tableMetadata = HoodieTableMetadata.create(table.getContext(), config.getMetadataConfig(),
config.getBasePath(), FileSystemViewStorageConfig.FILESYSTEM_VIEW_SPILLABLE_DIR.defaultValue())) {
Option<String> lastSyncedInstantTime = tableMetadata.getSyncedInstantTime();
Option<String> lastSyncedInstantTime = tableMetadata.getUpdateTime();
if (lastSyncedInstantTime.isPresent()) {
LOG.info("Limiting archiving of instants to last synced instant on metadata table at " + lastSyncedInstantTime.get());