1
0

[HUDI-839] Introducing support for rollbacks using marker files (#1756)

* [HUDI-839] Introducing rollback strategy using marker files

 - Adds a new rollback mechanism that is based on the marker files generated during the write
 - Consequently, marker file/dir deletion now happens post commit, instead of during finalize 
 - Marker files are also generated for AppendHandle, making it consistent throughout the write path 
 - Until the upgrade-downgrade mechanism can upgrade non-marker based inflight writes to marker based, this should only be turned on for new datasets.
 - Added marker dir deletion after successful commit/rollback, individual files are not deleted during finalize
 - Fail safe for deleting marker directories, now during timeline archival process
 - Added a check to ensure completed instants are not rolled back using the marker-based strategy, since doing so would be incorrect
 - Reworked tests to rollback inflight instants, instead of completed instants whenever necessary
 - Added a unit test for MarkerBasedRollbackStrategy


Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
lw0090
2020-07-21 13:41:42 +08:00
committed by GitHub
parent b71f25f210
commit 1ec89e9a94
43 changed files with 1947 additions and 512 deletions

View File

@@ -91,8 +91,8 @@ public class TestArchivedCommitsCommand extends AbstractShellIntegrationTest {
metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
// archive
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, metaClient);
archiveLog.archiveIfRequired(hadoopConf);
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, hadoopConf);
archiveLog.archiveIfRequired();
}
@AfterEach

View File

@@ -175,8 +175,8 @@ public class TestCommitsCommand extends AbstractShellIntegrationTest {
// archive
metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, metaClient);
archiveLog.archiveIfRequired(jsc.hadoopConfiguration());
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, jsc.hadoopConfiguration());
archiveLog.archiveIfRequired();
CommandResult cr = getShell().executeCommand(String.format("commits showarchived --startTs %s --endTs %s", "100", "104"));
assertTrue(cr.isSuccess());