[HUDI-839] Introducing support for rollbacks using marker files (#1756)
* [HUDI-839] Introducing rollback strategy using marker files - Adds a new mechanism for rollbacks where it's based on the marker files generated during the write - Consequently, marker file/dir deletion now happens post commit, instead of during finalize - Marker files are also generated for AppendHandle, making it consistent throughout the write path - Until upgrade-downgrade mechanism can upgrade non-marker based inflight writes to marker based, this should only be turned on for new datasets. - Added marker dir deletion after successful commit/rollback, individual files are not deleted during finalize - Fail safe for deleting marker directories, now during timeline archival process - Added check to ensure completed instants are not rolled back using marker based strategy. This will be incorrect - Reworked tests to rollback inflight instants, instead of completed instants whenever necessary - Added a unit test for MarkerBasedRollbackStrategy Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
@@ -18,6 +18,8 @@
|
||||
|
||||
package org.apache.hudi.utilities.functional;
|
||||
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.hudi.client.HoodieWriteClient;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
@@ -35,9 +37,7 @@ import org.apache.hudi.utilities.HoodieSnapshotExporter.Partitioner;
|
||||
import org.apache.hudi.utilities.exception.HoodieSnapshotExporterException;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
@@ -92,7 +92,6 @@ public class TestHoodieSnapshotExporter extends FunctionalTestHarness {
|
||||
JavaRDD<HoodieRecord> recordsRDD = jsc().parallelize(records, 1);
|
||||
hdfsWriteClient.bulkInsert(recordsRDD, COMMIT_TIME);
|
||||
hdfsWriteClient.close();
|
||||
|
||||
RemoteIterator<LocatedFileStatus> itr = dfs().listFiles(new Path(sourcePath), true);
|
||||
while (itr.hasNext()) {
|
||||
LOG.info(">>> Prepared test file: " + itr.next().getPath());
|
||||
|
||||
Reference in New Issue
Block a user