Nicer handling of timeline archival for Cloud storage
- When append() is not supported, always roll over to a new file (instead of failing)
- Provide a way to configure the archive log folder (avoids small files inside .hoodie)
- Datasets written via the Spark datasource archive to .hoodie/archived
- HoodieClientExample will now retain only 2,3 commits to exercise the archival path during dev cycles
- A few tweaks to the code structure around CommitArchiveLog
This commit is contained in:
committed by
vinoth chandar
parent
0cd186c899
commit
cf7f7aabb9
@@ -91,7 +91,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
private final HoodieWriteConfig config;
|
||||
private transient final HoodieMetrics metrics;
|
||||
private transient final HoodieIndex<T> index;
|
||||
private transient final HoodieCommitArchiveLog archiveLog;
|
||||
private transient Timer.Context writeContext = null;
|
||||
|
||||
/**
|
||||
@@ -116,7 +115,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
this.config = clientConfig;
|
||||
this.index = HoodieIndex.createIndex(config, jsc);
|
||||
this.metrics = new HoodieMetrics(config, config.getTableName());
|
||||
this.archiveLog = new HoodieCommitArchiveLog(clientConfig, fs);
|
||||
|
||||
if (rollbackInFlight) {
|
||||
rollbackInflightCommits();
|
||||
@@ -446,6 +444,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
}
|
||||
|
||||
// We cannot have unbounded commit files. Archive commits if we have to archive
|
||||
HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(config,
|
||||
new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true));
|
||||
archiveLog.archiveIfRequired();
|
||||
if (config.isAutoClean()) {
|
||||
// Call clean to cleanup if there is anything to cleanup after the commit,
|
||||
|
||||
Reference in New Issue
Block a user