1
0

[HUDI-1399] Support an independent clustering Spark job for asynchronous clustering (#2379)

* [HUDI-1481] Add structured streaming and delta streamer clustering unit test

* [HUDI-1399] Support an independent clustering Spark job for asynchronous clustering

* [HUDI-1399] Support an independent clustering Spark job for asynchronous clustering

* [HUDI-1498] Read clustering plan from requested file for inflight instant (#2389)

* [HUDI-1399] Support an independent clustering Spark job with a schedule mode that generates the instant time

Co-authored-by: satishkotha <satishkotha@uber.com>
This commit is contained in:
lw0090
2021-01-10 09:30:16 +08:00
committed by GitHub
parent 65866c45ec
commit 368c1a8f5c
10 changed files with 338 additions and 38 deletions

View File

@@ -731,12 +731,30 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
table.getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
}
/**
 * Returns the pending instants of the commits timeline, minus compaction instants (as produced by
 * {@code filterPendingExcludingCompaction()}) and minus every replace commit for which a clustering
 * plan can be resolved.
 *
 * @param table table whose meta client supplies the commits timeline and is used to look up clustering plans
 * @return the filtered timeline; replace commits without a clustering plan and all non-replace instants are kept
 */
private HoodieTimeline getInflightTimelineExcludeCompactionAndClustering(HoodieTable<T, I, K, O> table) {
  HoodieTimeline pendingWithoutCompaction = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
  return pendingWithoutCompaction.filter(instant -> {
    // Anything that is not a replace commit passes through untouched.
    if (!instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) {
      return true;
    }
    // A replace commit stays only when no clustering plan is found for it.
    Option<Pair<HoodieInstant, HoodieClusteringPlan>> clusteringPlan =
        ClusteringUtils.getClusteringPlan(table.getMetaClient(), instant);
    return !clusteringPlan.isPresent();
  });
}
/**
* Cleanup all pending commits.
*/
private void rollbackPendingCommits() {
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
HoodieTimeline inflightTimeline = getInflightTimelineExcludeCompactionAndClustering(table);
List<String> commits = inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp)
.collect(Collectors.toList());
for (String commit : commits) {