[HUDI-4145] Archives the metadata file in HoodieInstant.State sequence (part2) (#5676)
This commit is contained in:
@@ -506,13 +506,7 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
|
|||||||
List<HoodieInstant> instantsToStream = groupByTsAction.get(Pair.of(hoodieInstant.getTimestamp(),
|
List<HoodieInstant> instantsToStream = groupByTsAction.get(Pair.of(hoodieInstant.getTimestamp(),
|
||||||
HoodieInstant.getComparableAction(hoodieInstant.getAction())));
|
HoodieInstant.getComparableAction(hoodieInstant.getAction())));
|
||||||
if (instantsToStream != null) {
|
if (instantsToStream != null) {
|
||||||
// sorts the instants in natural order to make sure the metadata files be removed
|
return instantsToStream.stream();
|
||||||
// in HoodieInstant.State sequence: requested -> inflight -> completed,
|
|
||||||
// this is important because when a COMPLETED metadata file is removed first,
|
|
||||||
// other monitors on the timeline(such as the compaction or clustering services) would
|
|
||||||
// mistakenly recognize the pending file as a pending operation,
|
|
||||||
// then all kinds of weird bugs occur.
|
|
||||||
return instantsToStream.stream().sorted();
|
|
||||||
} else {
|
} else {
|
||||||
// if a concurrent writer archived the instant
|
// if a concurrent writer archived the instant
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
@@ -522,18 +516,28 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
|
|||||||
|
|
||||||
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants, HoodieEngineContext context) throws IOException {
|
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants, HoodieEngineContext context) throws IOException {
|
||||||
LOG.info("Deleting instants " + archivedInstants);
|
LOG.info("Deleting instants " + archivedInstants);
|
||||||
boolean success = true;
|
|
||||||
List<String> instantFiles = archivedInstants.stream().map(archivedInstant ->
|
List<String> pendingInstantFiles = new ArrayList<>();
|
||||||
new Path(metaClient.getMetaPath(), archivedInstant.getFileName())
|
List<String> completedInstantFiles = new ArrayList<>();
|
||||||
).map(Path::toString).collect(Collectors.toList());
|
|
||||||
|
for (HoodieInstant instant : archivedInstants) {
|
||||||
|
String filePath = new Path(metaClient.getMetaPath(), instant.getFileName()).toString();
|
||||||
|
if (instant.isCompleted()) {
|
||||||
|
completedInstantFiles.add(filePath);
|
||||||
|
} else {
|
||||||
|
pendingInstantFiles.add(filePath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
context.setJobStatus(this.getClass().getSimpleName(), "Delete archived instants: " + config.getTableName());
|
context.setJobStatus(this.getClass().getSimpleName(), "Delete archived instants: " + config.getTableName());
|
||||||
Map<String, Boolean> resultDeleteInstantFiles = deleteFilesParallelize(metaClient, instantFiles, context, false);
|
// Delete the metadata files
|
||||||
|
// in HoodieInstant.State sequence: requested -> inflight -> completed,
|
||||||
for (Map.Entry<String, Boolean> result : resultDeleteInstantFiles.entrySet()) {
|
// this is important because when a COMPLETED metadata file is removed first,
|
||||||
LOG.info("Archived and deleted instant file " + result.getKey() + " : " + result.getValue());
|
// other monitors on the timeline(such as the compaction or clustering services) would
|
||||||
success &= result.getValue();
|
// mistakenly recognize the pending file as a pending operation,
|
||||||
}
|
// then all kinds of weird bugs occur.
|
||||||
|
boolean success = deleteArchivedInstantFiles(context, true, pendingInstantFiles);
|
||||||
|
success &= deleteArchivedInstantFiles(context, success, completedInstantFiles);
|
||||||
|
|
||||||
// Remove older meta-data from auxiliary path too
|
// Remove older meta-data from auxiliary path too
|
||||||
Option<HoodieInstant> latestCommitted = Option.fromJavaOptional(archivedInstants.stream().filter(i -> i.isCompleted() && (i.getAction().equals(HoodieTimeline.COMMIT_ACTION)
|
Option<HoodieInstant> latestCommitted = Option.fromJavaOptional(archivedInstants.stream().filter(i -> i.isCompleted() && (i.getAction().equals(HoodieTimeline.COMMIT_ACTION)
|
||||||
@@ -545,6 +549,16 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean deleteArchivedInstantFiles(HoodieEngineContext context, boolean success, List<String> files) {
|
||||||
|
Map<String, Boolean> resultDeleteInstantFiles = deleteFilesParallelize(metaClient, files, context, false);
|
||||||
|
|
||||||
|
for (Map.Entry<String, Boolean> result : resultDeleteInstantFiles.entrySet()) {
|
||||||
|
LOG.info("Archived and deleted instant file " + result.getKey() + " : " + result.getValue());
|
||||||
|
success &= result.getValue();
|
||||||
|
}
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove older instants from auxiliary meta folder.
|
* Remove older instants from auxiliary meta folder.
|
||||||
*
|
*
|
||||||
|
|||||||
Reference in New Issue
Block a user