From eb219010d2491165dbdc00a6b9547a9072e012ac Mon Sep 17 00:00:00 2001 From: Danny Chan Date: Tue, 24 May 2022 17:33:30 +0800 Subject: [PATCH] [HUDI-4145] Archives the metadata file in HoodieInstant.State sequence (#5669) --- .../apache/hudi/client/HoodieTimelineArchiver.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java index 2974cc2ef..f111bb70e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java @@ -72,7 +72,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -507,10 +506,16 @@ public class HoodieTimelineArchiver { List instantsToStream = groupByTsAction.get(Pair.of(hoodieInstant.getTimestamp(), HoodieInstant.getComparableAction(hoodieInstant.getAction()))); if (instantsToStream != null) { - return instantsToStream.stream(); + // sorts the instants in natural order to make sure the metadata files are removed + // in HoodieInstant.State sequence: requested -> inflight -> completed. + // This is important because when a COMPLETED metadata file is removed first, + // other monitors on the timeline (such as the compaction or clustering services) would + // mistakenly recognize the pending file as a pending operation, + // then all kinds of weird bugs occur. + return instantsToStream.stream().sorted(); } else { // if a concurrent writer archived the instant - return Collections.EMPTY_LIST.stream(); + return Stream.empty(); } }); }