From f1286c2c764d6be9f23b41c76f4de1c8734c1f3b Mon Sep 17 00:00:00 2001 From: Danny Chan Date: Wed, 22 Dec 2021 11:10:27 +0800 Subject: [PATCH] [HUDI-3032] Do not clean the log files right after compaction for metadata table (#4336) --- .../metadata/HoodieBackedTableMetadataWriter.java | 15 ++++++++++++++- .../FlinkHoodieBackedTableMetadataWriter.java | 2 +- .../SparkHoodieBackedTableMetadataWriter.java | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index 83ad16d8e..f8a6154b6 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -706,7 +706,20 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta } } - protected void doClean(AbstractHoodieWriteClient writeClient, String instantTime) { + protected void cleanIfNecessary(AbstractHoodieWriteClient writeClient, String instantTime) { + Option lastCompletedCompactionInstant = metadataMetaClient.reloadActiveTimeline() + .getCommitTimeline().filterCompletedInstants().lastInstant(); + if (lastCompletedCompactionInstant.isPresent() + && metadataMetaClient.getActiveTimeline().filterCompletedInstants() + .findInstantsAfter(lastCompletedCompactionInstant.get().getTimestamp()).countInstants() < 3) { + // do not clean the log files immediately after compaction to give some buffer time for metadata table reader, + // because there is case that the reader has prepared for the log file readers already before the compaction completes + // while before/during the reading of the log files, the cleaning triggers and delete the reading files, + // then a FileNotFoundException(for LogFormatReader) or NPE(for HFileReader) would throw. + + // 3 is a value that I think is enough for metadata table reader. + return; + } // Trigger cleaning with suffixes based on the same instant time. This ensures that any future // delta commits synced over will not have an instant time lesser than the last completed instant on the // metadata table. diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java index 204a8fc31..af9fee068 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java @@ -139,7 +139,7 @@ public class FlinkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetad metadataMetaClient.reloadActiveTimeline(); if (canTriggerTableService) { compactIfNecessary(writeClient, instantTime); - doClean(writeClient, instantTime); + cleanIfNecessary(writeClient, instantTime); writeClient.archive(); } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java index 5aa6917d6..5329751b2 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java @@ -154,7 +154,7 @@ public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetad metadataMetaClient.reloadActiveTimeline(); if (canTriggerTableService) { compactIfNecessary(writeClient, instantTime); - doClean(writeClient, instantTime); + cleanIfNecessary(writeClient, instantTime); writeClient.archive(); } }