From c19f505b5ad8c292d259b68c57f6d768a2a1dbf4 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Sat, 2 Apr 2022 13:16:17 -0700 Subject: [PATCH] [HUDI-3784] Improve docs and logs of HoodieMetadataTableValidator (#5216) --- .../HoodieMetadataTableValidator.java | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java index 991edc530..5a11570ce 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java @@ -43,6 +43,7 @@ import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.util.CleanerUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ParquetUtils; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; @@ -86,6 +87,9 @@ import java.util.stream.Collectors; * - `--validate-all-file-groups`: validate all file groups, and all file slices within file groups. * - `--validate-all-column-stats`: validate column stats for all columns in the schema * - `--validate-bloom-filters`: validate bloom filters of base files + * + * If the Hudi table is on the local file system, the base path passed to `--base-path` must have + * "file:" prefix to avoid validation failure. *

* - Default : This validator will compare the results between metadata table and filesystem only once. *

@@ -139,8 +143,11 @@ public class HoodieMetadataTableValidator implements Serializable { protected transient Option<AsyncMetadataTableValidateService> asyncMetadataTableValidateService; + private final String taskLabels; + public HoodieMetadataTableValidator(HoodieTableMetaClient metaClient) { this.metaClient = metaClient; + this.taskLabels = StringUtils.EMPTY_STRING; } public HoodieMetadataTableValidator(JavaSparkContext jsc, Config cfg) { @@ -157,6 +164,27 @@ public class HoodieMetadataTableValidator implements Serializable { .build(); this.asyncMetadataTableValidateService = cfg.continuous ? Option.of(new AsyncMetadataTableValidateService()) : Option.empty(); + this.taskLabels = generateValidationTaskLabels(); + } + + private String generateValidationTaskLabels() { + List<String> labelList = new ArrayList<>(); + if (cfg.validateLatestBaseFiles) { + labelList.add("validate-latest-base-files"); + } + if (cfg.validateLatestFileSlices) { + labelList.add("validate-latest-file-slices"); + } + if (cfg.validateAllFileGroups) { + labelList.add("validate-all-file-groups"); + } + if (cfg.validateAllColumnStats) { + labelList.add("validate-all-column-stats"); + } + if (cfg.validateBloomFilters) { + labelList.add("validate-bloom-filters"); + } + return String.join(",", labelList); } /** @@ -397,10 +425,12 @@ public class HoodieMetadataTableValidator implements Serializable { List<Boolean> result = engineContext.parallelize(allPartitions, allPartitions.size()).map(partitionPath -> { try { validateFilesInPartition(metadataTableBasedContext, fsBasedContext, partitionPath, finalBaseFilesForCleaning); - LOG.info("Metadata table validation succeeded for " + partitionPath); + LOG.info(String.format("Metadata table validation succeeded for partition %s (%s)", partitionPath, taskLabels)); return true; } catch (HoodieValidationException e) { - LOG.error("Metadata table validation failed for " + partitionPath + " due to HoodieValidationException", e); + LOG.error( + String.format("Metadata table validation failed for partition %s due to HoodieValidationException (%s)", + partitionPath, taskLabels), e); if (!cfg.ignoreFailed) { throw e; } @@ -413,9 +443,9 @@ public class HoodieMetadataTableValidator implements Serializable { } if (finalResult) { - LOG.info("Metadata table validation succeeded."); + LOG.info(String.format("Metadata table validation succeeded (%s).", taskLabels)); } else { - LOG.warn("Metadata table validation failed."); + LOG.warn(String.format("Metadata table validation failed (%s).", taskLabels)); } }