diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
index 991edc530..5a11570ce 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
@@ -43,6 +43,7 @@ import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ParquetUtils;
+import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
@@ -86,6 +87,9 @@ import java.util.stream.Collectors;
* - `--validate-all-file-groups`: validate all file groups, and all file slices within file groups.
* - `--validate-all-column-stats`: validate column stats for all columns in the schema
* - `--validate-bloom-filters`: validate bloom filters of base files
+ *
+ * If the Hudi table is on the local file system, the base path passed to `--base-path` must have
+ * "file:" prefix to avoid validation failure.
*
* - Default : This validator will compare the results between metadata table and filesystem only once.
*
@@ -139,8 +143,11 @@ public class HoodieMetadataTableValidator implements Serializable {
protected transient Option asyncMetadataTableValidateService;
+ private final String taskLabels;
+
public HoodieMetadataTableValidator(HoodieTableMetaClient metaClient) {
this.metaClient = metaClient;
+ this.taskLabels = StringUtils.EMPTY_STRING;
}
public HoodieMetadataTableValidator(JavaSparkContext jsc, Config cfg) {
@@ -157,6 +164,27 @@ public class HoodieMetadataTableValidator implements Serializable {
.build();
this.asyncMetadataTableValidateService = cfg.continuous ? Option.of(new AsyncMetadataTableValidateService()) : Option.empty();
+ this.taskLabels = generateValidationTaskLabels();
+ }
+
+ private String generateValidationTaskLabels() {
+ List labelList = new ArrayList<>();
+ if (cfg.validateLatestBaseFiles) {
+ labelList.add("validate-latest-base-files");
+ }
+ if (cfg.validateLatestFileSlices) {
+ labelList.add("validate-latest-file-slices");
+ }
+ if (cfg.validateAllFileGroups) {
+ labelList.add("validate-all-file-groups");
+ }
+ if (cfg.validateAllColumnStats) {
+ labelList.add("validate-all-column-stats");
+ }
+ if (cfg.validateBloomFilters) {
+ labelList.add("validate-bloom-filters");
+ }
+ return String.join(",", labelList);
}
/**
@@ -397,10 +425,12 @@ public class HoodieMetadataTableValidator implements Serializable {
List result = engineContext.parallelize(allPartitions, allPartitions.size()).map(partitionPath -> {
try {
validateFilesInPartition(metadataTableBasedContext, fsBasedContext, partitionPath, finalBaseFilesForCleaning);
- LOG.info("Metadata table validation succeeded for " + partitionPath);
+ LOG.info(String.format("Metadata table validation succeeded for partition %s (partition %s)", partitionPath, taskLabels));
return true;
} catch (HoodieValidationException e) {
- LOG.error("Metadata table validation failed for " + partitionPath + " due to HoodieValidationException", e);
+ LOG.error(
+ String.format("Metadata table validation failed for partition %s due to HoodieValidationException (partition %s)",
+ partitionPath, taskLabels), e);
if (!cfg.ignoreFailed) {
throw e;
}
@@ -413,9 +443,9 @@ public class HoodieMetadataTableValidator implements Serializable {
}
if (finalResult) {
- LOG.info("Metadata table validation succeeded.");
+ LOG.info(String.format("Metadata table validation succeeded (%s).", taskLabels));
} else {
- LOG.warn("Metadata table validation failed.");
+ LOG.warn(String.format("Metadata table validation failed (%s).", taskLabels));
}
}