[HUDI-3784] Improve docs and logs of HoodieMetadataTableValidator (#5216)
This commit is contained in:
@@ -43,6 +43,7 @@ import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
|
||||
import org.apache.hudi.common.util.CleanerUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ParquetUtils;
|
||||
import org.apache.hudi.common.util.StringUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
@@ -86,6 +87,9 @@ import java.util.stream.Collectors;
|
||||
* - `--validate-all-file-groups`: validate all file groups, and all file slices within file groups.
|
||||
* - `--validate-all-column-stats`: validate column stats for all columns in the schema
|
||||
* - `--validate-bloom-filters`: validate bloom filters of base files
|
||||
*
|
||||
* If the Hudi table is on the local file system, the base path passed to `--base-path` must have
|
||||
* "file:" prefix to avoid validation failure.
|
||||
* <p>
|
||||
* - Default : This validator will compare the results between metadata table and filesystem only once.
|
||||
* <p>
|
||||
@@ -139,8 +143,11 @@ public class HoodieMetadataTableValidator implements Serializable {
|
||||
|
||||
protected transient Option<AsyncMetadataTableValidateService> asyncMetadataTableValidateService;
|
||||
|
||||
private final String taskLabels;
|
||||
|
||||
public HoodieMetadataTableValidator(HoodieTableMetaClient metaClient) {
|
||||
this.metaClient = metaClient;
|
||||
this.taskLabels = StringUtils.EMPTY_STRING;
|
||||
}
|
||||
|
||||
public HoodieMetadataTableValidator(JavaSparkContext jsc, Config cfg) {
|
||||
@@ -157,6 +164,27 @@ public class HoodieMetadataTableValidator implements Serializable {
|
||||
.build();
|
||||
|
||||
this.asyncMetadataTableValidateService = cfg.continuous ? Option.of(new AsyncMetadataTableValidateService()) : Option.empty();
|
||||
this.taskLabels = generateValidationTaskLabels();
|
||||
}
|
||||
|
||||
private String generateValidationTaskLabels() {
|
||||
List<String> labelList = new ArrayList<>();
|
||||
if (cfg.validateLatestBaseFiles) {
|
||||
labelList.add("validate-latest-base-files");
|
||||
}
|
||||
if (cfg.validateLatestFileSlices) {
|
||||
labelList.add("validate-latest-file-slices");
|
||||
}
|
||||
if (cfg.validateAllFileGroups) {
|
||||
labelList.add("validate-all-file-groups");
|
||||
}
|
||||
if (cfg.validateAllColumnStats) {
|
||||
labelList.add("validate-all-column-stats");
|
||||
}
|
||||
if (cfg.validateBloomFilters) {
|
||||
labelList.add("validate-bloom-filters");
|
||||
}
|
||||
return String.join(",", labelList);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -397,10 +425,12 @@ public class HoodieMetadataTableValidator implements Serializable {
|
||||
List<Boolean> result = engineContext.parallelize(allPartitions, allPartitions.size()).map(partitionPath -> {
|
||||
try {
|
||||
validateFilesInPartition(metadataTableBasedContext, fsBasedContext, partitionPath, finalBaseFilesForCleaning);
|
||||
LOG.info("Metadata table validation succeeded for " + partitionPath);
|
||||
LOG.info(String.format("Metadata table validation succeeded for partition %s (partition %s)", partitionPath, taskLabels));
|
||||
return true;
|
||||
} catch (HoodieValidationException e) {
|
||||
LOG.error("Metadata table validation failed for " + partitionPath + " due to HoodieValidationException", e);
|
||||
LOG.error(
|
||||
String.format("Metadata table validation failed for partition %s due to HoodieValidationException (partition %s)",
|
||||
partitionPath, taskLabels), e);
|
||||
if (!cfg.ignoreFailed) {
|
||||
throw e;
|
||||
}
|
||||
@@ -413,9 +443,9 @@ public class HoodieMetadataTableValidator implements Serializable {
|
||||
}
|
||||
|
||||
if (finalResult) {
|
||||
LOG.info("Metadata table validation succeeded.");
|
||||
LOG.info(String.format("Metadata table validation succeeded (%s).", taskLabels));
|
||||
} else {
|
||||
LOG.warn("Metadata table validation failed.");
|
||||
LOG.warn(String.format("Metadata table validation failed (%s).", taskLabels));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user