diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
index bed4c812c..16866558c 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
@@ -357,6 +357,12 @@ public class HoodieMetadataTableValidator implements Serializable {
     String basePath = metaClient.getBasePath();
     Set baseFilesForCleaning = Collections.emptySet();
 
+    // Skip this validation round when the metadata table is not available to read.
+    // NOTE(review): assumes the enclosing method returns void - confirm.
+    if (!checkMetadataTableIsAvailable()) {
+      return;
+    }
+
     if (cfg.skipDataFilesForCleaning) {
       HoodieTimeline inflightCleaningTimeline = metaClient.getActiveTimeline().getCleanerTimeline().filterInflights();
 
@@ -415,6 +421,35 @@ public class HoodieMetadataTableValidator implements Serializable {
     }
   }
 
+  /**
+   * Checks whether the metadata table is initialized and available to read.
+   *
+   * <p>The metadata table is treated as available only when its meta client can be built and
+   * its active timeline has at least one completed instant. Any exception is logged as a warning
+   * rather than propagated, so that the caller can skip the current validation round.
+   *
+   * @return {@code true} if the metadata table can be read, {@code false} if the current
+   *         validation should be skipped.
+   */
+  private boolean checkMetadataTableIsAvailable() {
+    try {
+      HoodieTableMetaClient mdtMetaClient = HoodieTableMetaClient.builder()
+          .setConf(jsc.hadoopConfiguration()).setBasePath(new Path(cfg.basePath, HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH).toString())
+          .setLoadActiveTimelineOnLoad(true)
+          .build();
+      int finishedInstants = mdtMetaClient.getActiveTimeline().filterCompletedInstants().countInstants();
+      if (finishedInstants == 0) {
+        LOG.warn("Metadata table is not available to read for now, there is no completed instant for metadata table.");
+        return false;
+      }
+      return true;
+    } catch (Exception ex) {
+      // Best-effort probe: failing to load the metadata table must not fail the validator itself.
+      LOG.warn("Metadata table is not available to read for now, ", ex);
+      return false;
+    }
+  }
+
   /**
    * Compare the listing partitions result between metadata table and fileSystem.
    */