1
0

[HUDI-3635] Fix HoodieMetadataTableValidator around comparison of partition path listing (#5100)

Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
YueZhang
2022-03-31 05:23:37 +08:00
committed by GitHub
parent eae8488536
commit 2b60641d17
2 changed files with 34 additions and 0 deletions

View File

@@ -32,6 +32,7 @@ import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieColumnRangeMetadata;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieInstant;
@@ -443,6 +444,21 @@ public class HoodieMetadataTableValidator implements Serializable {
private List<String> validatePartitions(HoodieSparkEngineContext engineContext, String basePath) {
// compare partitions
List<String> allPartitionPathsFromFS = FSUtils.getAllPartitionPaths(engineContext, basePath, false, cfg.assumeDatePartitioning);
HoodieTimeline completedTimeline = metaClient.getActiveTimeline().filterCompletedInstants();
// ignore partitions created by uncommitted ingestion.
allPartitionPathsFromFS = allPartitionPathsFromFS.stream().parallel().filter(part -> {
HoodiePartitionMetadata hoodiePartitionMetadata = new HoodiePartitionMetadata(metaClient.getFs(), new Path(basePath, part));
Option<String> instantOption = hoodiePartitionMetadata.readPartitionCreatedCommitTime();
if (instantOption.isPresent()) {
String instantTime = instantOption.get();
return completedTimeline.containsOrBeforeTimelineStarts(instantTime);
} else {
return false;
}
}).collect(Collectors.toList());
List<String> allPartitionPathsMeta = FSUtils.getAllPartitionPaths(engineContext, basePath, true, cfg.assumeDatePartitioning);
Collections.sort(allPartitionPathsFromFS);