[HUDI-3656] Adding medium sized dataset for clustering and minor fixes to integ tests (#5063)
This commit is contained in:
committed by
GitHub
parent
6fe4d6e2f6
commit
2551c26183
@@ -18,8 +18,15 @@
|
||||
|
||||
package org.apache.hudi.integ.testsuite.dag.nodes;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.CleanerUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config;
|
||||
import org.apache.hudi.integ.testsuite.dag.ExecutionContext;
|
||||
|
||||
@@ -59,21 +66,19 @@ public class ValidateAsyncOperations extends DagNode<Option<String>> {
|
||||
|
||||
int maxCommitsRetained = executionContext.getHoodieTestSuiteWriter().getWriteConfig().getCleanerCommitsRetained() + 1;
|
||||
FileSystem fs = FSUtils.getFs(basePath, executionContext.getHoodieTestSuiteWriter().getConfiguration());
|
||||
Map<String, Integer> fileIdCount = new HashMap<>();
|
||||
|
||||
AtomicInteger maxVal = new AtomicInteger();
|
||||
List<String> partitionPaths = FSUtils.getAllPartitionFoldersThreeLevelsDown(fs, basePath);
|
||||
for (String partitionPath : partitionPaths) {
|
||||
List<FileStatus> fileStatuses = Arrays.stream(FSUtils.getAllDataFilesInPartition(fs, new Path(basePath + "/" + partitionPath))).collect(Collectors.toList());
|
||||
fileStatuses.forEach(entry -> {
|
||||
String fileId = FSUtils.getFileId(entry.getPath().getName());
|
||||
fileIdCount.computeIfAbsent(fileId, k -> 0);
|
||||
fileIdCount.put(fileId, fileIdCount.get(fileId) + 1);
|
||||
maxVal.set(Math.max(maxVal.get(), fileIdCount.get(fileId)));
|
||||
});
|
||||
}
|
||||
if (maxVal.get() > maxCommitsRetained) {
|
||||
throw new AssertionError("Total commits (" + maxVal + ") retained exceeds max value of " + maxCommitsRetained + ", total commits : ");
|
||||
|
||||
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setBasePath(executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath)
|
||||
.setConf(executionContext.getJsc().hadoopConfiguration()).build();
|
||||
Option<HoodieInstant> latestCleanInstant = metaClient.getActiveTimeline().filter(instant -> instant.getAction().equals(HoodieTimeline.CLEAN_ACTION)).lastInstant();
|
||||
if (latestCleanInstant.isPresent()) {
|
||||
log.warn("Latest clean commit " + latestCleanInstant.get());
|
||||
HoodieCleanMetadata cleanMetadata = CleanerUtils.getCleanerMetadata(metaClient, latestCleanInstant.get());
|
||||
String earliestCommitToRetain = cleanMetadata.getEarliestCommitToRetain();
|
||||
log.warn("Earliest commit to retain : " + earliestCommitToRetain);
|
||||
long unCleanedInstants = metaClient.getActiveTimeline().filterCompletedInstants().filter(instant ->
|
||||
HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS, earliestCommitToRetain)).getInstants().count();
|
||||
ValidationUtils.checkArgument(unCleanedInstants >= (maxCommitsRetained + 1), "Total uncleaned instants " + unCleanedInstants +
|
||||
" mismatched with max commits retained " + (maxCommitsRetained + 1));
|
||||
}
|
||||
|
||||
if (config.validateArchival() || config.validateClean()) {
|
||||
|
||||
Reference in New Issue
Block a user