1
0

[HUDI-2683] Parallelize deleting archived hoodie commits (#3920)

Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
zhangyue19921010
2021-11-15 22:36:54 +08:00
committed by GitHub
parent 53d2d6ae24
commit 38b6934352
4 changed files with 61 additions and 19 deletions

View File

@@ -670,19 +670,30 @@ public class FSUtils {
.filter(subPathPredicate)
.map(fileStatus -> fileStatus.getPath().toString())
.collect(Collectors.toList());
if (subPaths.size() > 0) {
SerializableConfiguration conf = new SerializableConfiguration(fs.getConf());
int actualParallelism = Math.min(subPaths.size(), parallelism);
result = hoodieEngineContext.mapToPair(subPaths,
subPath -> new ImmutablePair<>(subPath, pairFunction.apply(new ImmutablePair<>(subPath, conf))),
actualParallelism);
}
result = parallelizeFilesProcess(hoodieEngineContext, fs, parallelism, pairFunction, subPaths);
} catch (IOException ioe) {
throw new HoodieIOException(ioe.getMessage(), ioe);
}
return result;
}
public static <T> Map<String, T> parallelizeFilesProcess(
HoodieEngineContext hoodieEngineContext,
FileSystem fs,
int parallelism,
SerializableFunction<Pair<String, SerializableConfiguration>, T> pairFunction,
List<String> subPaths) {
Map<String, T> result = new HashMap<>();
if (subPaths.size() > 0) {
SerializableConfiguration conf = new SerializableConfiguration(fs.getConf());
int actualParallelism = Math.min(subPaths.size(), parallelism);
result = hoodieEngineContext.mapToPair(subPaths,
subPath -> new ImmutablePair<>(subPath, pairFunction.apply(new ImmutablePair<>(subPath, conf))),
actualParallelism);
}
return result;
}
/**
* Deletes a sub-path.
*