[HUDI-3007] Fix issues in HoodieRepairTool (#4564)
This commit is contained in:
@@ -721,6 +721,53 @@ public class FSUtils {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Lists file status at a certain level in the directory hierarchy.
|
||||
* <p>
|
||||
* E.g., given "/tmp/hoodie_table" as the rootPath, and 3 as the expected level,
|
||||
* this method gives back the {@link FileStatus} of all files under
|
||||
* "/tmp/hoodie_table/[*]/[*]/[*]/" folders.
|
||||
*
|
||||
* @param hoodieEngineContext {@link HoodieEngineContext} instance.
|
||||
* @param fs {@link FileSystem} instance.
|
||||
* @param rootPath Root path for the file listing.
|
||||
* @param expectLevel Expected level of directory hierarchy for files to be added.
|
||||
* @param parallelism Parallelism for the file listing.
|
||||
* @return A list of file status of files at the level.
|
||||
*/
|
||||
|
||||
public static List<FileStatus> getFileStatusAtLevel(
|
||||
HoodieEngineContext hoodieEngineContext, FileSystem fs, Path rootPath,
|
||||
int expectLevel, int parallelism) {
|
||||
List<String> levelPaths = new ArrayList<>();
|
||||
List<FileStatus> result = new ArrayList<>();
|
||||
levelPaths.add(rootPath.toString());
|
||||
|
||||
for (int i = 0; i <= expectLevel; i++) {
|
||||
result = FSUtils.parallelizeFilesProcess(hoodieEngineContext, fs, parallelism,
|
||||
pairOfSubPathAndConf -> {
|
||||
Path path = new Path(pairOfSubPathAndConf.getKey());
|
||||
try {
|
||||
FileSystem fileSystem = path.getFileSystem(pairOfSubPathAndConf.getValue().get());
|
||||
return Arrays.stream(fileSystem.listStatus(path))
|
||||
.collect(Collectors.toList());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to list " + path, e);
|
||||
}
|
||||
},
|
||||
levelPaths)
|
||||
.values().stream()
|
||||
.flatMap(list -> list.stream()).collect(Collectors.toList());
|
||||
if (i < expectLevel) {
|
||||
levelPaths = result.stream()
|
||||
.filter(FileStatus::isDirectory)
|
||||
.map(fileStatus -> fileStatus.getPath().toString())
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * A {@link Function} that is also {@link Serializable}.
 * <p>
 * The interface declares no members of its own; it only combines the two parent types,
 * so a lambda or method reference targeting it is both a function and serializable.
 *
 * @param <T> Type of the input to the function.
 * @param <R> Type of the result of the function.
 */
@FunctionalInterface
public interface SerializableFunction<T, R> extends Function<T, R>, Serializable {
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user