1
0

[HUDI-3007] Fix issues in HoodieRepairTool (#4564)

This commit is contained in:
Y Ethan Guo
2022-01-12 09:03:27 -08:00
committed by GitHub
parent 12e95771ee
commit 397795c7d0
7 changed files with 957 additions and 119 deletions

View File

@@ -721,6 +721,53 @@ public class FSUtils {
}
}
/**
 * Collects the {@link FileStatus} entries found a fixed number of directory levels
 * below a root path.
 * <p>
 * The listing starts at {@code rootPath} and is performed {@code expectLevel + 1} times.
 * After every round except the last one, only the directories found in that round are
 * kept as the paths to list in the next round. The entries produced by the last round
 * are returned as-is, i.e., they are not filtered by type.
 * <p>
 * E.g., with "/tmp/hoodie_table" as the rootPath and 3 as the expected level, the
 * returned entries are those listed under the "/tmp/hoodie_table/[*]/[*]/[*]/" folders.
 * <p>
 * If {@code expectLevel} is negative, no listing is performed and an empty list is returned.
 *
 * @param hoodieEngineContext {@link HoodieEngineContext} instance used to run the listing.
 * @param fs                  {@link FileSystem} instance handed to the parallel listing helper.
 * @param rootPath            Root path where the listing starts.
 * @param expectLevel         Number of directory levels to descend before collecting entries.
 * @param parallelism         Parallelism handed to the parallel listing helper.
 * @return The {@link FileStatus} entries produced by the final listing round.
 * @throws HoodieIOException raised by the listing function when a path cannot be listed.
 */
public static List<FileStatus> getFileStatusAtLevel(
    HoodieEngineContext hoodieEngineContext, FileSystem fs, Path rootPath,
    int expectLevel, int parallelism) {
  List<FileStatus> listedStatuses = new ArrayList<>();
  List<String> pathsToList = new ArrayList<>();
  pathsToList.add(rootPath.toString());

  int level = 0;
  while (level <= expectLevel) {
    // List every path of the current level and flatten the per-path results.
    listedStatuses = FSUtils.parallelizeFilesProcess(hoodieEngineContext, fs, parallelism,
        subPathAndConf -> {
          Path path = new Path(subPathAndConf.getKey());
          FileStatus[] children;
          try {
            // Resolve the file system from the path itself using the shipped configuration.
            children = path.getFileSystem(subPathAndConf.getValue().get()).listStatus(path);
          } catch (IOException e) {
            throw new HoodieIOException("Failed to list " + path, e);
          }
          return Arrays.stream(children).collect(Collectors.toList());
        },
        pathsToList)
        .values().stream()
        .flatMap(List::stream)
        .collect(Collectors.toList());

    // Only directories can be descended into; the last round keeps everything it found.
    if (level < expectLevel) {
      pathsToList = listedStatuses.stream()
          .filter(FileStatus::isDirectory)
          .map(status -> status.getPath().toString())
          .collect(Collectors.toList());
    }
    level++;
  }
  return listedStatuses;
}
/**
 * A {@link Function} that is also {@link Serializable}, so that lambdas and method
 * references targeting this type can be serialized.
 * <p>
 * The interface declares no methods of its own; its single abstract method is
 * {@link Function#apply(Object)}.
 *
 * @param <T> Type of the input to the function.
 * @param <R> Type of the result of the function.
 */
@FunctionalInterface
public interface SerializableFunction<T, R> extends Function<T, R>, Serializable {
}
}

View File

@@ -26,6 +26,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
@@ -66,7 +67,7 @@ public class TestFSUtils extends HoodieCommonTestHarness {
private final long minRollbackToKeep = 10;
private final long minCleanToKeep = 10;
private static String TEST_WRITE_TOKEN = "1-0-1";
private static final String TEST_WRITE_TOKEN = "1-0-1";
private static final String BASE_FILE_EXTENSION = HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension();
@Rule
@@ -455,4 +456,21 @@ public class TestFSUtils extends HoodieCommonTestHarness {
}
}
}
/**
 * Checks that {@link FSUtils#getFileStatusAtLevel} returns the ".txt" files located
 * under the "/.hoodie/.temp/[subdir]/" folders when listing from {@code basePath}
 * with an expected level of 3 and a parallelism of 2.
 * <p>
 * NOTE(review): the test directory layout comes from {@code prepareTestDirectory};
 * presumably it creates subdir1/file1.txt and subdir2/file2.txt under the root dir.
 */
@Test
public void testGetFileStatusAtLevel() throws IOException {
  String rootDir = basePath + "/.hoodie/.temp";
  FileSystem fileSystem = metaClient.getFs();
  prepareTestDirectory(fileSystem, rootDir);
  List<FileStatus> fileStatusList = FSUtils.getFileStatusAtLevel(
      new HoodieLocalEngineContext(fileSystem.getConf()), fileSystem,
      new Path(basePath), 3, 2);
  // The listing result is assembled from the values of a map keyed by path, so the
  // order of the returned entries is not guaranteed. Sort the paths before comparing
  // to keep the assertion independent of that order.
  List<String> actualTxtPaths = fileStatusList.stream()
      .map(fileStatus -> fileStatus.getPath().toString())
      .filter(filePath -> filePath.endsWith(".txt"))
      .sorted()
      .collect(Collectors.toList());
  // The expected paths are listed in their natural (sorted) order.
  assertEquals(CollectionUtils.createImmutableList(
      "file:" + basePath + "/.hoodie/.temp/subdir1/file1.txt",
      "file:" + basePath + "/.hoodie/.temp/subdir2/file2.txt"),
      actualTxtPaths);
}
}