1
0

[HUDI-1902] Clean the corrupted files generated by FlinkMergeAndReplaceHandle (#2949)

Make the intermediate files of FlinkMergeAndReplaceHandle hidden. When
committing the instant, clean these files in case any corrupted files
are left behind (in the normal case, the intermediate files should be
cleaned by the FlinkMergeAndReplaceHandle itself).
This commit is contained in:
Danny Chan
2021-05-14 15:43:37 +08:00
committed by GitHub
parent 12443e4187
commit 8869b3b418
5 changed files with 32 additions and 8 deletions

View File

@@ -241,7 +241,8 @@ public class BucketAssigner {
.getLatestBaseFilesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp()).collect(Collectors.toList());
for (HoodieBaseFile file : allFiles) {
if (file.getFileSize() < config.getParquetSmallFileLimit()) {
// filter out the corrupted files.
if (file.getFileSize() < config.getParquetSmallFileLimit() && file.getFileSize() > 0) {
String filename = file.getFileName();
SmallFile sf = new SmallFile();
sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename), FSUtils.getFileId(filename));