[HUDI-1902] Clean the corrupted files generated by FlinkMergeAndReplaceHandle (#2949)
Make the intermediate files of FlinkMergeAndReplaceHandle hidden; when committing the instant, clean these files in case there were some corrupted files left (in the normal case, the intermediate files should be cleaned up by the FlinkMergeAndReplaceHandle itself).
This commit is contained in:
@@ -479,6 +479,13 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
|
||||
}, config.getFinalizeWriteParallelism());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the possible invalid data file name with given marker files.
|
||||
*/
|
||||
protected Set<String> getInvalidDataPaths(MarkerFiles markers) throws IOException {
|
||||
return markers.createdAndMergedDataPaths(context, config.getFinalizeWriteParallelism());
|
||||
}
|
||||
|
||||
/**
|
||||
* Reconciles WriteStats and marker files to detect and safely delete duplicate data files created because of Spark
|
||||
* retries.
|
||||
@@ -505,7 +512,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
|
||||
}
|
||||
|
||||
// we are not including log appends here, since they are already fail-safe.
|
||||
Set<String> invalidDataPaths = markers.createdAndMergedDataPaths(context, config.getFinalizeWriteParallelism());
|
||||
Set<String> invalidDataPaths = getInvalidDataPaths(markers);
|
||||
Set<String> validDataPaths = stats.stream()
|
||||
.map(HoodieWriteStat::getPath)
|
||||
.filter(p -> p.endsWith(this.getBaseFileExtension()))
|
||||
|
||||
Reference in New Issue
Block a user