1
0

[HUDI-3438] Avoid getSmallFiles if hoodie.parquet.small.file.limit is 0 (#4823)

Co-authored-by: Hui An <hui.an@shopee.com>
This commit is contained in:
RexAn
2022-02-18 21:57:04 +08:00
committed by GitHub
parent fba5822ee3
commit 5009138d04
5 changed files with 26 additions and 4 deletions

View File

@@ -135,7 +135,7 @@ public class JavaUpsertPartitioner<T extends HoodieRecordPayload<T>> implements
WorkloadStat pStat = profile.getWorkloadStat(partitionPath);
if (pStat.getNumInserts() > 0) {
List<SmallFile> smallFiles = partitionSmallFilesMap.get(partitionPath);
List<SmallFile> smallFiles = partitionSmallFilesMap.getOrDefault(partitionPath, new ArrayList<>());
this.smallFiles.addAll(smallFiles);
LOG.info("For partitionPath : " + partitionPath + " Small Files => " + smallFiles);
@@ -205,6 +205,11 @@ public class JavaUpsertPartitioner<T extends HoodieRecordPayload<T>> implements
private Map<String, List<SmallFile>> getSmallFilesForPartitions(List<String> partitionPaths, HoodieEngineContext context) {
Map<String, List<SmallFile>> partitionSmallFilesMap = new HashMap<>();
if (config.getParquetSmallFileLimit() <= 0) {
return partitionSmallFilesMap;
}
if (partitionPaths != null && partitionPaths.size() > 0) {
context.setJobStatus(this.getClass().getSimpleName(), "Getting small files from partitions");
partitionSmallFilesMap = context.mapToPair(partitionPaths,