1
0

[HUDI-1800] Exclude file slices in pending compaction when performing small file sizing (#2902)

Co-authored-by: Ryan Pifer <ryanpife@amazon.com>
This commit is contained in:
rmpifer
2021-05-29 05:06:01 -07:00
committed by GitHub
parent 974b476180
commit 0709c62a6b
3 changed files with 95 additions and 5 deletions

View File

@@ -44,8 +44,8 @@ import java.util.stream.Collectors;
*/
public class SparkUpsertDeltaCommitPartitioner<T extends HoodieRecordPayload<T>> extends UpsertPartitioner<T> {
SparkUpsertDeltaCommitPartitioner(WorkloadProfile profile, HoodieSparkEngineContext context, HoodieTable table,
HoodieWriteConfig config) {
public SparkUpsertDeltaCommitPartitioner(WorkloadProfile profile, HoodieSparkEngineContext context, HoodieTable table,
HoodieWriteConfig config) {
super(profile, context, table, config);
}
@@ -79,10 +79,10 @@ public class SparkUpsertDeltaCommitPartitioner<T extends HoodieRecordPayload<T>>
allSmallFileSlices.add(smallFileSlice.get());
}
} else {
// If we can index log files, we can add more inserts to log files for fileIds including those under
// pending compaction.
// If we can index log files, we can add more inserts to log files for fileIds NOT including those under
// pending compaction.
List<FileSlice> allFileSlices =
table.getSliceView().getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), true)
table.getSliceView().getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), false)
.collect(Collectors.toList());
for (FileSlice fileSlice : allFileSlices) {
if (isSmallFile(fileSlice)) {