[HUDI-1800] Exclude file slices in pending compaction when performing small file sizing (#2902)
Co-authored-by: Ryan Pifer <ryanpife@amazon.com>
This commit is contained in:
@@ -44,8 +44,8 @@ import java.util.stream.Collectors;
|
||||
*/
|
||||
public class SparkUpsertDeltaCommitPartitioner<T extends HoodieRecordPayload<T>> extends UpsertPartitioner<T> {
|
||||
|
||||
SparkUpsertDeltaCommitPartitioner(WorkloadProfile profile, HoodieSparkEngineContext context, HoodieTable table,
|
||||
HoodieWriteConfig config) {
|
||||
public SparkUpsertDeltaCommitPartitioner(WorkloadProfile profile, HoodieSparkEngineContext context, HoodieTable table,
|
||||
HoodieWriteConfig config) {
|
||||
super(profile, context, table, config);
|
||||
}
|
||||
|
||||
@@ -79,10 +79,10 @@ public class SparkUpsertDeltaCommitPartitioner<T extends HoodieRecordPayload<T>>
|
||||
allSmallFileSlices.add(smallFileSlice.get());
|
||||
}
|
||||
} else {
|
||||
// If we can index log files, we can add more inserts to log files for fileIds including those under
|
||||
// pending compaction.
|
||||
// If we can index log files, we can add more inserts to log files, but only for fileIds that are NOT under
|
||||
// pending compaction
|
||||
List<FileSlice> allFileSlices =
|
||||
table.getSliceView().getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), true)
|
||||
table.getSliceView().getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), false)
|
||||
.collect(Collectors.toList());
|
||||
for (FileSlice fileSlice : allFileSlices) {
|
||||
if (isSmallFile(fileSlice)) {
|
||||
|
||||
Reference in New Issue
Block a user