HUDI-148 Small File selection logic for MOR must skip fileIds selected for pending compaction correctly
This commit is contained in:
committed by
vinoth chandar
parent
8c9980f4f5
commit
a1483f2c5f
@@ -104,13 +104,15 @@ public interface TableFileSystemView {
|
||||
*/
|
||||
Stream<FileSlice> getLatestUnCompactedFileSlices(String partitionPath);
|
||||
|
||||
|
||||
/**
|
||||
* Stream all the latest file slices in the given partition with precondition that
|
||||
* commitTime(file) before maxCommitTime
|
||||
* Stream all latest file slices in the given partition, with the precondition that commitTime(file) is on or before maxCommitTime
|
||||
*
|
||||
* @param partitionPath Partition path
|
||||
* @param maxCommitTime Max Instant Time
|
||||
* @param includeFileSlicesInPendingCompaction include file-slices that are in pending compaction
|
||||
*/
|
||||
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime);
|
||||
String maxCommitTime, boolean includeFileSlicesInPendingCompaction);
|
||||
|
||||
/**
|
||||
* Stream all "merged" file-slices before or on an instant time
|
||||
|
||||
@@ -476,13 +476,19 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionStr, String maxCommitTime) {
|
||||
public final Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionStr, String maxCommitTime,
|
||||
boolean includeFileSlicesInPendingCompaction) {
|
||||
try {
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchLatestFileSlicesBeforeOrOn(partitionPath, maxCommitTime)
|
||||
.map(fs -> filterDataFileAfterPendingCompaction(fs));
|
||||
Stream<FileSlice> fileSliceStream =
|
||||
fetchLatestFileSlicesBeforeOrOn(partitionPath, maxCommitTime);
|
||||
if (includeFileSlicesInPendingCompaction) {
|
||||
return fileSliceStream.map(fs -> filterDataFileAfterPendingCompaction(fs));
|
||||
} else {
|
||||
return fileSliceStream.filter(fs -> !isPendingCompactionScheduledForFileId(fs.getFileGroupId()));
|
||||
}
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
|
||||
@@ -166,9 +166,10 @@ public class PriorityBasedFileSystemView implements SyncableFileSystemView, Seri
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime) {
|
||||
return execute(partitionPath, maxCommitTime, preferredView::getLatestFileSlicesBeforeOrOn,
|
||||
secondaryView::getLatestFileSlicesBeforeOrOn);
|
||||
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime,
|
||||
boolean includeFileSlicesInPendingCompaction) {
|
||||
return execute(partitionPath, maxCommitTime, includeFileSlicesInPendingCompaction,
|
||||
preferredView::getLatestFileSlicesBeforeOrOn, secondaryView::getLatestFileSlicesBeforeOrOn);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -108,6 +108,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public static final String LAST_INSTANT_TS = "lastinstantts";
|
||||
public static final String TIMELINE_HASH = "timelinehash";
|
||||
public static final String REFRESH_OFF = "refreshoff";
|
||||
public static final String INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM = "includependingcompaction";
|
||||
|
||||
|
||||
private static Logger log = LogManager.getLogger(RemoteHoodieTableFileSystemView.class);
|
||||
|
||||
@@ -327,8 +329,11 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime) {
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
|
||||
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime,
|
||||
boolean includeFileSlicesInPendingCompaction) {
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParams(partitionPath,
|
||||
new String[]{MAX_INSTANT_PARAM, INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM},
|
||||
new String[]{maxCommitTime, String.valueOf(includeFileSlicesInPendingCompaction)});
|
||||
try {
|
||||
List<FileSliceDTO> dataFiles = executeRequest(LATEST_SLICES_BEFORE_ON_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<FileSliceDTO>>() {
|
||||
|
||||
@@ -351,7 +351,7 @@ public class HoodieTableFileSystemViewTest {
|
||||
assertEquals("Log File Order check", fileName2, logFiles.get(2).getFileName());
|
||||
assertEquals("Log File Order check", fileName1, logFiles.get(3).getFileName());
|
||||
|
||||
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5)
|
||||
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true)
|
||||
.collect(Collectors.toList());
|
||||
assertEquals("Expect only one file-id", 1, fileSliceList.size());
|
||||
fileSlice = fileSliceList.get(0);
|
||||
@@ -672,7 +672,7 @@ public class HoodieTableFileSystemViewTest {
|
||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)));
|
||||
|
||||
filenames = Sets.newHashSet();
|
||||
List<HoodieLogFile> logFilesList = rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime4)
|
||||
List<HoodieLogFile> logFilesList = rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime4, true)
|
||||
.map(slice -> slice.getLogFiles()).flatMap(logFileList -> logFileList)
|
||||
.collect(Collectors.toList());
|
||||
assertEquals(logFilesList.size(), 4);
|
||||
@@ -706,7 +706,7 @@ public class HoodieTableFileSystemViewTest {
|
||||
}
|
||||
|
||||
logFilesList =
|
||||
rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime3).map(slice -> slice.getLogFiles())
|
||||
rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime3, true).map(slice -> slice.getLogFiles())
|
||||
.flatMap(logFileList -> logFileList).collect(Collectors.toList());
|
||||
assertEquals(logFilesList.size(), 1);
|
||||
assertTrue(logFilesList.get(0).getFileName()
|
||||
@@ -1135,7 +1135,7 @@ public class HoodieTableFileSystemViewTest {
|
||||
assertEquals("Log File Order check", fileName3, logFiles.get(1).getFileName());
|
||||
assertEquals("Log File Order check", fileName1, logFiles.get(2).getFileName());
|
||||
|
||||
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5)
|
||||
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true)
|
||||
.collect(Collectors.toList());
|
||||
assertEquals("Expect only one file-id", 1, fileSliceList.size());
|
||||
fileSlice = fileSliceList.get(0);
|
||||
@@ -1147,6 +1147,11 @@ public class HoodieTableFileSystemViewTest {
|
||||
assertEquals("Log files must include only those after compaction request", 2, logFiles.size());
|
||||
assertEquals("Log File Order check", fileName4, logFiles.get(0).getFileName());
|
||||
assertEquals("Log File Order check", fileName3, logFiles.get(1).getFileName());
|
||||
|
||||
// Check getLatestFileSlicesBeforeOrOn excluding fileIds in pending compaction
|
||||
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, false)
|
||||
.collect(Collectors.toList());
|
||||
assertEquals("Expect empty list as file-id is in pending compaction", 0, fileSliceList.size());
|
||||
});
|
||||
|
||||
Assert.assertEquals(3, fsView.getPendingCompactionOperations().count());
|
||||
|
||||
Reference in New Issue
Block a user