1
0

FileSystemView and Timeline level changes to support Async Compaction

This commit is contained in:
Balaji Varadarajan
2018-05-23 16:54:53 -07:00
committed by vinoth chandar
parent 44caf0d40c
commit 6d01ae8ca0
20 changed files with 892 additions and 132 deletions

View File

@@ -230,7 +230,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// This needs to be done since GlobalIndex at the moment does not store the latest commit time
Map<String, String> fileIdToLatestCommitTimeMap =
hoodieIndex.isGlobal() ? this.getRTFileSystemView().getLatestFileSlices(partitionPath)
.collect(Collectors.toMap(FileSlice::getFileId, FileSlice::getBaseCommitTime)) : null;
.collect(Collectors.toMap(FileSlice::getFileId, FileSlice::getBaseInstantTime)) : null;
commitMetadata.getPartitionToWriteStats().get(partitionPath).stream()
.filter(wStat -> {
if (wStat != null
@@ -341,7 +341,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// TODO - check if index.isglobal then small files are log files too
Optional<FileSlice> smallFileSlice = getRTFileSystemView()
.getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp()).filter(
// Use the merged file-slice for small file selection
.getLatestMergedFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp()).filter(
fileSlice -> fileSlice.getLogFiles().count() < 1
&& fileSlice.getDataFile().get().getFileSize() < config
.getParquetSmallFileLimit()).sorted((FileSlice left, FileSlice right) ->

View File

@@ -119,7 +119,8 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
* Get the real time view of the file system for this table
*/
public TableFileSystemView.RealtimeView getRTFileSystemView() {
// NOTE(review): this span comes from a diff hunk (-119,7 +119,8) whose +/- markers were lost,
// so both sides of the change appear here as consecutive return statements.
// Presumably the pre-change side: the view is built from getCompletedCommitTimeline().
return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline());
// Presumably the post-change side (the hunk gains one line, matching this two-line form):
// the view is built from metaClient's commits-and-compaction timeline after
// filterCompletedAndCompactionInstants() is applied to it.
return new HoodieTableFileSystemView(metaClient,
metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants());
}
/**
@@ -140,7 +141,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
* Get only the inflights (no-completed) commit timeline
*/
public HoodieTimeline getInflightCommitTimeline() {
// NOTE(review): this span comes from a diff hunk (-140,7 +141,7) whose +/- markers were lost,
// so both sides of a one-line change appear here as consecutive return statements.
// Presumably the pre-change side (diffs conventionally list the removed line first) — confirm:
// the commits timeline narrowed with filterInflights().
return metaClient.getCommitsTimeline().filterInflights();
// Presumably the post-change side — confirm: the same commits timeline narrowed with
// filterInflightsExcludingCompaction() instead.
return metaClient.getCommitsTimeline().filterInflightsExcludingCompaction();
}
/**