1
0

Ensure Compaction Operation compacts the data file as defined in the workload

This commit is contained in:
Balaji Varadarajan
2018-05-26 14:08:29 -07:00
committed by vinoth chandar
parent 2f8ce93030
commit 2e12c86d01
11 changed files with 74 additions and 32 deletions

View File

@@ -174,6 +174,8 @@ public interface HoodieTimeline extends Serializable {
/**
* Helper predicates to compare instants. Each predicate receives two instant strings and
* compares the first against the second using {@link String#compareTo(String)}.
*/
// True when the first instant compares equal to the second.
BiPredicate<String, String> EQUAL =
(commit1, commit2) -> commit1.compareTo(commit2) == 0;
// True when the first instant compares greater than or equal to the second.
BiPredicate<String, String> GREATER_OR_EQUAL =
(commit1, commit2) -> commit1.compareTo(commit2) >= 0;
// True when the first instant compares strictly greater than the second.
BiPredicate<String, String> GREATER = (commit1, commit2) -> commit1.compareTo(commit2) > 0;

View File

@@ -51,6 +51,12 @@ public interface TableFileSystemView {
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
String maxCommitTime);
/**
* Stream the latest version data files in the given partition, restricted to data files whose
* instant time matches the passed-in instant time.
*
* @param partitionPath partition in which to look up data files
* @param instantTime instant time that a returned data file's instant time must match
* @return stream of the matching data files
*/
Stream<HoodieDataFile> getLatestDataFilesOn(String partitionPath, String instantTime);
/**
* Stream all the latest data files pass
*/

View File

@@ -256,6 +256,22 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
.map(Optional::get);
}
/**
 * For every file group in the partition, picks the first data file whose commit time compares
 * {@link HoodieTimeline#EQUAL} to the given instant time and which is not rejected by
 * {@code isDataFileDueToPendingCompaction}. File groups without such a data file contribute
 * nothing to the result.
 *
 * @param partitionPath partition whose file groups are scanned
 * @param instantTime instant time a data file's commit time must equal
 * @return the selected data files, at most one per file group
 */
@Override
public Stream<HoodieDataFile> getLatestDataFilesOn(String partitionPath, String instantTime) {
  return getAllFileGroups(partitionPath)
      .map(group -> group.getAllDataFiles()
          // Commit-time check runs first; the pending-compaction check only runs on a match.
          .filter(candidate ->
              HoodieTimeline.compareTimestamps(
                  candidate.getCommitTime(), instantTime, HoodieTimeline.EQUAL)
                  && !isDataFileDueToPendingCompaction(candidate))
          .findFirst())
      .filter(Optional::isPresent)
      .map(Optional::get);
}
@Override
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
return getAllFileGroups(partitionPath)

View File

@@ -14,8 +14,10 @@
# limitations under the License.
#
log4j.rootLogger=WARN, A1
log4j.category.com.uber=WARN
log4j.category.org.apache.parquet.hadoop=ERROR
log4j.category.com.uber=INFO
log4j.category.com.uber.hoodie.table.log=WARN
log4j.category.com.uber.hoodie.common.util=WARN
log4j.category.org.apache.parquet.hadoop=WARN
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.