1
0

FileSystemView and Timeline level changes to support Async Compaction

This commit is contained in:
Balaji Varadarajan
2018-05-23 16:54:53 -07:00
committed by vinoth chandar
parent 44caf0d40c
commit 6d01ae8ca0
20 changed files with 892 additions and 132 deletions

View File

@@ -37,7 +37,7 @@ public class FileSlice implements Serializable {
/**
* Point in the timeline, at which the slice was created
*/
private String baseCommitTime;
private String baseInstantTime;
/**
* data file, with the compacted data, for this slice
@@ -50,11 +50,11 @@ public class FileSlice implements Serializable {
*/
private final TreeSet<HoodieLogFile> logFiles;
public FileSlice(String baseCommitTime, String fileId) {
public FileSlice(String baseInstantTime, String fileId) {
this.fileId = fileId;
this.baseCommitTime = baseCommitTime;
this.baseInstantTime = baseInstantTime;
this.dataFile = null;
this.logFiles = new TreeSet<>(HoodieLogFile.getLogVersionComparator());
this.logFiles = new TreeSet<>(HoodieLogFile.getBaseInstantAndLogVersionComparator());
}
public void setDataFile(HoodieDataFile dataFile) {
@@ -69,8 +69,8 @@ public class FileSlice implements Serializable {
return logFiles.stream();
}
public String getBaseCommitTime() {
return baseCommitTime;
public String getBaseInstantTime() {
return baseInstantTime;
}
public String getFileId() {
@@ -84,7 +84,7 @@ public class FileSlice implements Serializable {
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("FileSlice {");
sb.append("baseCommitTime=").append(baseCommitTime);
sb.append("baseCommitTime=").append(baseInstantTime);
sb.append(", dataFile='").append(dataFile).append('\'');
sb.append(", logFiles='").append(logFiles).append('\'');
sb.append('}');

View File

@@ -72,6 +72,16 @@ public class HoodieFileGroup implements Serializable {
this.lastInstant = timeline.lastInstant();
}
/**
* Potentially add a new file-slice by adding base-instant time
* A file-slice without any data-file and log-files can exist (if a compaction just got requested)
*/
public void addNewFileSliceAtInstant(String baseInstantTime) {
if (!fileSlices.containsKey(baseInstantTime)) {
fileSlices.put(baseInstantTime, new FileSlice(baseInstantTime, id));
}
}
/**
* Add a new datafile into the file group
*/
@@ -106,13 +116,27 @@ public class HoodieFileGroup implements Serializable {
*/
private boolean isFileSliceCommitted(FileSlice slice) {
String maxCommitTime = lastInstant.get().getTimestamp();
return timeline.containsOrBeforeTimelineStarts(slice.getBaseCommitTime())
&& HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
return timeline.containsOrBeforeTimelineStarts(slice.getBaseInstantTime())
&& HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL);
}
/**
* Get all the file slices including in-flight ones as seen in underlying file-system
*/
public Stream<FileSlice> getAllFileSlicesIncludingInflight() {
return fileSlices.entrySet().stream().map(sliceEntry -> sliceEntry.getValue());
}
/**
* Get latest file slices including in-flight ones
*/
public Optional<FileSlice> getLatestFileSlicesIncludingInflight() {
return getAllFileSlicesIncludingInflight().findFirst();
}
/**
* Provides a stream of committed file slices, sorted reverse base commit time.
*/
@@ -141,15 +165,29 @@ public class HoodieFileGroup implements Serializable {
public Optional<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
return getAllFileSlices()
.filter(slice ->
HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.findFirst();
}
/**
* Obtain the latest file slice, up to a commit time, i.e. < maxInstantTime
* @param maxInstantTime Max Instant Time
* @return
*/
public Optional<FileSlice> getLatestFileSliceBefore(String maxInstantTime) {
return getAllFileSlices()
.filter(slice ->
HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(),
maxInstantTime,
HoodieTimeline.LESSER))
.findFirst();
}
public Optional<FileSlice> getLatestFileSliceInRange(List<String> commitRange) {
return getAllFileSlices()
.filter(slice -> commitRange.contains(slice.getBaseCommitTime()))
.filter(slice -> commitRange.contains(slice.getBaseInstantTime()))
.findFirst();
}
@@ -162,47 +200,6 @@ public class HoodieFileGroup implements Serializable {
.map(slice -> slice.getDataFile().get());
}
/**
* Get the latest committed data file
*/
public Optional<HoodieDataFile> getLatestDataFile() {
return getAllDataFiles().findFirst();
}
/**
* Get the latest data file, that is <= max commit time
*/
public Optional<HoodieDataFile> getLatestDataFileBeforeOrOn(String maxCommitTime) {
return getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.findFirst();
}
/**
* Get the latest data file, that is contained within the provided commit range.
*/
public Optional<HoodieDataFile> getLatestDataFileInRange(List<String> commitRange) {
return getAllDataFiles()
.filter(dataFile -> commitRange.contains(dataFile.getCommitTime()))
.findFirst();
}
/**
* Obtain the latest log file (based on latest committed data file), currently being appended to
*
* @return logfile if present, empty if no log file has been opened already.
*/
public Optional<HoodieLogFile> getLatestLogFile() {
Optional<FileSlice> latestSlice = getLatestFileSlice();
if (latestSlice.isPresent() && latestSlice.get().getLogFiles().count() > 0) {
return latestSlice.get().getLogFiles().findFirst();
}
return Optional.empty();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieFileGroup {");

View File

@@ -94,10 +94,16 @@ public class HoodieLogFile implements Serializable {
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion)));
}
public static Comparator<HoodieLogFile> getLogVersionComparator() {
public static Comparator<HoodieLogFile> getBaseInstantAndLogVersionComparator() {
return (o1, o2) -> {
// reverse the order
return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion());
String baseInstantTime1 = o1.getBaseCommitTime();
String baseInstantTime2 = o2.getBaseCommitTime();
if (baseInstantTime1.equals(baseInstantTime2)) {
// reverse the order by log-version when base-commit is same
return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion());
}
// reverse the order by base-commits
return new Integer(baseInstantTime2.compareTo(baseInstantTime1));
};
}

View File

@@ -274,6 +274,23 @@ public class HoodieTableMetaClient implements Serializable {
}
}
/**
* Get the commit + pending-compaction timeline visible for this table.
* A RT filesystem view is constructed with this timeline so that file-slice after pending compaction-requested
* instant-time is also considered valid. A RT file-system view for reading must then merge the file-slices before
* and after pending compaction instant so that all delta-commits are read.
*/
public HoodieTimeline getCommitsAndCompactionTimeline() {
switch (this.getTableType()) {
case COPY_ON_WRITE:
return getActiveTimeline().getCommitTimeline();
case MERGE_ON_READ:
return getActiveTimeline().getCommitsAndCompactionTimeline();
default:
throw new HoodieException("Unsupported table type :" + this.getTableType());
}
}
/**
* Get the compacted commit timeline visible for this table
*/

View File

@@ -18,10 +18,12 @@ package com.uber.hoodie.common.table;
import com.uber.hoodie.common.table.timeline.HoodieDefaultTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.table.timeline.HoodieInstant.State;
import java.io.Serializable;
import java.util.Optional;
import java.util.function.BiPredicate;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
/**
* HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific
@@ -42,6 +44,10 @@ public interface HoodieTimeline extends Serializable {
String ROLLBACK_ACTION = "rollback";
String SAVEPOINT_ACTION = "savepoint";
String INFLIGHT_EXTENSION = ".inflight";
// With Async Compaction, compaction instant can be in 3 states :
// (compaction-requested), (compaction-inflight), (completed)
String COMPACTION_ACTION = "compaction";
String REQUESTED_EXTENSION = ".requested";
String COMMIT_EXTENSION = "." + COMMIT_ACTION;
String DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION;
@@ -54,6 +60,12 @@ public interface HoodieTimeline extends Serializable {
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
String REQUESTED_COMPACTION_SUFFIX =
StringUtils.join(COMPACTION_ACTION, REQUESTED_EXTENSION);
String REQUESTED_COMPACTION_EXTENSION =
StringUtils.join(".", REQUESTED_COMPACTION_SUFFIX);
String INFLIGHT_COMPACTION_EXTENSION =
StringUtils.join(".", COMPACTION_ACTION, INFLIGHT_EXTENSION);
/**
* Filter this timeline to just include the in-flights
@@ -62,6 +74,13 @@ public interface HoodieTimeline extends Serializable {
*/
HoodieTimeline filterInflights();
/**
* Filter this timeline to just include the in-flights excluding compaction instants
*
* @return New instance of HoodieTimeline with just in-flights excluding compaction inflights
*/
HoodieTimeline filterInflightsExcludingCompaction();
/**
* Filter this timeline to just include the completed instants
*
@@ -69,6 +88,20 @@ public interface HoodieTimeline extends Serializable {
*/
HoodieTimeline filterCompletedInstants();
/**
* Filter this timeline to just include the completed + compaction (inflight + requested) instants
* A RT filesystem view is constructed with this timeline so that file-slice after pending compaction-requested
* instant-time is also considered valid. A RT file-system view for reading must then merge the file-slices before
* and after pending compaction instant so that all delta-commits are read.
* @return New instance of HoodieTimeline with just completed instants
*/
HoodieTimeline filterCompletedAndCompactionInstants();
/**
* Filter this timeline to just include inflight and requested compaction instants
* @return
*/
HoodieTimeline filterPendingCompactionTimeline();
/**
* Create a new Timeline with instants after startTs and before or on endTs
@@ -157,45 +190,60 @@ public interface HoodieTimeline extends Serializable {
return new HoodieInstant(false, instant.getAction(), instant.getTimestamp());
}
static HoodieInstant getCompactionRequestedInstant(final String timestamp) {
return new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, timestamp);
}
static HoodieInstant getCompactionInflightInstant(final String timestamp) {
return new HoodieInstant(State.INFLIGHT, COMPACTION_ACTION, timestamp);
}
static HoodieInstant getInflightInstant(final HoodieInstant instant) {
return new HoodieInstant(true, instant.getAction(), instant.getTimestamp());
}
static String makeCommitFileName(String commitTime) {
return commitTime + HoodieTimeline.COMMIT_EXTENSION;
return StringUtils.join(commitTime, HoodieTimeline.COMMIT_EXTENSION);
}
static String makeInflightCommitFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_COMMIT_EXTENSION);
}
static String makeCleanerFileName(String instant) {
return instant + HoodieTimeline.CLEAN_EXTENSION;
return StringUtils.join(instant, HoodieTimeline.CLEAN_EXTENSION);
}
static String makeInflightCleanerFileName(String instant) {
return instant + HoodieTimeline.INFLIGHT_CLEAN_EXTENSION;
return StringUtils.join(instant, HoodieTimeline.INFLIGHT_CLEAN_EXTENSION);
}
static String makeRollbackFileName(String instant) {
return instant + HoodieTimeline.ROLLBACK_EXTENSION;
return StringUtils.join(instant, HoodieTimeline.ROLLBACK_EXTENSION);
}
static String makeInflightRollbackFileName(String instant) {
return instant + HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION;
return StringUtils.join(instant, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION);
}
static String makeInflightSavePointFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION;
return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION);
}
static String makeSavePointFileName(String commitTime) {
return commitTime + HoodieTimeline.SAVEPOINT_EXTENSION;
return StringUtils.join(commitTime, HoodieTimeline.SAVEPOINT_EXTENSION);
}
static String makeInflightDeltaFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION;
return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION);
}
static String makeInflightCompactionFileName(String commitTime) {
return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION);
}
static String makeRequestedCompactionFileName(String commitTime) {
return StringUtils.join(commitTime, HoodieTimeline.REQUESTED_COMPACTION_EXTENSION);
}
static String makeDeltaFileName(String commitTime) {
@@ -211,8 +259,6 @@ public interface HoodieTimeline extends Serializable {
}
static String makeFileNameAsInflight(String fileName) {
return fileName + HoodieTimeline.INFLIGHT_EXTENSION;
return StringUtils.join(fileName, HoodieTimeline.INFLIGHT_EXTENSION);
}
}

View File

@@ -73,6 +73,12 @@ public interface TableFileSystemView {
*/
Stream<FileSlice> getLatestFileSlices(String partitionPath);
/**
* Stream all the latest uncompacted file slices in the given partition
*/
Stream<FileSlice> getLatestUnCompactedFileSlices(String partitionPath);
/**
* Stream all the latest file slices in the given partition with precondition that
* commitTime(file) before maxCommitTime
@@ -80,6 +86,16 @@ public interface TableFileSystemView {
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
String maxCommitTime);
/**
* Stream all "merged" file-slices before on an instant time
* If a file-group has a pending compaction request, the file-slice before and after compaction request instant
* is merged and returned.
* @param partitionPath Partition Path
* @param maxInstantTime Max Instant Time
* @return
*/
public Stream<FileSlice> getLatestMergedFileSlicesBeforeOrOn(String partitionPath, String maxInstantTime);
/**
* Stream all the latest file slices, in the given range
*/

View File

@@ -64,6 +64,7 @@ public abstract class AbstractHoodieLogRecordScanner {
// Reader schema for the records
private final Schema readerSchema;
// Latest valid instant time
// Log-Blocks belonging to inflight delta-instants are filtered-out using this high-watermark.
private final String latestInstantTime;
private final HoodieTableMetaClient hoodieTableMetaClient;
// Merge strategy to use when combining records from log

View File

@@ -95,7 +95,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
this(metaClient,
new String[] {COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION,
INFLIGHT_DELTA_COMMIT_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION});
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION, INFLIGHT_COMPACTION_EXTENSION, REQUESTED_COMPACTION_EXTENSION});
}
/**
@@ -118,19 +118,31 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
/**
* Get all instants (commits, delta commits) that produce new data, in the active timeline
*
*/
public HoodieTimeline getCommitsTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION));
}
/**
* Get all instants (commits, delta commits, in-flight/requested compaction) that produce new data, in the active
* timeline.
* With Async compaction a requested/inflight compaction-instant is a valid baseInstant for a file-slice as there
* could be delta-commits with that baseInstant.
*/
public HoodieTimeline getCommitsAndCompactionTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, COMPACTION_ACTION));
}
/**
* Get all instants (commits, delta commits, clean, savepoint, rollback) that result in actions,
* in the active timeline
*/
public HoodieTimeline getAllCommitsTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION,
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, COMPACTION_ACTION,
SAVEPOINT_ACTION, ROLLBACK_ACTION));
}
@@ -200,7 +212,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
log.info("Marking instant complete " + instant);
Preconditions.checkArgument(instant.isInflight(),
"Could not mark an already completed instant as complete again " + instant);
moveInflightToComplete(instant, HoodieTimeline.getCompletedInstant(instant), data);
transitionState(instant, HoodieTimeline.getCompletedInstant(instant), data);
log.info("Completed " + instant);
}
@@ -211,7 +223,18 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
}
public void deleteInflight(HoodieInstant instant) {
log.info("Deleting in-flight " + instant);
Preconditions.checkArgument(instant.isInflight());
deleteInstantFile(instant);
}
public void deleteCompactionRequested(HoodieInstant instant) {
Preconditions.checkArgument(instant.isRequested());
Preconditions.checkArgument(instant.getAction() == HoodieTimeline.COMPACTION_ACTION);
deleteInstantFile(instant);
}
private void deleteInstantFile(HoodieInstant instant) {
log.info("Deleting instant " + instant);
Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName());
try {
boolean result = metaClient.getFs().delete(inFlightCommitFilePath, false);
@@ -232,24 +255,43 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
return readDataFromPath(detailPath);
}
protected void moveInflightToComplete(HoodieInstant inflight, HoodieInstant completed,
public void revertFromInflightToRequested(HoodieInstant inflightInstant, HoodieInstant requestedInstant,
Optional<byte[]> data) {
Path commitFilePath = new Path(metaClient.getMetaPath(), completed.getFileName());
Preconditions.checkArgument(inflightInstant.getAction().equals(HoodieTimeline.COMPACTION_ACTION));
transitionState(inflightInstant, requestedInstant, data);
}
public void transitionFromRequestedToInflight(HoodieInstant requestedInstant, HoodieInstant inflightInstant,
Optional<byte[]> data) {
Preconditions.checkArgument(requestedInstant.getAction().equals(HoodieTimeline.COMPACTION_ACTION));
transitionState(requestedInstant, inflightInstant, data);
}
protected void moveInflightToComplete(HoodieInstant inflightInstant, HoodieInstant commitInstant,
Optional<byte[]> data) {
transitionState(inflightInstant, commitInstant, data);
}
private void transitionState(HoodieInstant fromInstant, HoodieInstant toInstant,
Optional<byte[]> data) {
Preconditions.checkArgument(fromInstant.getTimestamp().equals(toInstant.getTimestamp()));
Path commitFilePath = new Path(metaClient.getMetaPath(), toInstant.getFileName());
try {
// open a new file and write the commit metadata in
Path inflightCommitFile = new Path(metaClient.getMetaPath(), inflight.getFileName());
createFileInMetaPath(inflight.getFileName(), data);
Path inflightCommitFile = new Path(metaClient.getMetaPath(), fromInstant.getFileName());
createFileInMetaPath(fromInstant.getFileName(), data);
boolean success = metaClient.getFs().rename(inflightCommitFile, commitFilePath);
if (!success) {
throw new HoodieIOException(
"Could not rename " + inflightCommitFile + " to " + commitFilePath);
}
} catch (IOException e) {
throw new HoodieIOException("Could not complete " + inflight, e);
throw new HoodieIOException("Could not complete " + fromInstant, e);
}
}
protected void moveCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
Preconditions.checkArgument(completed.getTimestamp().equals(inflight.getTimestamp()));
Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), inflight.getFileName());
try {
if (!metaClient.getFs().exists(inFlightCommitFilePath)) {
@@ -269,6 +311,11 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
createFileInMetaPath(instant.getFileName(), content);
}
public void saveToRequested(HoodieInstant instant, Optional<byte[]> content) {
Preconditions.checkArgument(instant.getAction().equals(HoodieTimeline.COMPACTION_ACTION));
createFileInMetaPath(instant.getFileName(), content);
}
protected void createFileInMetaPath(String filename, Optional<byte[]> content) {
Path fullPath = new Path(metaClient.getMetaPath(), filename);
try {

View File

@@ -53,15 +53,38 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
public HoodieDefaultTimeline() {
}
@Override
public HoodieTimeline filterInflights() {
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight),
details);
}
@Override
public HoodieTimeline filterInflightsExcludingCompaction() {
return new HoodieDefaultTimeline(instants.stream().filter(instant -> {
return instant.isInflight() && (!instant.getAction().equals(HoodieTimeline.COMPACTION_ACTION));
}), details);
}
@Override
public HoodieTimeline filterCompletedInstants() {
return new HoodieDefaultTimeline(instants.stream().filter(s -> !s.isInflight()), details);
}
@Override
public HoodieTimeline filterCompletedAndCompactionInstants() {
return new HoodieDefaultTimeline(instants.stream().filter(s -> {
return !s.isInflight() || s.getAction().equals(HoodieTimeline.COMPACTION_ACTION);
}), details);
}
@Override
public HoodieTimeline filterPendingCompactionTimeline() {
return new HoodieDefaultTimeline(
instants.stream().filter(s -> s.getAction().equals(HoodieTimeline.COMPACTION_ACTION)),
details);
}
@Override
public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) {
return new HoodieDefaultTimeline(instants.stream().filter(

View File

@@ -30,7 +30,19 @@ import org.apache.hadoop.fs.FileStatus;
*/
public class HoodieInstant implements Serializable {
private boolean isInflight = false;
/**
* Instant State
*/
public enum State {
// Requested State (valid state for Compaction)
REQUESTED,
// Inflight instant
INFLIGHT,
// Committed instant
COMPLETED
}
private State state = State.COMPLETED;
private String action;
private String timestamp;
@@ -49,21 +61,35 @@ public class HoodieInstant implements Serializable {
// This is to support backwards compatibility on how in-flight commit files were written
// General rule is inflight extension is .<action>.inflight, but for commit it is .inflight
action = "commit";
isInflight = true;
state = State.INFLIGHT;
} else if (action.contains(HoodieTimeline.INFLIGHT_EXTENSION)) {
isInflight = true;
state = State.INFLIGHT;
action = action.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
} else if (action.equals(HoodieTimeline.REQUESTED_COMPACTION_SUFFIX)) {
state = State.REQUESTED;
action = action.replace(HoodieTimeline.REQUESTED_EXTENSION, "");
}
}
public HoodieInstant(boolean isInflight, String action, String timestamp) {
this.isInflight = isInflight;
//TODO: vb - Preserving for avoiding cascading changes. This constructor will be updated in subsequent PR
this.state = isInflight ? State.INFLIGHT : State.COMPLETED;
this.action = action;
this.timestamp = timestamp;
}
public HoodieInstant(State state, String action, String timestamp) {
this.state = state;
this.action = action;
this.timestamp = timestamp;
}
public boolean isInflight() {
return isInflight;
return state == State.INFLIGHT;
}
public boolean isRequested() {
return state == State.REQUESTED;
}
public String getAction() {
@@ -79,20 +105,28 @@ public class HoodieInstant implements Serializable {
*/
public String getFileName() {
if (HoodieTimeline.COMMIT_ACTION.equals(action)) {
return isInflight ? HoodieTimeline.makeInflightCommitFileName(timestamp)
return isInflight() ? HoodieTimeline.makeInflightCommitFileName(timestamp)
: HoodieTimeline.makeCommitFileName(timestamp);
} else if (HoodieTimeline.CLEAN_ACTION.equals(action)) {
return isInflight ? HoodieTimeline.makeInflightCleanerFileName(timestamp)
return isInflight() ? HoodieTimeline.makeInflightCleanerFileName(timestamp)
: HoodieTimeline.makeCleanerFileName(timestamp);
} else if (HoodieTimeline.ROLLBACK_ACTION.equals(action)) {
return isInflight ? HoodieTimeline.makeInflightRollbackFileName(timestamp)
return isInflight() ? HoodieTimeline.makeInflightRollbackFileName(timestamp)
: HoodieTimeline.makeRollbackFileName(timestamp);
} else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) {
return isInflight ? HoodieTimeline.makeInflightSavePointFileName(timestamp)
return isInflight() ? HoodieTimeline.makeInflightSavePointFileName(timestamp)
: HoodieTimeline.makeSavePointFileName(timestamp);
} else if (HoodieTimeline.DELTA_COMMIT_ACTION.equals(action)) {
return isInflight ? HoodieTimeline.makeInflightDeltaFileName(timestamp)
return isInflight() ? HoodieTimeline.makeInflightDeltaFileName(timestamp)
: HoodieTimeline.makeDeltaFileName(timestamp);
} else if (HoodieTimeline.COMPACTION_ACTION.equals(action)) {
if (isInflight()) {
return HoodieTimeline.makeInflightCompactionFileName(timestamp);
} else if (isRequested()) {
return HoodieTimeline.makeRequestedCompactionFileName(timestamp);
} else {
return HoodieTimeline.makeCommitFileName(timestamp);
}
}
throw new IllegalArgumentException("Cannot get file name for unknown action " + action);
}
@@ -106,18 +140,18 @@ public class HoodieInstant implements Serializable {
return false;
}
HoodieInstant that = (HoodieInstant) o;
return isInflight == that.isInflight
return state == that.state
&& Objects.equals(action, that.action)
&& Objects.equals(timestamp, that.timestamp);
}
@Override
public int hashCode() {
return Objects.hash(isInflight, action, timestamp);
return Objects.hash(state, action, timestamp);
}
@Override
public String toString() {
return "[" + ((isInflight) ? "==>" : "") + timestamp + "__" + action + "]";
return "[" + ((isInflight() || isRequested()) ? "==>" : "") + timestamp + "__" + action + "__" + state + "]";
}
}

View File

@@ -64,6 +64,11 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
// mapping from file id to the file group.
protected HashMap<String, HoodieFileGroup> fileGroupMap;
/**
* File Id to pending compaction instant time
*/
private final Map<String, String> fileIdToPendingCompactionInstantTime;
/**
* Create a file system view, as of the given timeline
*/
@@ -73,6 +78,8 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
this.visibleActiveTimeline = visibleActiveTimeline;
this.fileGroupMap = new HashMap<>();
this.partitionToFileGroupsMap = new HashMap<>();
//TODO: vb Will be implemented in next PR
this.fileIdToPendingCompactionInstantTime = new HashMap<>();
}
@@ -128,14 +135,19 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
List<HoodieFileGroup> fileGroups = new ArrayList<>();
fileIdSet.forEach(pair -> {
HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), pair.getValue(),
visibleActiveTimeline);
String fileId = pair.getValue();
HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), fileId, visibleActiveTimeline);
if (dataFiles.containsKey(pair)) {
dataFiles.get(pair).forEach(dataFile -> group.addDataFile(dataFile));
}
if (logFiles.containsKey(pair)) {
logFiles.get(pair).forEach(logFile -> group.addLogFile(logFile));
}
if (fileIdToPendingCompactionInstantTime.containsKey(fileId)) {
// If there is no delta-commit after compaction request, this step would ensure a new file-slice appears
// so that any new ingestion uses the correct base-instant
group.addNewFileSliceAtInstant(fileIdToPendingCompactionInstantTime.get(fileId));
}
fileGroups.add(group);
});
@@ -165,19 +177,37 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
return Arrays.stream(statuses).filter(rtFilePredicate).map(HoodieLogFile::new);
}
/**
* With async compaction, it is possible to see partial/complete data-files due to inflight-compactions, Ignore
* those data-files
*
* @param dataFile Data File
*/
private boolean isDataFileDueToPendingCompaction(HoodieDataFile dataFile) {
String compactionInstantTime = fileIdToPendingCompactionInstantTime.get(dataFile.getFileId());
if ((null != compactionInstantTime) && dataFile.getCommitTime().equals(compactionInstantTime)) {
return true;
}
return false;
}
@Override
public Stream<HoodieDataFile> getLatestDataFiles(final String partitionPath) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestDataFile())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(fileGroup -> {
return fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst();
})
.filter(Optional::isPresent)
.map(Optional::get);
}
@Override
public Stream<HoodieDataFile> getLatestDataFiles() {
return fileGroupMap.values().stream()
.map(fileGroup -> fileGroup.getLatestDataFile())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(fileGroup -> {
return fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst();
})
.filter(Optional::isPresent)
.map(Optional::get);
}
@@ -185,16 +215,29 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
String maxCommitTime) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestDataFileBeforeOrOn(maxCommitTime))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(fileGroup -> {
return fileGroup.getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.filter(df -> !isDataFileDueToPendingCompaction(df))
.findFirst();
})
.filter(Optional::isPresent)
.map(Optional::get);
}
@Override
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
return fileGroupMap.values().stream()
.map(fileGroup -> fileGroup.getLatestDataFileInRange(commitsToReturn))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(fileGroup -> {
return fileGroup.getAllDataFiles()
.filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
&& !isDataFileDueToPendingCompaction(dataFile))
.findFirst();
})
.filter(Optional::isPresent)
.map(Optional::get);
}
@@ -202,23 +245,125 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getAllDataFiles())
.flatMap(dataFileList -> dataFileList);
.flatMap(dataFileList -> dataFileList)
.filter(df -> !isDataFileDueToPendingCompaction(df));
}
@Override
public Stream<FileSlice> getLatestFileSlices(String partitionPath) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestFileSlice())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.filter(Optional::isPresent)
.map(Optional::get)
.map(this::filterDataFileAfterPendingCompaction);
}
@Override
public Stream<FileSlice> getLatestUnCompactedFileSlices(String partitionPath) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> {
FileSlice fileSlice = fileGroup.getLatestFileSlice().get();
// if the file-group is under compaction, pick the latest before compaction instant time.
if (isFileSliceAfterPendingCompaction(fileSlice)) {
String compactionInstantTime = fileIdToPendingCompactionInstantTime.get(fileSlice.getFileId());
return fileGroup.getLatestFileSliceBefore(compactionInstantTime);
}
return Optional.of(fileSlice);
})
.map(Optional::get);
}
/**
* Returns true if the file-group is under pending-compaction and the file-slice' baseInstant matches
* compaction Instant
* @param fileSlice File Slice
* @return
*/
private boolean isFileSliceAfterPendingCompaction(FileSlice fileSlice) {
String compactionInstantTime = fileIdToPendingCompactionInstantTime.get(fileSlice.getFileId());
if ((null != compactionInstantTime) && fileSlice.getBaseInstantTime().equals(compactionInstantTime)) {
return true;
}
return false;
}
/**
 * With async compaction, a slice whose base instant is a pending compaction instant may carry a
 * partial or complete data file written by that (uncommitted) compaction. Such a data file must
 * not be exposed to readers, so it is stripped from the slice.
 *
 * @param fileSlice candidate file slice
 * @return the slice unchanged, or a log-only copy if its data file belongs to a pending compaction
 */
private FileSlice filterDataFileAfterPendingCompaction(FileSlice fileSlice) {
  if (!isFileSliceAfterPendingCompaction(fileSlice)) {
    return fileSlice;
  }
  // Rebuild the slice without the data file; keep all log files.
  FileSlice logOnlySlice = new FileSlice(fileSlice.getBaseInstantTime(), fileSlice.getFileId());
  fileSlice.getLogFiles().forEach(logOnlySlice::addLogFile);
  return logOnlySlice;
}
@Override
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
    String maxCommitTime) {
  // Latest slice per file group with base instant <= maxCommitTime; pending-compaction
  // data files are stripped before returning.
  return getAllFileGroups(partitionPath)
      .map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime))
      .filter(Optional::isPresent)
      .map(Optional::get)
      .map(this::filterDataFileAfterPendingCompaction);
}
/**
 * Merges the two most recent file slices of a file group that is awaiting compaction. The merged
 * slice keeps the penultimate slice's base instant and data file (if any) and the union of both
 * slices' log files.
 *
 * @param lastSlice latest file slice of the file group
 * @param penultimateSlice the slice immediately before it in commit-timeline order
 * @return a new slice combining both inputs
 */
private static FileSlice mergeCompactionPendingFileSlices(FileSlice lastSlice, FileSlice penultimateSlice) {
  FileSlice merged = new FileSlice(penultimateSlice.getBaseInstantTime(), penultimateSlice.getFileId());
  penultimateSlice.getDataFile().ifPresent(merged::setDataFile);
  // Union of log files: penultimate first, then the latest slice's.
  penultimateSlice.getLogFiles().forEach(merged::addLogFile);
  lastSlice.getLogFiles().forEach(merged::addLogFile);
  return merged;
}
/**
 * If the given slice exists only because of a pending compaction instant, merges it with the
 * slice immediately preceding that instant; otherwise returns the slice unchanged.
 *
 * @param fileGroup file group the slice belongs to
 * @param fileSlice slice that may need merging
 * @return merged slice, or the input slice when no pending compaction applies
 */
private FileSlice getMergedFileSlice(HoodieFileGroup fileGroup, FileSlice fileSlice) {
  String pendingInstant = fileIdToPendingCompactionInstantTime.get(fileSlice.getFileId());
  if (pendingInstant == null || !fileSlice.getBaseInstantTime().equals(pendingInstant)) {
    // No pending compaction for this file group, or slice predates it — nothing to merge.
    return fileSlice;
  }
  return fileGroup.getLatestFileSliceBefore(pendingInstant)
      .map(prevSlice -> mergeCompactionPendingFileSlices(fileSlice, prevSlice))
      .orElse(fileSlice);
}
@Override
public Stream<FileSlice> getLatestMergedFileSlicesBeforeOrOn(String partitionPath, String maxInstantTime) {
  // Latest slice per file group at or before maxInstantTime; when the file group is
  // under pending compaction, the slice is merged with the one before the compaction instant.
  return getAllFileGroups(partitionPath)
      .map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxInstantTime)
          .map(slice -> getMergedFileSlice(fileGroup, slice)))
      .filter(Optional::isPresent)
      .map(Optional::get);
}
@@ -226,7 +371,6 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
/**
 * Streams, for every known file group, its latest file slice whose instant falls within the
 * given list of commits; file groups with no slice in range are skipped.
 *
 * @param commitsToReturn commit instants to restrict the lookup to
 * @return stream of matching latest file slices
 */
public Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn) {
  return fileGroupMap.values().stream()
      .map(fileGroup -> fileGroup.getLatestFileSliceInRange(commitsToReturn))
      // Keep the presence filter: it guards the unconditional get() below.
      .filter(Optional::isPresent)
      .map(Optional::get);
}
@@ -260,4 +404,15 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
"Failed to list data files in partition " + partitionPathStr, e);
}
}
/**
 * Registers a pending compaction instant for a file id. Test-only hook; TODO: remove in a
 * subsequent Async Compaction PR once pending compactions are loaded from the timeline.
 *
 * @param fileId file id under pending compaction
 * @param compactionInstantTime instant time of the pending compaction
 */
protected void addPendingCompactionFileId(String fileId, String compactionInstantTime) {
  fileIdToPendingCompactionInstantTime.put(fileId, compactionInstantTime);
}
}