1
0

[HUDI-1717] Metadata Reader should merge all the un-synced but complete instants from the dataset timeline. (#3082)

This commit is contained in:
Prashant Wason
2021-06-22 08:52:18 -07:00
committed by GitHub
parent 062d5baf84
commit 11e64b2db0
6 changed files with 114 additions and 22 deletions

View File

@@ -62,6 +62,7 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
protected final HoodieMetadataConfig metadataConfig;
// Directory used for Spillable Map when merging records
protected final String spillableMapDirectory;
private String syncedInstantTime;
protected boolean enabled;
private TimelineMergedTableMetadata timelineMergedMetadata;
@@ -277,17 +278,44 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
/**
 * Lazily creates {@code timelineMergedMetadata} and records the resulting {@code syncedInstantTime}.
 *
 * Does nothing when {@code timelineMergedMetadata} has already been created.
 */
private void openTimelineScanner() {
if (timelineMergedMetadata == null) {
// NOTE(review): the next two lines declare the same local; they look like the before/after sides of this
// diff (findInstantsToSync() replaced by findInstantsToSyncForReader()) - confirm against the commit.
List<HoodieInstant> unSyncedInstants = findInstantsToSync();
List<HoodieInstant> unSyncedInstants = findInstantsToSyncForReader();
// getSyncedInstantTime() is evaluated here, before syncedInstantTime is assigned below.
timelineMergedMetadata =
new TimelineMergedTableMetadata(datasetMetaClient, unSyncedInstants, getSyncedInstantTime(), null);
// With nothing left to merge, use getLatestDatasetInstantTime(); otherwise use the timestamp of the last
// element of the un-synced list.
syncedInstantTime = unSyncedInstants.isEmpty() ? getLatestDatasetInstantTime()
: unSyncedInstants.get(unSyncedInstants.size() - 1).getTimestamp();
}
}
protected abstract List<HoodieInstant> findInstantsToSync();
/**
 * Return the timestamp of the latest synced instant.
 *
 * @return an empty {@code Option} when {@code enabled} is false; otherwise
 *         {@code Option.ofNullable(syncedInstantTime)}, which is presumably empty while the field is still
 *         {@code null}
 */
@Override
public Option<String> getSyncedInstantTime() {
if (!enabled) {
return Option.empty();
}
return Option.ofNullable(syncedInstantTime);
}
/**
 * Return the instants which have not yet been synced to the {@code HoodieTableMetadata}.
 *
 * This is the list of all completed but un-synced instants.
 */
protected abstract List<HoodieInstant> findInstantsToSyncForReader();
/**
 * Return the instants which have not yet been synced to the {@code HoodieTableMetadataWriter}.
 *
 * This is the list of all completed but un-synced instants which do not have any incomplete instants in
 * between them.
 */
protected abstract List<HoodieInstant> findInstantsToSyncForWriter();
@Override
public boolean isInSync() {
// In sync means: enabled, and the list of instants still to be synced is empty.
// NOTE(review): the two return lines look like the before/after sides of this diff
// (findInstantsToSync() replaced by findInstantsToSyncForWriter()) - confirm against the commit.
return enabled && findInstantsToSync().isEmpty();
return enabled && findInstantsToSyncForWriter().isEmpty();
}
protected HoodieEngineContext getEngineContext() {

View File

@@ -29,7 +29,6 @@ import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
@@ -265,7 +264,22 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
* Return an ordered list of instants which have not been synced to the Metadata Table.
*/
@Override
// NOTE(review): the two signature lines below look like the before/after sides of this diff
// (findInstantsToSync() renamed to findInstantsToSyncForReader()) - confirm against the commit.
protected List<HoodieInstant> findInstantsToSync() {
protected List<HoodieInstant> findInstantsToSyncForReader() {
// Passes ignoreIncompleteInstants = true, so the earliest-incomplete-instant lookup is skipped.
return findInstantsToSync(true);
}
/**
 * Return an ordered list of instants which have not been synced to the Metadata Table, as needed by the writer.
 *
 * Delegates to {@code findInstantsToSync(false)}, i.e. incomplete instants are not ignored.
 */
@Override
protected List<HoodieInstant> findInstantsToSyncForWriter() {
return findInstantsToSync(false);
}
/**
* Return an ordered list of instants which have not been synced to the Metadata Table.
*/
private List<HoodieInstant> findInstantsToSync(boolean ignoreIncompleteInstants) {
initIfNeeded();
// if there are no instants yet, return empty list, since there is nothing to sync here.
@@ -277,7 +291,8 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
// are candidates for sync.
String latestMetadataInstantTime = metaClient.getActiveTimeline().filterCompletedInstants().lastInstant().get().getTimestamp();
HoodieDefaultTimeline candidateTimeline = datasetMetaClient.getActiveTimeline().findInstantsAfter(latestMetadataInstantTime, Integer.MAX_VALUE);
Option<HoodieInstant> earliestIncompleteInstant = candidateTimeline.filterInflightsAndRequested().firstInstant();
Option<HoodieInstant> earliestIncompleteInstant = ignoreIncompleteInstants ? Option.empty()
: candidateTimeline.filterInflightsAndRequested().firstInstant();
if (earliestIncompleteInstant.isPresent()) {
return candidateTimeline.filterCompletedInstants()
@@ -289,20 +304,6 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
}
}
/**
 * Return the timestamp of the latest completed delta commit instant.
 *
 * The active timeline of {@code metaClient} is reloaded on every call before it is queried.
 *
 * @return an empty {@code Option} when {@code enabled} is false; otherwise the timestamp of the last completed
 *         instant on the delta commit timeline, if any
 */
@Override
public Option<String> getSyncedInstantTime() {
if (!enabled) {
return Option.empty();
}
HoodieActiveTimeline timeline = metaClient.reloadActiveTimeline();
return timeline.getDeltaCommitTimeline().filterCompletedInstants()
.lastInstant().map(HoodieInstant::getTimestamp);
}
/**
 * Return the current value of the {@code enabled} flag.
 */
public boolean enabled() {
return enabled;
}