[HUDI-3355] Issue with out of order commits in the timeline when ingestion writers using SparkAllowUpdateStrategy (#4962)
This commit is contained in:
@@ -87,9 +87,10 @@ import java.nio.charset.StandardCharsets;
|
||||
import java.text.ParseException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
@@ -124,6 +125,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
|
||||
protected transient AsyncArchiveService asyncArchiveService;
|
||||
protected final TransactionManager txnManager;
|
||||
protected Option<Pair<HoodieInstant, Map<String, String>>> lastCompletedTxnAndMetadata = Option.empty();
|
||||
protected Set<String> pendingInflightAndRequestedInstants;
|
||||
|
||||
/**
|
||||
* Create a write client, with new hudi index.
|
||||
@@ -440,6 +442,8 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
|
||||
HoodieTableMetaClient metaClient) {
|
||||
setOperationType(writeOperationType);
|
||||
this.lastCompletedTxnAndMetadata = TransactionUtils.getLastCompletedTxnInstantAndMetadata(metaClient);
|
||||
this.pendingInflightAndRequestedInstants = TransactionUtils.getInflightAndRequestedInstants(metaClient);
|
||||
this.pendingInflightAndRequestedInstants.remove(instantTime);
|
||||
if (null == this.asyncCleanerService) {
|
||||
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
|
||||
} else {
|
||||
|
||||
@@ -26,6 +26,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
@@ -36,7 +37,9 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class TransactionUtils {
|
||||
@@ -51,26 +54,8 @@ public class TransactionUtils {
|
||||
* @param thisCommitMetadata
|
||||
* @param config
|
||||
* @param lastCompletedTxnOwnerInstant
|
||||
* @return
|
||||
* @throws HoodieWriteConflictException
|
||||
*/
|
||||
public static Option<HoodieCommitMetadata> resolveWriteConflictIfAny(
|
||||
final HoodieTable table,
|
||||
final Option<HoodieInstant> currentTxnOwnerInstant,
|
||||
final Option<HoodieCommitMetadata> thisCommitMetadata,
|
||||
final HoodieWriteConfig config,
|
||||
Option<HoodieInstant> lastCompletedTxnOwnerInstant) throws HoodieWriteConflictException {
|
||||
return resolveWriteConflictIfAny(table, currentTxnOwnerInstant, thisCommitMetadata, config, lastCompletedTxnOwnerInstant, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve any write conflicts when committing data.
|
||||
* @param pendingInstants
|
||||
*
|
||||
* @param table
|
||||
* @param currentTxnOwnerInstant
|
||||
* @param thisCommitMetadata
|
||||
* @param config
|
||||
* @param lastCompletedTxnOwnerInstant
|
||||
* @return
|
||||
* @throws HoodieWriteConflictException
|
||||
*/
|
||||
@@ -80,11 +65,16 @@ public class TransactionUtils {
|
||||
final Option<HoodieCommitMetadata> thisCommitMetadata,
|
||||
final HoodieWriteConfig config,
|
||||
Option<HoodieInstant> lastCompletedTxnOwnerInstant,
|
||||
boolean reloadActiveTimeline) throws HoodieWriteConflictException {
|
||||
boolean reloadActiveTimeline,
|
||||
Set<String> pendingInstants) throws HoodieWriteConflictException {
|
||||
if (config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl()) {
|
||||
// deal with pendingInstants
|
||||
Stream<HoodieInstant> completedInstantsDuringCurrentWriteOperation = getCompletedInstantsDuringCurrentWriteOperation(table.getMetaClient(), pendingInstants);
|
||||
|
||||
ConflictResolutionStrategy resolutionStrategy = config.getWriteConflictResolutionStrategy();
|
||||
Stream<HoodieInstant> instantStream = resolutionStrategy.getCandidateInstants(reloadActiveTimeline
|
||||
? table.getMetaClient().reloadActiveTimeline() : table.getActiveTimeline(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant);
|
||||
Stream<HoodieInstant> instantStream = Stream.concat(resolutionStrategy.getCandidateInstants(reloadActiveTimeline
|
||||
? table.getMetaClient().reloadActiveTimeline() : table.getActiveTimeline(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant),
|
||||
completedInstantsDuringCurrentWriteOperation);
|
||||
final ConcurrentOperation thisOperation = new ConcurrentOperation(currentTxnOwnerInstant.get(), thisCommitMetadata.orElse(new HoodieCommitMetadata()));
|
||||
instantStream.forEach(instant -> {
|
||||
try {
|
||||
@@ -137,4 +127,35 @@ public class TransactionUtils {
|
||||
throw new HoodieIOException("Unable to read metadata for instant " + hoodieInstantOption.get(), io);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get InflightAndRequest instants.
|
||||
*
|
||||
* @param metaClient
|
||||
* @return
|
||||
*/
|
||||
public static Set<String> getInflightAndRequestedInstants(HoodieTableMetaClient metaClient) {
|
||||
// collect InflightAndRequest instants for deltaCommit/commit/compaction/clustering
|
||||
Set<String> timelineActions = CollectionUtils
|
||||
.createImmutableSet(HoodieTimeline.REPLACE_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMMIT_ACTION);
|
||||
return metaClient
|
||||
.getActiveTimeline()
|
||||
.getTimelineOfActions(timelineActions)
|
||||
.filterInflightsAndRequested()
|
||||
.getInstants()
|
||||
.map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
public static Stream<HoodieInstant> getCompletedInstantsDuringCurrentWriteOperation(HoodieTableMetaClient metaClient, Set<String> pendingInstants) {
|
||||
// deal with pendingInstants
|
||||
// some pending instants maybe finished during current write operation,
|
||||
// we should check the conflict of those pending operation
|
||||
return metaClient
|
||||
.reloadActiveTimeline()
|
||||
.getCommitsTimeline()
|
||||
.filterCompletedInstants()
|
||||
.getInstants()
|
||||
.filter(f -> pendingInstants.contains(f.getTimestamp()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,6 +80,7 @@ public abstract class BaseCommitActionExecutor<T extends HoodieRecordPayload, I,
|
||||
protected final TaskContextSupplier taskContextSupplier;
|
||||
protected final TransactionManager txnManager;
|
||||
protected Option<Pair<HoodieInstant, Map<String, String>>> lastCompletedTxn;
|
||||
protected Set<String> pendingInflightAndRequestedInstants;
|
||||
|
||||
public BaseCommitActionExecutor(HoodieEngineContext context, HoodieWriteConfig config,
|
||||
HoodieTable<T, I, K, O> table, String instantTime, WriteOperationType operationType,
|
||||
@@ -91,6 +92,8 @@ public abstract class BaseCommitActionExecutor<T extends HoodieRecordPayload, I,
|
||||
// TODO : Remove this once we refactor and move out autoCommit method from here, since the TxnManager is held in {@link BaseHoodieWriteClient}.
|
||||
this.txnManager = new TransactionManager(config, table.getMetaClient().getFs());
|
||||
this.lastCompletedTxn = TransactionUtils.getLastCompletedTxnInstantAndMetadata(table.getMetaClient());
|
||||
this.pendingInflightAndRequestedInstants = TransactionUtils.getInflightAndRequestedInstants(table.getMetaClient());
|
||||
this.pendingInflightAndRequestedInstants.remove(instantTime);
|
||||
if (table.getStorageLayout().doesNotSupport(operationType)) {
|
||||
throw new UnsupportedOperationException("Executor " + this.getClass().getSimpleName()
|
||||
+ " is not compatible with table layout " + table.getStorageLayout().getClass().getSimpleName());
|
||||
@@ -184,7 +187,7 @@ public abstract class BaseCommitActionExecutor<T extends HoodieRecordPayload, I,
|
||||
setCommitMetadata(result);
|
||||
// reload active timeline so as to get all updates after current transaction have started. hence setting last arg to true.
|
||||
TransactionUtils.resolveWriteConflictIfAny(table, this.txnManager.getCurrentTransactionOwner(),
|
||||
result.getCommitMetadata(), config, this.txnManager.getLastCompletedTransactionOwner(), true);
|
||||
result.getCommitMetadata(), config, this.txnManager.getLastCompletedTransactionOwner(), true, pendingInflightAndRequestedInstants);
|
||||
commit(extraMetadata, result);
|
||||
} finally {
|
||||
this.txnManager.endTransaction(inflightInstant);
|
||||
|
||||
Reference in New Issue
Block a user