[HUDI-1468] Support custom clustering strategies and preserve commit metadata as part of clustering (#3419)
Co-authored-by: Satish Kotha <satishkotha@uber.com>
This commit is contained in:
@@ -216,7 +216,7 @@ public abstract class BaseJavaCommitActionExecutor<T extends HoodieRecordPayload
|
||||
}
|
||||
}
|
||||
|
||||
protected Map<String, List<String>> getPartitionToReplacedFileIds(List<WriteStatus> writeStatuses) {
|
||||
protected Map<String, List<String>> getPartitionToReplacedFileIds(HoodieWriteMetadata<List<WriteStatus>> writeMetadata) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
@@ -330,7 +330,7 @@ public abstract class BaseJavaCommitActionExecutor<T extends HoodieRecordPayload
|
||||
List<WriteStatus> statuses = table.getIndex().updateLocation(writeStatuses, context, table);
|
||||
result.setIndexUpdateDuration(Duration.between(indexStartTime, Instant.now()));
|
||||
result.setWriteStatuses(statuses);
|
||||
result.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(statuses));
|
||||
result.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(result));
|
||||
commitOnAutoCommit(result);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ public class JavaBulkInsertHelper<T extends HoodieRecordPayload, R> extends Abst
|
||||
table.getMetaClient().getCommitActionType(), instantTime), Option.empty(),
|
||||
config.shouldAllowMultiWriteOnSameInstant());
|
||||
// write new files
|
||||
List<WriteStatus> writeStatuses = bulkInsert(inputRecords, instantTime, table, config, performDedupe, userDefinedBulkInsertPartitioner, false, config.getBulkInsertShuffleParallelism());
|
||||
List<WriteStatus> writeStatuses = bulkInsert(inputRecords, instantTime, table, config, performDedupe, userDefinedBulkInsertPartitioner, false, config.getBulkInsertShuffleParallelism(), false);
|
||||
//update index
|
||||
((BaseJavaCommitActionExecutor) executor).updateIndexAndCommitIfNeeded(writeStatuses, result);
|
||||
return result;
|
||||
@@ -85,7 +85,8 @@ public class JavaBulkInsertHelper<T extends HoodieRecordPayload, R> extends Abst
|
||||
boolean performDedupe,
|
||||
Option<BulkInsertPartitioner<T>> userDefinedBulkInsertPartitioner,
|
||||
boolean useWriterSchema,
|
||||
int parallelism) {
|
||||
int parallelism,
|
||||
boolean preserveHoodieMetadata) {
|
||||
|
||||
// De-dupe/merge if needed
|
||||
List<HoodieRecord<T>> dedupedRecords = inputRecords;
|
||||
|
||||
@@ -64,9 +64,9 @@ public class JavaInsertOverwriteCommitActionExecutor<T extends HoodieRecordPaylo
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, List<String>> getPartitionToReplacedFileIds(List<WriteStatus> writeStatuses) {
|
||||
protected Map<String, List<String>> getPartitionToReplacedFileIds(HoodieWriteMetadata<List<WriteStatus>> writeResult) {
|
||||
return context.mapToPair(
|
||||
writeStatuses.stream().map(status -> status.getStat().getPartitionPath()).distinct().collect(Collectors.toList()),
|
||||
writeResult.getWriteStatuses().stream().map(status -> status.getStat().getPartitionPath()).distinct().collect(Collectors.toList()),
|
||||
partitionPath ->
|
||||
Pair.of(partitionPath, getAllExistingFileIds(partitionPath)), 1
|
||||
);
|
||||
|
||||
@@ -27,6 +27,7 @@ import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
@@ -48,7 +49,7 @@ public class JavaInsertOverwriteTableCommitActionExecutor<T extends HoodieRecord
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, List<String>> getPartitionToReplacedFileIds(List<WriteStatus> writeStatuses) {
|
||||
protected Map<String, List<String>> getPartitionToReplacedFileIds(HoodieWriteMetadata<List<WriteStatus>> writeResult) {
|
||||
Map<String, List<String>> partitionToExistingFileIds = new HashMap<>();
|
||||
List<String> partitionPaths = FSUtils.getAllPartitionPaths(context,
|
||||
table.getMetaClient().getBasePath(), config.useFileListingMetadata(),
|
||||
|
||||
Reference in New Issue
Block a user