[HUDI-1951] Add bucket hash index, compatible with the hive bucket (#3173)
* [HUDI-2154] Add index key field to HoodieKey * [HUDI-2157] Add the bucket index and its read/write implementation for the Spark engine. * revert HUDI-2154 add index key field to HoodieKey * fix all comments and introduce a new tricky way to get index key at runtime; support double insert for bucket index * revert spark read optimizer based on bucket index * add the storage layout * index tag, hash function and add ut * fix ut * address partial comments * Code review feedback * add layout config and docs * fix ut * rename hoodie.layout and rebase master Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
@@ -47,6 +47,6 @@ public class FlinkInsertCommitActionExecutor<T extends HoodieRecordPayload<T>> e
|
||||
@Override
|
||||
public HoodieWriteMetadata<List<WriteStatus>> execute() {
|
||||
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
|
||||
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, false);
|
||||
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, operationType);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,6 +64,6 @@ public class FlinkInsertOverwriteCommitActionExecutor<T extends HoodieRecordPayl
|
||||
@Override
|
||||
public HoodieWriteMetadata<List<WriteStatus>> execute() {
|
||||
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
|
||||
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, false);
|
||||
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, operationType);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,6 +45,6 @@ public class FlinkInsertOverwriteTableCommitActionExecutor<T extends HoodieRecor
|
||||
@Override
|
||||
public HoodieWriteMetadata<List<WriteStatus>> execute() {
|
||||
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
|
||||
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, false);
|
||||
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, operationType);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,6 +47,6 @@ public class FlinkUpsertCommitActionExecutor<T extends HoodieRecordPayload<T>> e
|
||||
@Override
|
||||
public HoodieWriteMetadata<List<WriteStatus>> execute() {
|
||||
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
|
||||
config.shouldCombineBeforeUpsert(), config.getUpsertShuffleParallelism(), this, true);
|
||||
config.shouldCombineBeforeUpsert(), config.getUpsertShuffleParallelism(), this, operationType);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieOperation;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.exception.HoodieUpsertException;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
@@ -64,7 +65,7 @@ public class FlinkWriteHelper<T extends HoodieRecordPayload, R> extends Abstract
|
||||
@Override
|
||||
public HoodieWriteMetadata<List<WriteStatus>> write(String instantTime, List<HoodieRecord<T>> inputRecords, HoodieEngineContext context,
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table, boolean shouldCombine, int shuffleParallelism,
|
||||
BaseCommitActionExecutor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>, R> executor, boolean performTagging) {
|
||||
BaseCommitActionExecutor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>, R> executor, WriteOperationType operationType) {
|
||||
try {
|
||||
Instant lookupBegin = Instant.now();
|
||||
Duration indexLookupDuration = Duration.between(lookupBegin, Instant.now());
|
||||
|
||||
@@ -47,6 +47,6 @@ public class FlinkUpsertDeltaCommitActionExecutor<T extends HoodieRecordPayload<
|
||||
@Override
|
||||
public HoodieWriteMetadata execute() {
|
||||
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
|
||||
config.shouldCombineBeforeUpsert(), config.getUpsertShuffleParallelism(), this, true);
|
||||
config.shouldCombineBeforeUpsert(), config.getUpsertShuffleParallelism(), this, operationType);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user