1
0

[HUDI-1951] Add bucket hash index, compatible with the hive bucket (#3173)

* [HUDI-2154] Add index key field to HoodieKey

* [HUDI-2157] Add the bucket index and its read/write implementation of Spark engine.
* revert HUDI-2154 add index key field to HoodieKey
* fix all comments and introduce a new tricky way to get index key at runtime
* support double insert for bucket index
* revert spark read optimizer based on bucket index
* add the storage layout
* index tag, hash function and add ut
* fix ut
* address partial comments
* Code review feedback
* add layout config and docs
* fix ut
* rename hoodie.layout and rebase master

Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
Shawy Geng
2021-12-31 04:38:26 +08:00
committed by GitHub
parent 0f0088fe4b
commit a4e622ac61
46 changed files with 1335 additions and 47 deletions

View File

@@ -47,6 +47,6 @@ public class FlinkInsertCommitActionExecutor<T extends HoodieRecordPayload<T>> e
@Override
public HoodieWriteMetadata<List<WriteStatus>> execute() {
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, false);
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, operationType);
}
}

View File

@@ -64,6 +64,6 @@ public class FlinkInsertOverwriteCommitActionExecutor<T extends HoodieRecordPayl
@Override
public HoodieWriteMetadata<List<WriteStatus>> execute() {
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, false);
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, operationType);
}
}

View File

@@ -45,6 +45,6 @@ public class FlinkInsertOverwriteTableCommitActionExecutor<T extends HoodieRecor
@Override
public HoodieWriteMetadata<List<WriteStatus>> execute() {
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, false);
config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, operationType);
}
}

View File

@@ -47,6 +47,6 @@ public class FlinkUpsertCommitActionExecutor<T extends HoodieRecordPayload<T>> e
@Override
public HoodieWriteMetadata<List<WriteStatus>> execute() {
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
config.shouldCombineBeforeUpsert(), config.getUpsertShuffleParallelism(), this, true);
config.shouldCombineBeforeUpsert(), config.getUpsertShuffleParallelism(), this, operationType);
}
}

View File

@@ -25,6 +25,7 @@ import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.index.HoodieIndex;
@@ -64,7 +65,7 @@ public class FlinkWriteHelper<T extends HoodieRecordPayload, R> extends Abstract
@Override
public HoodieWriteMetadata<List<WriteStatus>> write(String instantTime, List<HoodieRecord<T>> inputRecords, HoodieEngineContext context,
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table, boolean shouldCombine, int shuffleParallelism,
BaseCommitActionExecutor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>, R> executor, boolean performTagging) {
BaseCommitActionExecutor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>, R> executor, WriteOperationType operationType) {
try {
Instant lookupBegin = Instant.now();
Duration indexLookupDuration = Duration.between(lookupBegin, Instant.now());

View File

@@ -47,6 +47,6 @@ public class FlinkUpsertDeltaCommitActionExecutor<T extends HoodieRecordPayload<
@Override
public HoodieWriteMetadata execute() {
return FlinkWriteHelper.newInstance().write(instantTime, inputRecords, context, table,
config.shouldCombineBeforeUpsert(), config.getUpsertShuffleParallelism(), this, true);
config.shouldCombineBeforeUpsert(), config.getUpsertShuffleParallelism(), this, operationType);
}
}