1
0

[HUDI-1234] Insert new records into data files without merging for the "insert" operation. (#2111)

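* Added HoodieConcatHandle, which skips merging for the "insert" operation (any write operation that does not change existing records) when the write config allows duplicate inserts (`config.allowDuplicateInserts()`)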

Co-authored-by: Sivabalan Narayanan <sivabala@uber.com>
This commit is contained in:
SteNicholas
2021-01-28 02:09:51 +08:00
committed by GitHub
parent a54550d94f
commit 2ee1c3fb0c
7 changed files with 266 additions and 26 deletions

View File

@@ -43,6 +43,7 @@ import org.apache.hudi.execution.SparkLazyInsertIterable;
import org.apache.hudi.io.CreateHandleFactory;
import org.apache.hudi.io.HoodieMergeHandle;
import org.apache.hudi.io.HoodieSortedMergeHandle;
import org.apache.hudi.io.storage.HoodieConcatHandle;
import org.apache.hudi.table.HoodieSparkTable;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.WorkloadProfile;
@@ -119,7 +120,7 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
WorkloadProfile profile = null;
if (isWorkloadProfileNeeded()) {
context.setJobStatus(this.getClass().getSimpleName(), "Building workload profile");
profile = new WorkloadProfile(buildProfile(inputRecordsRDD));
profile = new WorkloadProfile(buildProfile(inputRecordsRDD), operationType);
LOG.info("Workload profile :" + profile);
saveWorkloadProfileMetadataToInflight(profile, instantTime);
}
@@ -320,6 +321,8 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
protected HoodieMergeHandle getUpdateHandle(String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordItr) {
if (table.requireSortedRecords()) {
return new HoodieSortedMergeHandle<>(config, instantTime, (HoodieSparkTable) table, recordItr, partitionPath, fileId, taskContextSupplier);
} else if (!WriteOperationType.isChangingRecords(operationType) && config.allowDuplicateInserts()) {
return new HoodieConcatHandle<>(config, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier);
} else {
return new HoodieMergeHandle<>(config, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier);
}

View File

@@ -190,7 +190,7 @@ public class UpsertPartitioner<T extends HoodieRecordPayload<T>> extends Partiti
for (SmallFile smallFile : smallFiles) {
long recordsToAppend = Math.min((config.getParquetMaxFileSize() - smallFile.sizeBytes) / averageRecordSize,
totalUnassignedInserts);
if (recordsToAppend > 0) {
if (recordsToAppend > 0 && totalUnassignedInserts > 0) {
// create a new bucket or re-use an existing bucket
int bucket;
if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) {