[HUDI-1234] Insert new records to data files without merging for "Insert" operation. (#2111)
* Added HoodieConcatHandle to skip merging for the "insert" operation when the corresponding config is set.
Co-authored-by: Sivabalan Narayanan <sivabala@uber.com>
This commit is contained in:
@@ -43,6 +43,7 @@ import org.apache.hudi.execution.SparkLazyInsertIterable;
|
||||
import org.apache.hudi.io.CreateHandleFactory;
|
||||
import org.apache.hudi.io.HoodieMergeHandle;
|
||||
import org.apache.hudi.io.HoodieSortedMergeHandle;
|
||||
+ import org.apache.hudi.io.storage.HoodieConcatHandle;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.WorkloadProfile;
|
||||
@@ -119,7 +120,7 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
WorkloadProfile profile = null;
|
||||
if (isWorkloadProfileNeeded()) {
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Building workload profile");
|
||||
- profile = new WorkloadProfile(buildProfile(inputRecordsRDD));
|
||||
+ profile = new WorkloadProfile(buildProfile(inputRecordsRDD), operationType);
|
||||
LOG.info("Workload profile :" + profile);
|
||||
saveWorkloadProfileMetadataToInflight(profile, instantTime);
|
||||
}
|
||||
@@ -320,6 +321,8 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
protected HoodieMergeHandle getUpdateHandle(String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordItr) {
|
||||
if (table.requireSortedRecords()) {
|
||||
return new HoodieSortedMergeHandle<>(config, instantTime, (HoodieSparkTable) table, recordItr, partitionPath, fileId, taskContextSupplier);
|
||||
+ } else if (!WriteOperationType.isChangingRecords(operationType) && config.allowDuplicateInserts()) {
|
||||
+ return new HoodieConcatHandle<>(config, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier);
|
||||
} else {
|
||||
return new HoodieMergeHandle<>(config, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier);
|
||||
}
|
||||
|
||||
@@ -190,7 +190,7 @@ public class UpsertPartitioner<T extends HoodieRecordPayload<T>> extends Partiti
|
||||
for (SmallFile smallFile : smallFiles) {
|
||||
long recordsToAppend = Math.min((config.getParquetMaxFileSize() - smallFile.sizeBytes) / averageRecordSize,
|
||||
totalUnassignedInserts);
|
||||
- if (recordsToAppend > 0) {
|
||||
+ if (recordsToAppend > 0 && totalUnassignedInserts > 0) {
|
||||
// create a new bucket or re-use an existing bucket
|
||||
int bucket;
|
||||
if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) {
|
||||
|
||||
Reference in New Issue
Block a user