1
0

[HUDI-1877] Support records staying in same fileId after clustering (#3833)

* [HUDI-1877] Support records staying in same fileId after clustering

Add plan strategy

* Ensure same filegroup id and refactor based on comments
This commit is contained in:
Sagar Sumit
2021-11-10 09:47:50 +05:30
committed by GitHub
parent dfe3b84715
commit bb6a19e7d7
14 changed files with 280 additions and 81 deletions

View File

@@ -29,6 +29,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.execution.JavaLazyInsertIterable;
import org.apache.hudi.execution.bulkinsert.JavaBulkInsertInternalPartitionerFactory;
import org.apache.hudi.io.CreateHandleFactory;
import org.apache.hudi.io.WriteHandleFactory;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.FileIdPrefixProvider;
import org.apache.hudi.table.HoodieTable;
@@ -76,7 +77,7 @@ public class JavaBulkInsertHelper<T extends HoodieRecordPayload, R> extends Abst
}
// write new files
-List<WriteStatus> writeStatuses = bulkInsert(inputRecords, instantTime, table, config, performDedupe, userDefinedBulkInsertPartitioner, false, config.getBulkInsertShuffleParallelism(), false);
+List<WriteStatus> writeStatuses = bulkInsert(inputRecords, instantTime, table, config, performDedupe, userDefinedBulkInsertPartitioner, false, config.getBulkInsertShuffleParallelism(), new CreateHandleFactory(false));
//update index
((BaseJavaCommitActionExecutor) executor).updateIndexAndCommitIfNeeded(writeStatuses, result);
return result;
@@ -91,7 +92,7 @@ public class JavaBulkInsertHelper<T extends HoodieRecordPayload, R> extends Abst
Option<BulkInsertPartitioner<T>> userDefinedBulkInsertPartitioner,
boolean useWriterSchema,
int parallelism,
-boolean preserveHoodieMetadata) {
+WriteHandleFactory writeHandleFactory) {
// De-dupe/merge if needed
List<HoodieRecord<T>> dedupedRecords = inputRecords;