[HUDI-2345] Hoodie columns sort partitioner for bulk insert (#3523)
Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
@@ -158,11 +158,18 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
.withDocumentation("For large initial imports using bulk_insert operation, controls the parallelism to use for sort modes or custom partitioning done"
|
||||
+ "before writing records to the table.");
|
||||
|
||||
/**
 * Config property naming the columns to sort by during bulk_insert when
 * {@code org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner} is the
 * user defined partitioner. The value is a column list such as {@code 'column1,column2'}.
 * The property is declared without a default value.
 */
public static final ConfigProperty<String> BULKINSERT_USER_DEFINED_PARTITIONER_SORT_COLUMNS = ConfigProperty
|
||||
.key("hoodie.bulkinsert.user.defined.partitioner.sort.columns")
|
||||
.noDefaultValue()
|
||||
.withDocumentation("Columns to sort the data by when using org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner as the user defined partitioner during bulk_insert. "
|
||||
+ "For example 'column1,column2'");
|
||||
|
||||
/**
 * Config property naming a user supplied class used to re-partition records before they
 * are bulk inserted. The property is declared without a default value. Its documentation
 * points to the built-in
 * {@code org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner}, whose sort
 * columns come from {@link #BULKINSERT_USER_DEFINED_PARTITIONER_SORT_COLUMNS}.
 *
 * NOTE(review): this is a diff view. The first {@code " optimally for common query patterns."}
 * line below is the pre-commit text; the two string lines after it are its replacement.
 */
public static final ConfigProperty<String> BULKINSERT_USER_DEFINED_PARTITIONER_CLASS_NAME = ConfigProperty
|
||||
.key("hoodie.bulkinsert.user.defined.partitioner.class")
|
||||
.noDefaultValue()
|
||||
.withDocumentation("If specified, this class will be used to re-partition records before they are bulk inserted. This can be used to sort, pack, cluster data"
|
||||
+ " optimally for common query patterns.");
|
||||
+ " optimally for common query patterns. For now we support a built-in user defined bulkinsert partitioner org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner"
|
||||
+ " which can sort based on specified column values set by " + BULKINSERT_USER_DEFINED_PARTITIONER_SORT_COLUMNS.key());
|
||||
|
||||
public static final ConfigProperty<String> UPSERT_PARALLELISM_VALUE = ConfigProperty
|
||||
.key("hoodie.upsert.shuffle.parallelism")
|
||||
@@ -894,6 +901,10 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return getString(BULKINSERT_USER_DEFINED_PARTITIONER_CLASS_NAME);
|
||||
}
|
||||
|
||||
/**
 * Returns the configured value of {@code BULKINSERT_USER_DEFINED_PARTITIONER_SORT_COLUMNS}
 * (key {@code hoodie.bulkinsert.user.defined.partitioner.sort.columns}), read with
 * {@code getString}.
 *
 * @return the sort-column setting as stored, e.g. {@code "column1,column2"}; the string is
 *         returned unparsed. NOTE(review): the property has no default value, and what
 *         {@code getString} yields when it is unset is not visible here; confirm before
 *         dereferencing the result.
 */
public String getUserDefinedBulkInsertPartitionerSortColumns() {
|
||||
return getString(BULKINSERT_USER_DEFINED_PARTITIONER_SORT_COLUMNS);
|
||||
}
|
||||
|
||||
/**
 * Returns the value of {@code INSERT_PARALLELISM_VALUE}, read as an int with {@code getInt}.
 *
 * @return the configured insert parallelism. NOTE(review): the property's key and default
 *         are declared outside this excerpt; presumably this is the shuffle parallelism
 *         used for insert operations, which should be verified against the declaration.
 */
public int getInsertShuffleParallelism() {
|
||||
return getInt(INSERT_PARALLELISM_VALUE);
|
||||
}
|
||||
@@ -1832,6 +1843,11 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Stores the given sort columns on the write config under
 * {@code BULKINSERT_USER_DEFINED_PARTITIONER_SORT_COLUMNS}.
 *
 * @param columns the sort-column setting, passed to {@code setValue} unchanged; the
 *                property's documentation gives {@code 'column1,column2'} as an example.
 *                No validation or parsing happens in this method.
 * @return this builder, so calls can be chained
 */
public Builder withUserDefinedBulkInsertPartitionerSortColumns(String columns) {
|
||||
writeConfig.setValue(BULKINSERT_USER_DEFINED_PARTITIONER_SORT_COLUMNS, columns);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withDeleteParallelism(int parallelism) {
|
||||
writeConfig.setValue(DELETE_PARALLELISM_VALUE, String.valueOf(parallelism));
|
||||
return this;
|
||||
|
||||
Reference in New Issue
Block a user