1
0

[HUDI-472] Introduce configurations and new modes of sorting for bulk_insert (#1149)

* [HUDI-472] Introduce the configuration and new modes of record sorting for bulk_insert (#1149). Three sorting modes are implemented: global sort ("global_sort"), local sort inside each RDD partition ("partition_sort"), and no sort ("none").
This commit is contained in:
Y Ethan Guo
2020-07-31 06:52:42 -07:00
committed by GitHub
parent 2fc2b01d86
commit ccd70a7e48
23 changed files with 626 additions and 136 deletions

View File

@@ -23,7 +23,7 @@ import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.testutils.RawTripTestPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.table.UserDefinedBulkInsertPartitioner;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.spark.api.java.JavaRDD;
@@ -60,12 +60,17 @@ public class DataSourceTestUtils {
}
/**
 * Partitioner used by tests that leaves the incoming records untouched.
 *
 * <p>NOTE(review): two {@code implements} lines follow. This text looks like a rendered diff
 * (the removed line followed by the added line) - confirm which one applies before compiling.
 *
 * @param <T> payload type carried by each {@link HoodieRecord}
 */
public static class NoOpBulkInsertPartitioner<T extends HoodieRecordPayload>
implements UserDefinedBulkInsertPartitioner<T> {
implements BulkInsertPartitioner<T> {
/**
 * Returns the input RDD as-is; no repartitioning or reordering is performed.
 *
 * @param records input records
 * @param outputSparkPartitions ignored by this implementation
 * @return the same {@code records} reference that was passed in
 */
@Override
public JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records, int outputSparkPartitions) {
return records;
}
/**
 * Always answers {@code false}, i.e. the records are reported as not sorted.
 *
 * @return {@code false}
 */
@Override
public boolean arePartitionRecordsSorted() {
return false;
}
}
}