1
0

[HUDI-3701] Flink bulk_insert support bucket hash index (#5118)

This commit is contained in:
Danny Chan
2022-03-25 09:01:42 +08:00
committed by GitHub
parent 608d4bf32d
commit 5e86cdd1e9
12 changed files with 169 additions and 27 deletions

View File

@@ -906,8 +906,8 @@ public class ITTestHoodieDataSource extends AbstractTestBase {
}
@ParameterizedTest
@ValueSource(booleans = {true, false})
void testBulkInsert(boolean hiveStylePartitioning) {
@MethodSource("indexAndPartitioningParams")
void testBulkInsert(String indexType, boolean hiveStylePartitioning) {
TableEnvironment tableEnv = batchTableEnv;
// csv source
String csvSourceDDL = TestConfigurations.getCsvSourceDDL("csv_source", "test_source_5.data");
@@ -917,6 +917,7 @@ public class ITTestHoodieDataSource extends AbstractTestBase {
.option(FlinkOptions.PATH, tempFile.getAbsolutePath())
.option(FlinkOptions.OPERATION, "bulk_insert")
.option(FlinkOptions.WRITE_BULK_INSERT_SHUFFLE_BY_PARTITION, true)
.option(FlinkOptions.INDEX_TYPE, indexType)
.option(FlinkOptions.HIVE_STYLE_PARTITIONING, hiveStylePartitioning)
.end();
tableEnv.executeSql(hoodieTableDDL);
@@ -1262,6 +1263,19 @@ public class ITTestHoodieDataSource extends AbstractTestBase {
return Stream.of(data).map(Arguments::of);
}
/**
 * Supplies the parameter matrix for tests: the cross product of
 * index type ({@code FLINK_STATE}, {@code BUCKET}) and hive-style
 * partitioning flag ({@code false}, {@code true}).
 *
 * @return a stream of (index type, hive style partitioning) argument pairs
 */
private static Stream<Arguments> indexAndPartitioningParams() {
  return Stream.of(
      Arguments.of("FLINK_STATE", false),
      Arguments.of("FLINK_STATE", true),
      Arguments.of("BUCKET", false),
      Arguments.of("BUCKET", true));
}
private void execInsertSql(TableEnvironment tEnv, String insert) {
TableResult tableResult = tEnv.executeSql(insert);
// wait to finish