[HUDI-2316] Support Flink batch upsert (#3494)
This commit is contained in:
@@ -292,7 +292,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("configParams")
|
||||
@MethodSource("executionModeAndPartitioningParams")
|
||||
void testWriteAndRead(ExecMode execMode, boolean hiveStylePartitioning) {
|
||||
TableEnvironment tableEnv = execMode == ExecMode.BATCH ? batchTableEnv : streamTableEnv;
|
||||
Map<String, String> options = new HashMap<>();
|
||||
@@ -317,6 +317,56 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
+ "+I[id8, Han, 56, 1970-01-01T00:00:08, par4]]");
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@EnumSource(value = HoodieTableType.class)
|
||||
void testBatchModeUpsertWithoutPartition(HoodieTableType tableType) {
|
||||
TableEnvironment tableEnv = batchTableEnv;
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
|
||||
options.put(FlinkOptions.TABLE_NAME.key(), tableType.name());
|
||||
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options, false);
|
||||
tableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
execInsertSql(tableEnv, TestSQL.INSERT_T1);
|
||||
|
||||
List<Row> result1 = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
assertRowsEquals(result1, TestData.DATA_SET_SOURCE_INSERT);
|
||||
|
||||
// batchMode update
|
||||
execInsertSql(tableEnv, TestSQL.UPDATE_INSERT_T1);
|
||||
List<Row> result2 = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
assertRowsEquals(result2, TestData.DATA_SET_SOURCE_MERGED);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("tableTypeAndPartitioningParams")
|
||||
void testBatchModeUpsert(HoodieTableType tableType, boolean hiveStylePartitioning) {
|
||||
TableEnvironment tableEnv = batchTableEnv;
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
|
||||
options.put(FlinkOptions.TABLE_NAME.key(), tableType.name());
|
||||
if (hiveStylePartitioning) {
|
||||
options.put(FlinkOptions.HIVE_STYLE_PARTITIONING.key(), "true");
|
||||
}
|
||||
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
|
||||
tableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
execInsertSql(tableEnv, TestSQL.INSERT_T1);
|
||||
|
||||
List<Row> result1 = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
assertRowsEquals(result1, TestData.DATA_SET_SOURCE_INSERT);
|
||||
|
||||
// batchMode update
|
||||
execInsertSql(tableEnv, TestSQL.UPDATE_INSERT_T1);
|
||||
|
||||
List<Row> result2 = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
assertRowsEquals(result2, TestData.DATA_SET_SOURCE_MERGED);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@EnumSource(value = ExecMode.class)
|
||||
void testWriteAndReadParMiddle(ExecMode execMode) throws Exception {
|
||||
@@ -436,18 +486,9 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
@EnumSource(value = ExecMode.class)
|
||||
void testWriteNonPartitionedTable(ExecMode execMode) {
|
||||
TableEnvironment tableEnv = execMode == ExecMode.BATCH ? batchTableEnv : streamTableEnv;
|
||||
String hoodieTableDDL = "create table t1(\n"
|
||||
+ " uuid varchar(20),\n"
|
||||
+ " name varchar(10),\n"
|
||||
+ " age int,\n"
|
||||
+ " ts timestamp(3),\n"
|
||||
+ " `partition` varchar(20),\n"
|
||||
+ " PRIMARY KEY(uuid) NOT ENFORCED\n"
|
||||
+ ")\n"
|
||||
+ "with (\n"
|
||||
+ " 'connector' = 'hudi',\n"
|
||||
+ " 'path' = '" + tempFile.getAbsolutePath() + "'\n"
|
||||
+ ")";
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
|
||||
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options, false);
|
||||
tableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
final String insertInto1 = "insert into t1 values\n"
|
||||
@@ -627,19 +668,10 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
@Test
|
||||
void testBulkInsertNonPartitionedTable() {
|
||||
TableEnvironment tableEnv = batchTableEnv;
|
||||
String hoodieTableDDL = "create table t1(\n"
|
||||
+ " uuid varchar(20),\n"
|
||||
+ " name varchar(10),\n"
|
||||
+ " age int,\n"
|
||||
+ " ts timestamp(3),\n"
|
||||
+ " `partition` varchar(20),\n"
|
||||
+ " PRIMARY KEY(uuid) NOT ENFORCED\n"
|
||||
+ ")\n"
|
||||
+ "with (\n"
|
||||
+ " 'connector' = 'hudi',\n"
|
||||
+ " 'path' = '" + tempFile.getAbsolutePath() + "',\n"
|
||||
+ " 'write.operation' = 'bulk_insert'\n"
|
||||
+ ")";
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
|
||||
options.put(FlinkOptions.OPERATION.key(), "bulk_insert");
|
||||
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options, false);
|
||||
tableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
final String insertInto1 = "insert into t1 values\n"
|
||||
@@ -675,7 +707,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
/**
|
||||
* Return test params => (execution mode, hive style partitioning).
|
||||
*/
|
||||
private static Stream<Arguments> configParams() {
|
||||
private static Stream<Arguments> executionModeAndPartitioningParams() {
|
||||
Object[][] data =
|
||||
new Object[][] {
|
||||
{ExecMode.BATCH, false},
|
||||
@@ -685,6 +717,19 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
return Stream.of(data).map(Arguments::of);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return test params => (HoodieTableType, hive style partitioning).
|
||||
*/
|
||||
private static Stream<Arguments> tableTypeAndPartitioningParams() {
|
||||
Object[][] data =
|
||||
new Object[][] {
|
||||
{HoodieTableType.COPY_ON_WRITE, false},
|
||||
{HoodieTableType.COPY_ON_WRITE, true},
|
||||
{HoodieTableType.MERGE_ON_READ, false},
|
||||
{HoodieTableType.MERGE_ON_READ, true}};
|
||||
return Stream.of(data).map(Arguments::of);
|
||||
}
|
||||
|
||||
private void execInsertSql(TableEnvironment tEnv, String insert) {
|
||||
TableResult tableResult = tEnv.executeSql(insert);
|
||||
// wait to finish
|
||||
|
||||
@@ -56,18 +56,24 @@ public class TestConfigurations {
|
||||
.build();
|
||||
|
||||
public static String getCreateHoodieTableDDL(String tableName, Map<String, String> options) {
|
||||
String createTable = "create table " + tableName + "(\n"
|
||||
return getCreateHoodieTableDDL(tableName, options, true);
|
||||
}
|
||||
|
||||
public static String getCreateHoodieTableDDL(String tableName, Map<String, String> options, boolean havePartition) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("create table " + tableName + "(\n"
|
||||
+ " uuid varchar(20),\n"
|
||||
+ " name varchar(10),\n"
|
||||
+ " age int,\n"
|
||||
+ " ts timestamp(3),\n"
|
||||
+ " `partition` varchar(20),\n"
|
||||
+ " PRIMARY KEY(uuid) NOT ENFORCED\n"
|
||||
+ ")\n"
|
||||
+ "PARTITIONED BY (`partition`)\n"
|
||||
+ "with (\n"
|
||||
+ " 'connector' = 'hudi'";
|
||||
StringBuilder builder = new StringBuilder(createTable);
|
||||
+ ")\n");
|
||||
if (havePartition) {
|
||||
builder.append("PARTITIONED BY (`partition`)\n");
|
||||
}
|
||||
builder.append("with (\n"
|
||||
+ " 'connector' = 'hudi'");
|
||||
options.forEach((k, v) -> builder.append(",\n")
|
||||
.append(" '").append(k).append("' = '").append(v).append("'"));
|
||||
builder.append("\n)");
|
||||
|
||||
@@ -33,4 +33,14 @@ public class TestSQL {
|
||||
+ "('id6','Emma',20,TIMESTAMP '1970-01-01 00:00:06','par3'),\n"
|
||||
+ "('id7','Bob',44,TIMESTAMP '1970-01-01 00:00:07','par4'),\n"
|
||||
+ "('id8','Han',56,TIMESTAMP '1970-01-01 00:00:08','par4')";
|
||||
|
||||
public static final String UPDATE_INSERT_T1 = "insert into t1 values\n"
|
||||
+ "('id1','Danny',24,TIMESTAMP '1970-01-01 00:00:01','par1'),\n"
|
||||
+ "('id2','Stephen',34,TIMESTAMP '1970-01-01 00:00:02','par1'),\n"
|
||||
+ "('id3','Julian',54,TIMESTAMP '1970-01-01 00:00:03','par2'),\n"
|
||||
+ "('id4','Fabian',32,TIMESTAMP '1970-01-01 00:00:04','par2'),\n"
|
||||
+ "('id5','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par3'),\n"
|
||||
+ "('id9','Jane',19,TIMESTAMP '1970-01-01 00:00:06','par3'),\n"
|
||||
+ "('id10','Ella',38,TIMESTAMP '1970-01-01 00:00:07','par4'),\n"
|
||||
+ "('id11','Phoebe',52,TIMESTAMP '1970-01-01 00:00:08','par4')";
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user