[HUDI-1902] Global index for flink writer (#2958)
Supports deduplication for record keys across different partition paths.
This commit is contained in:
@@ -392,6 +392,56 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
assertRowsEquals(result, "[id1,Sophia,18,1970-01-01T00:00:05,par5]");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testWriteGlobalIndex() {
|
||||
// the source generates 4 commits
|
||||
String createSource = TestConfigurations.getFileSourceDDL(
|
||||
"source", "test_source_4.data", 4);
|
||||
streamTableEnv.executeSql(createSource);
|
||||
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
|
||||
options.put(FlinkOptions.INSERT_DROP_DUPS.key(), "true");
|
||||
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
|
||||
streamTableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
final String insertInto2 = "insert into t1 select * from source";
|
||||
|
||||
execInsertSql(streamTableEnv, insertInto2);
|
||||
|
||||
List<Row> result = CollectionUtil.iterableToList(
|
||||
() -> streamTableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
assertRowsEquals(result, "[id1,Phoebe,52,1970-01-01T00:00:08,par4]");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testWriteLocalIndex() {
|
||||
// the source generates 4 commits
|
||||
String createSource = TestConfigurations.getFileSourceDDL(
|
||||
"source", "test_source_4.data", 4);
|
||||
streamTableEnv.executeSql(createSource);
|
||||
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
|
||||
options.put(FlinkOptions.INDEX_GLOBAL_ENABLED.key(), "false");
|
||||
options.put(FlinkOptions.INSERT_DROP_DUPS.key(), "true");
|
||||
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
|
||||
streamTableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
final String insertInto2 = "insert into t1 select * from source";
|
||||
|
||||
execInsertSql(streamTableEnv, insertInto2);
|
||||
|
||||
List<Row> result = CollectionUtil.iterableToList(
|
||||
() -> streamTableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
final String expected = "["
|
||||
+ "id1,Stephen,34,1970-01-01T00:00:02,par1, "
|
||||
+ "id1,Fabian,32,1970-01-01T00:00:04,par2, "
|
||||
+ "id1,Jane,19,1970-01-01T00:00:06,par3, "
|
||||
+ "id1,Phoebe,52,1970-01-01T00:00:08,par4]";
|
||||
assertRowsEquals(result, expected, 3);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testStreamReadEmptyTablePath() throws Exception {
|
||||
// create an empty table
|
||||
|
||||
@@ -256,8 +256,20 @@ public class TestData {
|
||||
* @param expected Expected string of the sorted rows
|
||||
*/
|
||||
public static void assertRowsEquals(List<Row> rows, String expected) {
|
||||
assertRowsEquals(rows, expected, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort the {@code rows} using field at index {@code orderingPos} and asserts
|
||||
* it equals with the expected string {@code expected}.
|
||||
*
|
||||
* @param rows Actual result rows
|
||||
* @param expected Expected string of the sorted rows
|
||||
* @param orderingPos Field position for ordering
|
||||
*/
|
||||
public static void assertRowsEquals(List<Row> rows, String expected, int orderingPos) {
|
||||
String rowsString = rows.stream()
|
||||
.sorted(Comparator.comparing(o -> toStringSafely(o.getField(0))))
|
||||
.sorted(Comparator.comparing(o -> toStringSafely(o.getField(orderingPos))))
|
||||
.collect(Collectors.toList()).toString();
|
||||
assertThat(rowsString, is(expected));
|
||||
}
|
||||
|
||||
8
hudi-flink/src/test/resources/test_source_4.data
Normal file
8
hudi-flink/src/test/resources/test_source_4.data
Normal file
@@ -0,0 +1,8 @@
|
||||
{"uuid": "id1", "name": "Danny", "age": 24, "ts": "1970-01-01T00:00:01", "partition": "par1"}
|
||||
{"uuid": "id1", "name": "Stephen", "age": 34, "ts": "1970-01-01T00:00:02", "partition": "par1"}
|
||||
{"uuid": "id1", "name": "Julian", "age": 54, "ts": "1970-01-01T00:00:03", "partition": "par2"}
|
||||
{"uuid": "id1", "name": "Fabian", "age": 32, "ts": "1970-01-01T00:00:04", "partition": "par2"}
|
||||
{"uuid": "id1", "name": "Sophia", "age": 18, "ts": "1970-01-01T00:00:05", "partition": "par3"}
|
||||
{"uuid": "id1", "name": "Jane", "age": 19, "ts": "1970-01-01T00:00:06", "partition": "par3"}
|
||||
{"uuid": "id1", "name": "Ella", "age": 38, "ts": "1970-01-01T00:00:07", "partition": "par4"}
|
||||
{"uuid": "id1", "name": "Phoebe", "age": 52, "ts": "1970-01-01T00:00:08", "partition": "par4"}
|
||||
Reference in New Issue
Block a user