
[HUDI-2379] Include the pending compaction file groups for flink streaming reader (#3567)

Author:       Danny Chan
Date:         2021-09-01 16:47:52 +08:00
Committed by: GitHub
Parent:       d59c8044f8
Commit:       f66e1ce9bf

4 changed files with 90 additions and 11 deletions
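
The hunks below cover only the test side of the change. The behavior the title describes is that the Flink streaming reader should also surface file slices whose file groups are referenced by a pending compaction plan; otherwise, log files written after a compaction is scheduled but before it completes would be invisible to streaming consumers. A minimal sketch of that idea, assuming Hudi's file-system-view API (getLatestFileSlicesBeforeOrOn is the real SliceView method; the helper class and its parameters are hypothetical stand-ins for the actual Flink source code path):

import java.util.List;
import java.util.stream.Collectors;

import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;

final class PendingCompactionAwareSplits {

  // Hypothetical helper mirroring the fix: ask the file-system view to keep
  // file slices that belong to file groups under a pending compaction plan.
  static List<FileSlice> latestFileSlices(
      HoodieTableFileSystemView fsView, String partitionPath, String maxCommitTime) {
    return fsView
        // The trailing 'true' is includeFileSlicesInPendingCompaction; with
        // 'false', those file groups (and their unread log files) would be
        // skipped when building the streaming read splits.
        .getLatestFileSlicesBeforeOrOn(partitionPath, maxCommitTime, true)
        .collect(Collectors.toList());
  }
}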


@@ -319,7 +319,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
     String hoodieTableDDL = sql("t1")
         .option(FlinkOptions.PATH, tempFile.getAbsolutePath())
-        .option(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ)
+        .option(FlinkOptions.TABLE_TYPE, tableType.name())
         .option(FlinkOptions.READ_AS_STREAMING, "true")
         .option(FlinkOptions.READ_STREAMING_CHECK_INTERVAL, "2")
         .option(FlinkOptions.HIVE_STYLE_PARTITIONING, hiveStylePartitioning)
@@ -334,6 +334,40 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
     assertRowsEquals(result, expected, true);
   }
 
+  @Test
+  void testStreamReadMorTableWithCompactionPlan() throws Exception {
+    String createSource = TestConfigurations.getFileSourceDDL("source");
+    streamTableEnv.executeSql(createSource);
+
+    String hoodieTableDDL = sql("t1")
+        .option(FlinkOptions.PATH, tempFile.getAbsolutePath())
+        .option(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ)
+        .option(FlinkOptions.READ_AS_STREAMING, "true")
+        .option(FlinkOptions.READ_STREAMING_START_COMMIT, FlinkOptions.START_COMMIT_EARLIEST)
+        .option(FlinkOptions.READ_STREAMING_CHECK_INTERVAL, "2")
+        // disable async compaction
+        .option(FlinkOptions.COMPACTION_ASYNC_ENABLED, false)
+        // generate a compaction plan for each commit
+        .option(FlinkOptions.COMPACTION_DELTA_COMMITS, "1")
+        .withPartition(false)
+        .end();
+    streamTableEnv.executeSql(hoodieTableDDL);
+    streamTableEnv.executeSql("insert into t1 select * from source");
+
+    List<Row> result = execSelectSql(streamTableEnv, "select * from t1", 10);
+    final String expected = "["
+        + "+I[id1, Danny, 23, 1970-01-01T00:00:01, par1], "
+        + "+I[id2, Stephen, 33, 1970-01-01T00:00:02, par1], "
+        + "+I[id3, Julian, 53, 1970-01-01T00:00:03, par2], "
+        + "+I[id4, Fabian, 31, 1970-01-01T00:00:04, par2], "
+        + "+I[id5, Sophia, 18, 1970-01-01T00:00:05, par3], "
+        + "+I[id6, Emma, 20, 1970-01-01T00:00:06, par3], "
+        + "+I[id7, Bob, 44, 1970-01-01T00:00:07, par4], "
+        + "+I[id8, Han, 56, 1970-01-01T00:00:08, par4]]";
+
+    assertRowsEquals(result, expected);
+  }
+
   @ParameterizedTest
   @MethodSource("executionModeAndPartitioningParams")
   void testWriteAndRead(ExecMode execMode, boolean hiveStylePartitioning) {
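
For readers who have not seen the sql("t1") test builder, the options in the new test roughly correspond to the following standalone Flink SQL DDL. This is a sketch under assumptions: the schema mirrors the usual Hudi Flink test schema, and the string keys are what the FlinkOptions constants resolved to around the time of this commit; check TestConfigurations and FlinkOptions for the authoritative values.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class PendingCompactionStreamReadDDL {
  public static void main(String[] args) {
    TableEnvironment tEnv = TableEnvironment.create(
        EnvironmentSettings.newInstance().inStreamingMode().build());
    // Unpartitioned, matching withPartition(false) in the test. The option
    // keys below are assumed equivalents of the FlinkOptions constants.
    tEnv.executeSql(
        "CREATE TABLE t1 (\n"
            + "  uuid VARCHAR(20),\n"
            + "  name VARCHAR(10),\n"
            + "  age INT,\n"
            + "  ts TIMESTAMP(3),\n"
            + "  PRIMARY KEY (uuid) NOT ENFORCED\n"
            + ") WITH (\n"
            + "  'connector' = 'hudi',\n"
            + "  'path' = '/tmp/t1',\n"
            + "  'table.type' = 'MERGE_ON_READ',\n"
            + "  'read.streaming.enabled' = 'true',\n"
            + "  'read.streaming.start-commit' = 'earliest',\n"
            + "  'read.streaming.check-interval' = '2',\n"
            + "  'compaction.async.enabled' = 'false',\n"
            + "  'compaction.delta_commits' = '1'\n"
            + ")");
  }
}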


@@ -400,7 +400,7 @@ public class TestData {
     String rowsString = rows.stream()
         .sorted(Comparator.comparing(o -> toIdSafely(o.getField(0))))
         .collect(Collectors.toList()).toString();
-    assertThat(rowDataToString(expected), is(rowsString));
+    assertThat(rowsString, is(rowDataToString(expected)));
   }
 
   /**
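
A note on the one-line TestData fix above: Hamcrest's assertThat(actual, matcher) ties its failure message to argument position, so with the operands reversed the report labels the actual result as the expectation. A self-contained illustration (the values are made up):

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;

public class AssertThatOrderExample {
  public static void main(String[] args) {
    String rowsString = "[+I[id1, Danny, 23]]"; // the value under test
    String expected   = "[+I[id1, Danny, 24]]"; // what the test expects

    // Correct order: actual first, matcher second. Running this throws an
    // AssertionError whose message reads:
    //   Expected: is "[+I[id1, Danny, 24]]"
    //        but: was "[+I[id1, Danny, 23]]"
    // With the arguments swapped, the two values trade places and the
    // message blames the wrong side.
    assertThat(rowsString, is(expected));
  }
}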