[HUDI-2548] Flink streaming reader misses the rolling over file handles (#3787)
This commit is contained in:
@@ -420,9 +420,9 @@ public class TestWriteCopyOnWrite {
|
||||
|
||||
Map<String, List<HoodieRecord>> dataBuffer = funcWrapper.getDataBuffer();
|
||||
assertThat("Should have 1 data bucket", dataBuffer.size(), is(1));
|
||||
assertThat("3 records expect to flush out as a mini-batch",
|
||||
assertThat("2 records expect to flush out as a mini-batch",
|
||||
dataBuffer.values().stream().findFirst().map(List::size).orElse(-1),
|
||||
is(3));
|
||||
is(2));
|
||||
|
||||
// this triggers the data write and event send
|
||||
funcWrapper.checkpointFunction(1);
|
||||
@@ -483,9 +483,9 @@ public class TestWriteCopyOnWrite {
|
||||
|
||||
Map<String, List<HoodieRecord>> dataBuffer = funcWrapper.getDataBuffer();
|
||||
assertThat("Should have 1 data bucket", dataBuffer.size(), is(1));
|
||||
assertThat("3 records expect to flush out as a mini-batch",
|
||||
assertThat("2 records expect to flush out as a mini-batch",
|
||||
dataBuffer.values().stream().findFirst().map(List::size).orElse(-1),
|
||||
is(3));
|
||||
is(2));
|
||||
|
||||
// this triggers the data write and event send
|
||||
funcWrapper.checkpointFunction(1);
|
||||
@@ -615,9 +615,9 @@ public class TestWriteCopyOnWrite {
|
||||
|
||||
Map<String, List<HoodieRecord>> dataBuffer = funcWrapper.getDataBuffer();
|
||||
assertThat("Should have 1 data bucket", dataBuffer.size(), is(1));
|
||||
assertThat("3 records expect to flush out as a mini-batch",
|
||||
assertThat("2 records expect to flush out as a mini-batch",
|
||||
dataBuffer.values().stream().findFirst().map(List::size).orElse(-1),
|
||||
is(3));
|
||||
is(2));
|
||||
|
||||
// this triggers the data write and event send
|
||||
funcWrapper.checkpointFunction(1);
|
||||
@@ -665,6 +665,7 @@ public class TestWriteCopyOnWrite {
|
||||
Map<String, String> expected = new HashMap<>();
|
||||
// the last 2 lines are merged
|
||||
expected.put("par1", "["
|
||||
+ "id1,par1,id1,Danny,23,1,par1, "
|
||||
+ "id1,par1,id1,Danny,23,1,par1, "
|
||||
+ "id1,par1,id1,Danny,23,1,par1" + "]");
|
||||
return expected;
|
||||
|
||||
@@ -79,8 +79,11 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
streamTableEnv = TableEnvironmentImpl.create(settings);
|
||||
streamTableEnv.getConfig().getConfiguration()
|
||||
.setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
|
||||
streamTableEnv.getConfig().getConfiguration()
|
||||
.setString("execution.checkpointing.interval", "2s");
|
||||
Configuration execConf = streamTableEnv.getConfig().getConfiguration();
|
||||
execConf.setString("execution.checkpointing.interval", "2s");
|
||||
// configure not to retry after failure
|
||||
execConf.setString("restart-strategy", "fixed-delay");
|
||||
execConf.setString("restart-strategy.fixed-delay.attempts", "0");
|
||||
|
||||
settings = EnvironmentSettings.newInstance().inBatchMode().build();
|
||||
batchTableEnv = TableEnvironmentImpl.create(settings);
|
||||
@@ -529,12 +532,37 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@EnumSource(value = ExecMode.class)
|
||||
void testUpsertWithMiniBatches(ExecMode execMode) {
|
||||
@EnumSource(value = HoodieTableType.class)
|
||||
void testStreamWriteAndReadWithMiniBatches(HoodieTableType tableType) throws Exception {
|
||||
// create filesystem table named source
|
||||
String createSource = TestConfigurations.getFileSourceDDL("source", 4);
|
||||
streamTableEnv.executeSql(createSource);
|
||||
|
||||
String hoodieTableDDL = sql("t1")
|
||||
.option(FlinkOptions.PATH, tempFile.getAbsolutePath())
|
||||
.option(FlinkOptions.READ_AS_STREAMING, true)
|
||||
.option(FlinkOptions.TABLE_TYPE, tableType)
|
||||
.option(FlinkOptions.READ_START_COMMIT, "earliest")
|
||||
.option(FlinkOptions.WRITE_BATCH_SIZE, 0.00001)
|
||||
.noPartition()
|
||||
.end();
|
||||
streamTableEnv.executeSql(hoodieTableDDL);
|
||||
String insertInto = "insert into t1 select * from source";
|
||||
execInsertSql(streamTableEnv, insertInto);
|
||||
|
||||
// reading from the earliest commit instance.
|
||||
List<Row> rows = execSelectSql(streamTableEnv, "select * from t1", 20);
|
||||
assertRowsEquals(rows, TestData.DATA_SET_SOURCE_INSERT);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("executionModeAndTableTypeParams")
|
||||
void testBatchUpsertWithMiniBatches(ExecMode execMode, HoodieTableType tableType) {
|
||||
TableEnvironment tableEnv = execMode == ExecMode.BATCH ? batchTableEnv : streamTableEnv;
|
||||
String hoodieTableDDL = sql("t1")
|
||||
.option(FlinkOptions.PATH, tempFile.getAbsolutePath())
|
||||
.option(FlinkOptions.WRITE_BATCH_SIZE, "0.001")
|
||||
.option(FlinkOptions.TABLE_TYPE, tableType)
|
||||
.end();
|
||||
tableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
@@ -958,7 +986,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
try {
|
||||
tableResult.getJobClient().get().getJobExecutionResult().get();
|
||||
} catch (InterruptedException | ExecutionException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
// ignored
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user