[HUDI-1801] FlinkMergeHandle rolling over may miss to rename the latest file handle (#2831)
The FlinkMergeHandle may rename the N-1 th file handle instead of the latest one, thus causing data duplication.
This commit is contained in:
@@ -324,6 +324,34 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
assertRowsEquals(result2, expected);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@EnumSource(value = ExecMode.class)
|
||||
void testUpsertWithMiniBatches(ExecMode execMode) {
|
||||
TableEnvironment tableEnv = execMode == ExecMode.BATCH ? batchTableEnv : streamTableEnv;
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
|
||||
options.put(FlinkOptions.WRITE_BATCH_SIZE.key(), "0.001");
|
||||
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
|
||||
tableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
final String insertInto1 = "insert into t1 values\n"
|
||||
+ "('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1')";
|
||||
|
||||
execInsertSql(tableEnv, insertInto1);
|
||||
|
||||
final String insertInto2 = "insert into t1 values\n"
|
||||
+ "('id1','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),\n"
|
||||
+ "('id1','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par1'),\n"
|
||||
+ "('id1','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par1'),\n"
|
||||
+ "('id1','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par1')";
|
||||
|
||||
execInsertSql(tableEnv, insertInto2);
|
||||
|
||||
List<Row> result = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
assertRowsEquals(result, "[id1,Sophia,18,1970-01-01T00:00:05,par1]");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
//  Utilities
// -------------------------------------------------------------------------
||||
Reference in New Issue
Block a user