1
0

[HUDI-1801] FlinkMergeHandle rolling over may miss to rename the latest file handle (#2831)

The FlinkMergeHandle may rename the (N-1)-th file handle instead of the
latest one, which causes data duplication.
This commit is contained in:
Danny Chan
2021-04-16 11:40:53 +08:00
committed by GitHub
parent 191470d1fc
commit b6d949b48a
4 changed files with 49 additions and 30 deletions

View File

@@ -324,6 +324,34 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
assertRowsEquals(result2, expected);
}
@ParameterizedTest
@EnumSource(value = ExecMode.class)
void testUpsertWithMiniBatches(ExecMode execMode) {
  // Select the table environment matching the execution mode under test.
  TableEnvironment tEnv = execMode == ExecMode.BATCH ? batchTableEnv : streamTableEnv;

  // A tiny write batch size forces frequent mini-batch flushes, exercising
  // the merge-handle rollover path this test is designed to cover.
  Map<String, String> tableOptions = new HashMap<>();
  tableOptions.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
  tableOptions.put(FlinkOptions.WRITE_BATCH_SIZE.key(), "0.001");
  tEnv.executeSql(TestConfigurations.getCreateHoodieTableDDL("t1", tableOptions));

  // Seed a single record for key 'id1'.
  execInsertSql(tEnv, "insert into t1 values\n"
      + "('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1')");

  // Upsert the same key several times in one statement; only the last
  // version of the row should remain visible after the merge.
  execInsertSql(tEnv, "insert into t1 values\n"
      + "('id1','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),\n"
      + "('id1','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par1'),\n"
      + "('id1','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par1'),\n"
      + "('id1','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par1')");

  List<Row> rows = CollectionUtil.iterableToList(
      () -> tEnv.sqlQuery("select * from t1").execute().collect());
  assertRowsEquals(rows, "[id1,Sophia,18,1970-01-01T00:00:05,par1]");
}
// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------