[HUDI-1788] Insert overwrite (table) for Flink writer (#2808)
Supports `INSERT OVERWRITE` and `INSERT OVERWRITE TABLE` for Flink writer.
This commit is contained in:
@@ -20,6 +20,7 @@ package org.apache.hudi.sink;
|
||||
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
@@ -104,9 +105,8 @@ public class TestStreamWriteOperatorCoordinator {
|
||||
coordinator.handleEventFromOperator(1, event1);
|
||||
|
||||
coordinator.notifyCheckpointComplete(1);
|
||||
String inflight = coordinator.getWriteClient()
|
||||
.getInflightAndRequestedInstant(FlinkOptions.TABLE_TYPE_COPY_ON_WRITE);
|
||||
String lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(FlinkOptions.TABLE_TYPE_COPY_ON_WRITE);
|
||||
String inflight = coordinator.getWriteClient().getLastPendingInstant(HoodieTableType.COPY_ON_WRITE);
|
||||
String lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(HoodieTableType.COPY_ON_WRITE);
|
||||
assertThat("Instant should be complete", lastCompleted, is(instant));
|
||||
assertNotEquals("", inflight, "Should start a new instant");
|
||||
assertNotEquals(instant, inflight, "Should start a new instant");
|
||||
@@ -156,7 +156,7 @@ public class TestStreamWriteOperatorCoordinator {
|
||||
|
||||
assertDoesNotThrow(() -> coordinator.notifyCheckpointComplete(1),
|
||||
"Returns early for empty write results");
|
||||
String lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(FlinkOptions.TABLE_TYPE_COPY_ON_WRITE);
|
||||
String lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(HoodieTableType.COPY_ON_WRITE);
|
||||
assertNull(lastCompleted, "Returns early for empty write results");
|
||||
assertNull(coordinator.getEventBuffer()[0]);
|
||||
|
||||
@@ -172,7 +172,7 @@ public class TestStreamWriteOperatorCoordinator {
|
||||
coordinator.handleEventFromOperator(1, event1);
|
||||
assertDoesNotThrow(() -> coordinator.notifyCheckpointComplete(2),
|
||||
"Commits the instant with partial events anyway");
|
||||
lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(FlinkOptions.TABLE_TYPE_COPY_ON_WRITE);
|
||||
lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(HoodieTableType.COPY_ON_WRITE);
|
||||
assertThat("Commits the instant with partial events anyway", lastCompleted, is(instant));
|
||||
}
|
||||
|
||||
|
||||
@@ -92,7 +92,7 @@ public class TestWriteCopyOnWrite {
|
||||
public void before() throws Exception {
|
||||
final String basePath = tempFile.getAbsolutePath();
|
||||
conf = TestConfigurations.getDefaultConf(basePath);
|
||||
conf.setString(FlinkOptions.TABLE_TYPE, getTableType());
|
||||
conf.setString(FlinkOptions.TABLE_TYPE, getTableType().name());
|
||||
setUp(conf);
|
||||
this.funcWrapper = new StreamWriteFunctionWrapper<>(tempFile.getAbsolutePath(), conf);
|
||||
}
|
||||
@@ -125,8 +125,7 @@ public class TestWriteCopyOnWrite {
|
||||
// this triggers the data write and event send
|
||||
funcWrapper.checkpointFunction(1);
|
||||
|
||||
String instant = funcWrapper.getWriteClient()
|
||||
.getInflightAndRequestedInstant(getTableType());
|
||||
String instant = funcWrapper.getWriteClient().getLastPendingInstant(getTableType());
|
||||
|
||||
final OperatorEvent nextEvent = funcWrapper.getNextEvent();
|
||||
MatcherAssert.assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
|
||||
@@ -152,7 +151,7 @@ public class TestWriteCopyOnWrite {
|
||||
funcWrapper.checkpointFunction(2);
|
||||
|
||||
String instant2 = funcWrapper.getWriteClient()
|
||||
.getInflightAndRequestedInstant(getTableType());
|
||||
.getLastPendingInstant(getTableType());
|
||||
assertNotEquals(instant, instant2);
|
||||
|
||||
final OperatorEvent nextEvent2 = funcWrapper.getNextEvent();
|
||||
@@ -181,7 +180,7 @@ public class TestWriteCopyOnWrite {
|
||||
funcWrapper.checkpointFunction(1);
|
||||
|
||||
String instant = funcWrapper.getWriteClient()
|
||||
.getInflightAndRequestedInstant(getTableType());
|
||||
.getLastPendingInstant(getTableType());
|
||||
assertNotNull(instant);
|
||||
|
||||
final OperatorEvent nextEvent = funcWrapper.getNextEvent();
|
||||
@@ -223,7 +222,7 @@ public class TestWriteCopyOnWrite {
|
||||
funcWrapper.checkpointFunction(1);
|
||||
|
||||
String instant = funcWrapper.getWriteClient()
|
||||
.getInflightAndRequestedInstant(getTableType());
|
||||
.getLastPendingInstant(getTableType());
|
||||
|
||||
final OperatorEvent nextEvent = funcWrapper.getNextEvent();
|
||||
assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
|
||||
@@ -309,7 +308,7 @@ public class TestWriteCopyOnWrite {
|
||||
funcWrapper.checkpointFunction(2);
|
||||
|
||||
String instant = funcWrapper.getWriteClient()
|
||||
.getInflightAndRequestedInstant(getTableType());
|
||||
.getLastPendingInstant(getTableType());
|
||||
|
||||
nextEvent = funcWrapper.getNextEvent();
|
||||
assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
|
||||
@@ -354,7 +353,7 @@ public class TestWriteCopyOnWrite {
|
||||
funcWrapper.checkpointFunction(2);
|
||||
|
||||
String instant = funcWrapper.getWriteClient()
|
||||
.getInflightAndRequestedInstant(getTableType());
|
||||
.getLastPendingInstant(getTableType());
|
||||
|
||||
nextEvent = funcWrapper.getNextEvent();
|
||||
assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
|
||||
@@ -409,7 +408,7 @@ public class TestWriteCopyOnWrite {
|
||||
assertNotNull(funcWrapper.getEventBuffer()[0], "The coordinator missed the event");
|
||||
|
||||
String instant = funcWrapper.getWriteClient()
|
||||
.getInflightAndRequestedInstant(getTableType());
|
||||
.getLastPendingInstant(getTableType());
|
||||
|
||||
funcWrapper.checkpointComplete(1);
|
||||
|
||||
@@ -493,7 +492,7 @@ public class TestWriteCopyOnWrite {
|
||||
funcWrapper.checkpointFunction(2);
|
||||
|
||||
String instant = funcWrapper.getWriteClient()
|
||||
.getInflightAndRequestedInstant(getTableType());
|
||||
.getLastPendingInstant(getTableType());
|
||||
|
||||
nextEvent = funcWrapper.getNextEvent();
|
||||
assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
|
||||
@@ -516,7 +515,7 @@ public class TestWriteCopyOnWrite {
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
private void checkInflightInstant(HoodieFlinkWriteClient writeClient) {
|
||||
final String instant = writeClient.getInflightAndRequestedInstant(getTableType());
|
||||
final String instant = writeClient.getLastPendingInstant(getTableType());
|
||||
assertNotNull(instant);
|
||||
}
|
||||
|
||||
@@ -528,7 +527,7 @@ public class TestWriteCopyOnWrite {
|
||||
final String instant;
|
||||
switch (state) {
|
||||
case REQUESTED:
|
||||
instant = writeClient.getInflightAndRequestedInstant(getTableType());
|
||||
instant = writeClient.getLastPendingInstant(getTableType());
|
||||
break;
|
||||
case COMPLETED:
|
||||
instant = writeClient.getLastCompletedInstant(getTableType());
|
||||
@@ -539,8 +538,8 @@ public class TestWriteCopyOnWrite {
|
||||
assertThat(instant, is(instantStr));
|
||||
}
|
||||
|
||||
protected String getTableType() {
|
||||
return HoodieTableType.COPY_ON_WRITE.name();
|
||||
protected HoodieTableType getTableType() {
|
||||
return HoodieTableType.COPY_ON_WRITE;
|
||||
}
|
||||
|
||||
protected void checkWrittenData(File baseFile, Map<String, String> expected) throws Exception {
|
||||
|
||||
@@ -90,7 +90,7 @@ public class TestWriteMergeOnRead extends TestWriteCopyOnWrite {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getTableType() {
|
||||
return HoodieTableType.MERGE_ON_READ.name();
|
||||
protected HoodieTableType getTableType() {
|
||||
return HoodieTableType.MERGE_ON_READ;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ public class TestWriteMergeOnReadWithCompact extends TestWriteCopyOnWrite {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getTableType() {
|
||||
return HoodieTableType.MERGE_ON_READ.name();
|
||||
protected HoodieTableType getTableType() {
|
||||
return HoodieTableType.MERGE_ON_READ;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.table;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.util.StreamerUtil;
|
||||
@@ -223,7 +224,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
TestData.writeData(TestData.DATA_SET_UPDATE_DELETE, conf);
|
||||
|
||||
String latestCommit = StreamerUtil.createWriteClient(conf, null)
|
||||
.getLastCompletedInstant(FlinkOptions.TABLE_TYPE_MERGE_ON_READ);
|
||||
.getLastCompletedInstant(HoodieTableType.MERGE_ON_READ);
|
||||
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
|
||||
@@ -276,6 +277,53 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
|
||||
+ "id8,Han,56,1970-01-01T00:00:08,par4]");
|
||||
}
|
||||
|
||||
/**
 * Checks {@code INSERT OVERWRITE} against table {@code t1}, once per {@link ExecMode} value.
 *
 * <p>Steps: create the table under the temp directory, insert eight rows spread over
 * partitions par1..par4, overwrite partition 'par1' only and compare the table content with
 * {@code TestData.DATA_SET_SOURCE_INSERT_OVERWRITE}, then overwrite without a partition
 * clause and expect exactly the two newly written rows.
 *
 * @param execMode selects the table environment: the batch one for {@code ExecMode.BATCH},
 *                 the streaming one otherwise
 */
@ParameterizedTest
|
||||
@EnumSource(value = ExecMode.class)
|
||||
void testInsertOverwrite(ExecMode execMode) {
|
||||
TableEnvironment tableEnv = execMode == ExecMode.BATCH ? batchTableEnv : streamTableEnv;
|
||||
Map<String, String> options = new HashMap<>();
|
||||
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
|
||||
String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
|
||||
tableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
// initial data: eight rows, two per partition 'par1'..'par4'
final String insertInto1 = "insert into t1 values\n"
|
||||
+ "('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1'),\n"
|
||||
+ "('id2','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),\n"
|
||||
+ "('id3','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par2'),\n"
|
||||
+ "('id4','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par2'),\n"
|
||||
+ "('id5','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par3'),\n"
|
||||
+ "('id6','Emma',20,TIMESTAMP '1970-01-01 00:00:06','par3'),\n"
|
||||
+ "('id7','Bob',44,TIMESTAMP '1970-01-01 00:00:07','par4'),\n"
|
||||
+ "('id8','Han',56,TIMESTAMP '1970-01-01 00:00:08','par4')";
|
||||
|
||||
execInsertSql(tableEnv, insertInto1);
|
||||
|
||||
// overwrite partition 'par1' only, increasing the age of each of its rows by 1;
// the partition value comes from the PARTITION clause, so the rows list four columns
|
||||
final String insertInto2 = "insert overwrite t1 partition(`partition`='par1') values\n"
|
||||
+ "('id1','Danny',24,TIMESTAMP '1970-01-01 00:00:01'),\n"
|
||||
+ "('id2','Stephen',34,TIMESTAMP '1970-01-01 00:00:02')\n";
|
||||
|
||||
execInsertSql(tableEnv, insertInto2);
|
||||
|
||||
// read the whole table back and compare with the expected post-overwrite data set
List<Row> result1 = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
assertRowsEquals(result1, TestData.DATA_SET_SOURCE_INSERT_OVERWRITE);
|
||||
|
||||
// overwrite the whole table
|
||||
final String insertInto3 = "insert overwrite t1 values\n"
|
||||
+ "('id1','Danny',24,TIMESTAMP '1970-01-01 00:00:01', 'par1'),\n"
|
||||
+ "('id2','Stephen',34,TIMESTAMP '1970-01-01 00:00:02', 'par2')\n";
|
||||
|
||||
execInsertSql(tableEnv, insertInto3);
|
||||
|
||||
// only the two rows written by the table-level overwrite are expected to remain
List<Row> result2 = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
final String expected = "["
|
||||
+ "id1,Danny,24,1970-01-01T00:00:01,par1, "
|
||||
+ "id2,Stephen,34,1970-01-01T00:00:02,par2]";
|
||||
assertRowsEquals(result2, expected);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Utilities
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@@ -167,6 +167,26 @@ public class TestData {
|
||||
TimestampData.fromEpochMillis(8000), StringData.fromString("par4"))
|
||||
);
|
||||
|
||||
// Data set of test_source.data after partition 'par1' has been overwritten:
// the two 'par1' rows (id1, id2) carry ages 24 and 34, followed by two rows
// each for 'par2', 'par3' and 'par4'.
// Used as the expected table content after the partition-level
// INSERT OVERWRITE in HoodieDataSourceITCase#testInsertOverwrite.
|
||||
public static List<RowData> DATA_SET_SOURCE_INSERT_OVERWRITE = Arrays.asList(
|
||||
insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 24,
|
||||
TimestampData.fromEpochMillis(1000), StringData.fromString("par1")),
|
||||
insertRow(StringData.fromString("id2"), StringData.fromString("Stephen"), 34,
|
||||
TimestampData.fromEpochMillis(2000), StringData.fromString("par1")),
|
||||
insertRow(StringData.fromString("id3"), StringData.fromString("Julian"), 53,
|
||||
TimestampData.fromEpochMillis(3000), StringData.fromString("par2")),
|
||||
insertRow(StringData.fromString("id4"), StringData.fromString("Fabian"), 31,
|
||||
TimestampData.fromEpochMillis(4000), StringData.fromString("par2")),
|
||||
insertRow(StringData.fromString("id5"), StringData.fromString("Sophia"), 18,
|
||||
TimestampData.fromEpochMillis(5000), StringData.fromString("par3")),
|
||||
insertRow(StringData.fromString("id6"), StringData.fromString("Emma"), 20,
|
||||
TimestampData.fromEpochMillis(6000), StringData.fromString("par3")),
|
||||
insertRow(StringData.fromString("id7"), StringData.fromString("Bob"), 44,
|
||||
TimestampData.fromEpochMillis(7000), StringData.fromString("par4")),
|
||||
insertRow(StringData.fromString("id8"), StringData.fromString("Han"), 56,
|
||||
TimestampData.fromEpochMillis(8000), StringData.fromString("par4"))
|
||||
);
|
||||
|
||||
public static List<RowData> DATA_SET_UPDATE_DELETE = Arrays.asList(
|
||||
// this is update
|
||||
insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 24,
|
||||
|
||||
Reference in New Issue
Block a user