1
0

[HUDI-1788] Insert overwrite (table) for Flink writer (#2808)

Supports `INSERT OVERWRITE` and `INSERT OVERWRITE TABLE` for Flink
writer.
This commit is contained in:
Danny Chan
2021-04-14 10:23:37 +08:00
committed by GitHub
parent 65844a8d29
commit ab4a7b0b4a
23 changed files with 523 additions and 93 deletions

View File

@@ -20,6 +20,7 @@ package org.apache.hudi.sink;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.configuration.FlinkOptions;
@@ -104,9 +105,8 @@ public class TestStreamWriteOperatorCoordinator {
coordinator.handleEventFromOperator(1, event1);
coordinator.notifyCheckpointComplete(1);
String inflight = coordinator.getWriteClient()
.getInflightAndRequestedInstant(FlinkOptions.TABLE_TYPE_COPY_ON_WRITE);
String lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(FlinkOptions.TABLE_TYPE_COPY_ON_WRITE);
String inflight = coordinator.getWriteClient().getLastPendingInstant(HoodieTableType.COPY_ON_WRITE);
String lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(HoodieTableType.COPY_ON_WRITE);
assertThat("Instant should be complete", lastCompleted, is(instant));
assertNotEquals("", inflight, "Should start a new instant");
assertNotEquals(instant, inflight, "Should start a new instant");
@@ -156,7 +156,7 @@ public class TestStreamWriteOperatorCoordinator {
assertDoesNotThrow(() -> coordinator.notifyCheckpointComplete(1),
"Returns early for empty write results");
String lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(FlinkOptions.TABLE_TYPE_COPY_ON_WRITE);
String lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(HoodieTableType.COPY_ON_WRITE);
assertNull(lastCompleted, "Returns early for empty write results");
assertNull(coordinator.getEventBuffer()[0]);
@@ -172,7 +172,7 @@ public class TestStreamWriteOperatorCoordinator {
coordinator.handleEventFromOperator(1, event1);
assertDoesNotThrow(() -> coordinator.notifyCheckpointComplete(2),
"Commits the instant with partial events anyway");
lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(FlinkOptions.TABLE_TYPE_COPY_ON_WRITE);
lastCompleted = coordinator.getWriteClient().getLastCompletedInstant(HoodieTableType.COPY_ON_WRITE);
assertThat("Commits the instant with partial events anyway", lastCompleted, is(instant));
}

View File

@@ -92,7 +92,7 @@ public class TestWriteCopyOnWrite {
public void before() throws Exception {
final String basePath = tempFile.getAbsolutePath();
conf = TestConfigurations.getDefaultConf(basePath);
conf.setString(FlinkOptions.TABLE_TYPE, getTableType());
conf.setString(FlinkOptions.TABLE_TYPE, getTableType().name());
setUp(conf);
this.funcWrapper = new StreamWriteFunctionWrapper<>(tempFile.getAbsolutePath(), conf);
}
@@ -125,8 +125,7 @@ public class TestWriteCopyOnWrite {
// this triggers the data write and event send
funcWrapper.checkpointFunction(1);
String instant = funcWrapper.getWriteClient()
.getInflightAndRequestedInstant(getTableType());
String instant = funcWrapper.getWriteClient().getLastPendingInstant(getTableType());
final OperatorEvent nextEvent = funcWrapper.getNextEvent();
MatcherAssert.assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
@@ -152,7 +151,7 @@ public class TestWriteCopyOnWrite {
funcWrapper.checkpointFunction(2);
String instant2 = funcWrapper.getWriteClient()
.getInflightAndRequestedInstant(getTableType());
.getLastPendingInstant(getTableType());
assertNotEquals(instant, instant2);
final OperatorEvent nextEvent2 = funcWrapper.getNextEvent();
@@ -181,7 +180,7 @@ public class TestWriteCopyOnWrite {
funcWrapper.checkpointFunction(1);
String instant = funcWrapper.getWriteClient()
.getInflightAndRequestedInstant(getTableType());
.getLastPendingInstant(getTableType());
assertNotNull(instant);
final OperatorEvent nextEvent = funcWrapper.getNextEvent();
@@ -223,7 +222,7 @@ public class TestWriteCopyOnWrite {
funcWrapper.checkpointFunction(1);
String instant = funcWrapper.getWriteClient()
.getInflightAndRequestedInstant(getTableType());
.getLastPendingInstant(getTableType());
final OperatorEvent nextEvent = funcWrapper.getNextEvent();
assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
@@ -309,7 +308,7 @@ public class TestWriteCopyOnWrite {
funcWrapper.checkpointFunction(2);
String instant = funcWrapper.getWriteClient()
.getInflightAndRequestedInstant(getTableType());
.getLastPendingInstant(getTableType());
nextEvent = funcWrapper.getNextEvent();
assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
@@ -354,7 +353,7 @@ public class TestWriteCopyOnWrite {
funcWrapper.checkpointFunction(2);
String instant = funcWrapper.getWriteClient()
.getInflightAndRequestedInstant(getTableType());
.getLastPendingInstant(getTableType());
nextEvent = funcWrapper.getNextEvent();
assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
@@ -409,7 +408,7 @@ public class TestWriteCopyOnWrite {
assertNotNull(funcWrapper.getEventBuffer()[0], "The coordinator missed the event");
String instant = funcWrapper.getWriteClient()
.getInflightAndRequestedInstant(getTableType());
.getLastPendingInstant(getTableType());
funcWrapper.checkpointComplete(1);
@@ -493,7 +492,7 @@ public class TestWriteCopyOnWrite {
funcWrapper.checkpointFunction(2);
String instant = funcWrapper.getWriteClient()
.getInflightAndRequestedInstant(getTableType());
.getLastPendingInstant(getTableType());
nextEvent = funcWrapper.getNextEvent();
assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
@@ -516,7 +515,7 @@ public class TestWriteCopyOnWrite {
@SuppressWarnings("rawtypes")
private void checkInflightInstant(HoodieFlinkWriteClient writeClient) {
final String instant = writeClient.getInflightAndRequestedInstant(getTableType());
final String instant = writeClient.getLastPendingInstant(getTableType());
assertNotNull(instant);
}
@@ -528,7 +527,7 @@ public class TestWriteCopyOnWrite {
final String instant;
switch (state) {
case REQUESTED:
instant = writeClient.getInflightAndRequestedInstant(getTableType());
instant = writeClient.getLastPendingInstant(getTableType());
break;
case COMPLETED:
instant = writeClient.getLastCompletedInstant(getTableType());
@@ -539,8 +538,8 @@ public class TestWriteCopyOnWrite {
assertThat(instant, is(instantStr));
}
protected String getTableType() {
return HoodieTableType.COPY_ON_WRITE.name();
protected HoodieTableType getTableType() {
return HoodieTableType.COPY_ON_WRITE;
}
protected void checkWrittenData(File baseFile, Map<String, String> expected) throws Exception {

View File

@@ -90,7 +90,7 @@ public class TestWriteMergeOnRead extends TestWriteCopyOnWrite {
}
@Override
protected String getTableType() {
return HoodieTableType.MERGE_ON_READ.name();
protected HoodieTableType getTableType() {
return HoodieTableType.MERGE_ON_READ;
}
}

View File

@@ -53,7 +53,7 @@ public class TestWriteMergeOnReadWithCompact extends TestWriteCopyOnWrite {
}
@Override
protected String getTableType() {
return HoodieTableType.MERGE_ON_READ.name();
protected HoodieTableType getTableType() {
return HoodieTableType.MERGE_ON_READ;
}
}

View File

@@ -18,6 +18,7 @@
package org.apache.hudi.table;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.util.StreamerUtil;
@@ -223,7 +224,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
TestData.writeData(TestData.DATA_SET_UPDATE_DELETE, conf);
String latestCommit = StreamerUtil.createWriteClient(conf, null)
.getLastCompletedInstant(FlinkOptions.TABLE_TYPE_MERGE_ON_READ);
.getLastCompletedInstant(HoodieTableType.MERGE_ON_READ);
Map<String, String> options = new HashMap<>();
options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
@@ -276,6 +277,53 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
+ "id8,Han,56,1970-01-01T00:00:08,par4]");
}
@ParameterizedTest
@EnumSource(value = ExecMode.class)
void testInsertOverwrite(ExecMode execMode) {
  // Select the table environment that matches the execution mode under test.
  final TableEnvironment tEnv;
  if (execMode == ExecMode.BATCH) {
    tEnv = batchTableEnv;
  } else {
    tEnv = streamTableEnv;
  }

  // Register Hoodie table 't1' backed by the per-test temp directory.
  Map<String, String> tableOptions = new HashMap<>();
  tableOptions.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
  tEnv.executeSql(TestConfigurations.getCreateHoodieTableDDL("t1", tableOptions));

  // Seed the table: two rows in each of the four partitions par1..par4.
  final String insertInto1 = "insert into t1 values\n"
      + "('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1'),\n"
      + "('id2','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),\n"
      + "('id3','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par2'),\n"
      + "('id4','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par2'),\n"
      + "('id5','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par3'),\n"
      + "('id6','Emma',20,TIMESTAMP '1970-01-01 00:00:06','par3'),\n"
      + "('id7','Bob',44,TIMESTAMP '1970-01-01 00:00:07','par4'),\n"
      + "('id8','Han',56,TIMESTAMP '1970-01-01 00:00:08','par4')";
  execInsertSql(tEnv, insertInto1);

  // Overwrite only partition 'par1', bumping each age by 1; other partitions
  // must be left untouched by the partition-scoped INSERT OVERWRITE.
  final String insertInto2 = "insert overwrite t1 partition(`partition`='par1') values\n"
      + "('id1','Danny',24,TIMESTAMP '1970-01-01 00:00:01'),\n"
      + "('id2','Stephen',34,TIMESTAMP '1970-01-01 00:00:02')\n";
  execInsertSql(tEnv, insertInto2);

  List<Row> partitionOverwriteRows = CollectionUtil.iterableToList(
      () -> tEnv.sqlQuery("select * from t1").execute().collect());
  assertRowsEquals(partitionOverwriteRows, TestData.DATA_SET_SOURCE_INSERT_OVERWRITE);

  // Overwrite the whole table: afterwards only these two rows should remain.
  final String insertInto3 = "insert overwrite t1 values\n"
      + "('id1','Danny',24,TIMESTAMP '1970-01-01 00:00:01', 'par1'),\n"
      + "('id2','Stephen',34,TIMESTAMP '1970-01-01 00:00:02', 'par2')\n";
  execInsertSql(tEnv, insertInto3);

  List<Row> tableOverwriteRows = CollectionUtil.iterableToList(
      () -> tEnv.sqlQuery("select * from t1").execute().collect());
  final String expected = "["
      + "id1,Danny,24,1970-01-01T00:00:01,par1, "
      + "id2,Stephen,34,1970-01-01T00:00:02,par2]";
  assertRowsEquals(tableOverwriteRows, expected);
}
// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------

View File

@@ -167,6 +167,26 @@ public class TestData {
TimestampData.fromEpochMillis(8000), StringData.fromString("par4"))
);
// Expected table contents after an `INSERT OVERWRITE` of partition 'par1' on
// the test_source.data data set: the two 'par1' rows carry the overwritten
// ages (Danny 24, Stephen 34) while the rows in par2..par4 are unchanged.
// NOTE(review): intentionally mirrors the sibling data sets' Arrays.asList
// style; presumably never reassigned or mutated by tests — confirm before
// making it final/immutable.
public static List<RowData> DATA_SET_SOURCE_INSERT_OVERWRITE = Arrays.asList(
insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 24,
TimestampData.fromEpochMillis(1000), StringData.fromString("par1")),
insertRow(StringData.fromString("id2"), StringData.fromString("Stephen"), 34,
TimestampData.fromEpochMillis(2000), StringData.fromString("par1")),
insertRow(StringData.fromString("id3"), StringData.fromString("Julian"), 53,
TimestampData.fromEpochMillis(3000), StringData.fromString("par2")),
insertRow(StringData.fromString("id4"), StringData.fromString("Fabian"), 31,
TimestampData.fromEpochMillis(4000), StringData.fromString("par2")),
insertRow(StringData.fromString("id5"), StringData.fromString("Sophia"), 18,
TimestampData.fromEpochMillis(5000), StringData.fromString("par3")),
insertRow(StringData.fromString("id6"), StringData.fromString("Emma"), 20,
TimestampData.fromEpochMillis(6000), StringData.fromString("par3")),
insertRow(StringData.fromString("id7"), StringData.fromString("Bob"), 44,
TimestampData.fromEpochMillis(7000), StringData.fromString("par4")),
insertRow(StringData.fromString("id8"), StringData.fromString("Han"), 56,
TimestampData.fromEpochMillis(8000), StringData.fromString("par4"))
);
public static List<RowData> DATA_SET_UPDATE_DELETE = Arrays.asList(
// this is update
insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 24,