1
0

[FLINK-1923] Exactly-once write for flink writer (#3002)

Co-authored-by: 喻兆靖 <yuzhaojing@bilibili.com>
This commit is contained in:
yuzhaojing
2021-05-28 14:58:21 +08:00
committed by GitHub
parent 7fed7352bd
commit bc18c39835
7 changed files with 133 additions and 11 deletions

View File

@@ -25,6 +25,7 @@ import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.sink.event.BatchWriteSuccessEvent;
import org.apache.hudi.sink.utils.StreamWriteFunctionWrapper;
import org.apache.hudi.utils.TestConfigurations;
@@ -52,6 +53,7 @@ import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
@@ -635,6 +637,55 @@ public class TestWriteCopyOnWrite {
checkWrittenData(tempFile, EXPECTED2);
}
@Test
public void testWriteExactlyOnce() throws Exception {
// reset the config option
conf.setBoolean(FlinkOptions.WRITE_EXACTLY_ONCE_ENABLED, true);
conf.setLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT, 3);
conf.setDouble(FlinkOptions.WRITE_TASK_MAX_SIZE, 200.0006); // 630 bytes buffer size
funcWrapper = new StreamWriteFunctionWrapper<>(tempFile.getAbsolutePath(), conf);
// open the function and ingest data
funcWrapper.openFunction();
for (RowData rowData : TestData.DATA_SET_INSERT) {
funcWrapper.invoke(rowData);
}
// no checkpoint, so the coordinator does not accept any events
assertTrue(
funcWrapper.getEventBuffer().length == 1
&& funcWrapper.getEventBuffer()[0] == null, "The coordinator events buffer expect to be empty");
// this triggers the data write and event send
funcWrapper.checkpointFunction(1);
assertTrue(funcWrapper.isConforming(), "The write function should be waiting for the instant to commit");
for (int i = 0; i < 2; i++) {
final OperatorEvent event = funcWrapper.getNextEvent(); // remove the first event first
assertThat("The operator expect to send an event", event, instanceOf(BatchWriteSuccessEvent.class));
funcWrapper.getCoordinator().handleEventFromOperator(0, event);
}
funcWrapper.checkpointComplete(1);
for (RowData rowData : TestData.DATA_SET_INSERT) {
funcWrapper.invoke(rowData);
}
assertFalse(funcWrapper.isConforming(), "The write function should finish waiting for the instant to commit");
// checkpoint for the next round, when there is eager flush but the write
// task is waiting for the instant commit ack, should throw for timeout.
funcWrapper.checkpointFunction(2);
assertThrows(HoodieException.class, () -> {
for (RowData rowData : TestData.DATA_SET_INSERT) {
funcWrapper.invoke(rowData);
}
}, "Timeout(500ms) while waiting for instant");
}
// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------

View File

@@ -197,4 +197,8 @@ public class StreamWriteFunctionWrapper<I> {
public boolean isKeyInState(HoodieKey hoodieKey) {
return this.bucketAssignerFunction.isKeyInState(hoodieKey);
}
public boolean isConforming() {
return this.writeFunction.isConfirming();
}
}