[FLINK-1923] Exactly-once write for flink writer (#3002)
Co-authored-by: 喻兆靖 <yuzhaojing@bilibili.com>
This commit is contained in:
@@ -25,6 +25,7 @@ import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.sink.event.BatchWriteSuccessEvent;
|
||||
import org.apache.hudi.sink.utils.StreamWriteFunctionWrapper;
|
||||
import org.apache.hudi.utils.TestConfigurations;
|
||||
@@ -52,6 +53,7 @@ import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
@@ -635,6 +637,55 @@ public class TestWriteCopyOnWrite {
|
||||
checkWrittenData(tempFile, EXPECTED2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteExactlyOnce() throws Exception {
|
||||
// reset the config option
|
||||
conf.setBoolean(FlinkOptions.WRITE_EXACTLY_ONCE_ENABLED, true);
|
||||
conf.setLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT, 3);
|
||||
conf.setDouble(FlinkOptions.WRITE_TASK_MAX_SIZE, 200.0006); // 630 bytes buffer size
|
||||
funcWrapper = new StreamWriteFunctionWrapper<>(tempFile.getAbsolutePath(), conf);
|
||||
|
||||
// open the function and ingest data
|
||||
|
||||
funcWrapper.openFunction();
|
||||
for (RowData rowData : TestData.DATA_SET_INSERT) {
|
||||
funcWrapper.invoke(rowData);
|
||||
}
|
||||
|
||||
// no checkpoint, so the coordinator does not accept any events
|
||||
assertTrue(
|
||||
funcWrapper.getEventBuffer().length == 1
|
||||
&& funcWrapper.getEventBuffer()[0] == null, "The coordinator events buffer expect to be empty");
|
||||
|
||||
// this triggers the data write and event send
|
||||
funcWrapper.checkpointFunction(1);
|
||||
assertTrue(funcWrapper.isConforming(), "The write function should be waiting for the instant to commit");
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
final OperatorEvent event = funcWrapper.getNextEvent(); // remove the first event first
|
||||
assertThat("The operator expect to send an event", event, instanceOf(BatchWriteSuccessEvent.class));
|
||||
funcWrapper.getCoordinator().handleEventFromOperator(0, event);
|
||||
}
|
||||
|
||||
funcWrapper.checkpointComplete(1);
|
||||
|
||||
for (RowData rowData : TestData.DATA_SET_INSERT) {
|
||||
funcWrapper.invoke(rowData);
|
||||
}
|
||||
|
||||
assertFalse(funcWrapper.isConforming(), "The write function should finish waiting for the instant to commit");
|
||||
|
||||
// checkpoint for the next round, when there is eager flush but the write
|
||||
// task is waiting for the instant commit ack, should throw for timeout.
|
||||
funcWrapper.checkpointFunction(2);
|
||||
|
||||
assertThrows(HoodieException.class, () -> {
|
||||
for (RowData rowData : TestData.DATA_SET_INSERT) {
|
||||
funcWrapper.invoke(rowData);
|
||||
}
|
||||
}, "Timeout(500ms) while waiting for instant");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Utilities
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@@ -197,4 +197,8 @@ public class StreamWriteFunctionWrapper<I> {
|
||||
public boolean isKeyInState(HoodieKey hoodieKey) {
|
||||
return this.bucketAssignerFunction.isKeyInState(hoodieKey);
|
||||
}
|
||||
|
||||
public boolean isConforming() {
|
||||
return this.writeFunction.isConfirming();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user