1
0

[HUDI-1624] The state based index should bootstrap from existing base files (#2581)

This commit is contained in:
Danny Chan
2021-02-23 13:37:44 +08:00
committed by GitHub
parent 43a0776c7c
commit 3ceb1b4c83
7 changed files with 261 additions and 23 deletions

View File

@@ -20,6 +20,7 @@ package org.apache.hudi.operator;
import org.apache.hudi.client.HoodieFlinkWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.exception.HoodieException;
@@ -48,6 +49,7 @@ import static org.apache.hudi.operator.utils.TestData.checkWrittenData;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -382,6 +384,68 @@ public class StreamWriteFunctionTest {
checkWrittenData(tempFile, expected, 1);
}
/**
 * Checks that the index state is re-populated after it has been cleared.
 *
 * <p>The test runs in two phases:
 * <ol>
 *   <li>Ingest {@code TestData.DATA_SET_ONE}, checkpoint, and complete checkpoint 1 so the
 *       first batch is written.</li>
 *   <li>Clear the index state, ingest {@code TestData.DATA_SET_TWO}, and verify that the keys
 *       listed below are present in the index state again; then checkpoint and verify
 *       that the "all partitions loaded" flag is {@code false} before checkpoint 2 completes
 *       and {@code true} afterwards.</li>
 * </ol>
 *
 * @throws Exception if any of the wrapped function or coordinator calls fail
 */
@Test
public void testIndexStateBootstrap() throws Exception {
  // open the function and ingest data
  funcWrapper.openFunction();
  for (RowData rowData : TestData.DATA_SET_ONE) {
    funcWrapper.invoke(rowData);
  }

  // nothing has been checkpointed yet, so no data files are expected
  assertEmptyDataFiles();

  // this triggers the data write and event send
  funcWrapper.checkpointFunction(1);
  OperatorEvent nextEvent = funcWrapper.getNextEvent();
  assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
  funcWrapper.getCoordinator().handleEventFromOperator(0, nextEvent);
  assertNotNull(funcWrapper.getEventBuffer()[0], "The coordinator missed the event");
  funcWrapper.checkpointComplete(1);

  // Mark the index state as not fully loaded to trigger re-load from the filesystem.
  funcWrapper.clearIndexState();

  // upsert another data buffer
  for (RowData rowData : TestData.DATA_SET_TWO) {
    funcWrapper.invoke(rowData);
  }

  // the cleared index must contain every key below again after the second ingest
  checkIndexLoaded(
      new HoodieKey("id1", "par1"),
      new HoodieKey("id2", "par1"),
      new HoodieKey("id3", "par2"),
      new HoodieKey("id4", "par2"),
      new HoodieKey("id5", "par3"),
      new HoodieKey("id6", "par3"),
      new HoodieKey("id7", "par4"),
      new HoodieKey("id8", "par4"));

  // the data is not flushed yet
  checkWrittenData(tempFile, EXPECTED1);

  // this triggers the data write and event send
  funcWrapper.checkpointFunction(2);
  String instant = funcWrapper.getWriteClient()
      .getInflightAndRequestedInstant("COPY_ON_WRITE");
  nextEvent = funcWrapper.getNextEvent();
  assertThat("The operator expect to send an event", nextEvent, instanceOf(BatchWriteSuccessEvent.class));
  checkWrittenData(tempFile, EXPECTED2);
  funcWrapper.getCoordinator().handleEventFromOperator(0, nextEvent);
  assertNotNull(funcWrapper.getEventBuffer()[0], "The coordinator missed the event");
  checkInstantState(funcWrapper.getWriteClient(), HoodieInstant.State.REQUESTED, instant);

  // The flag is expected to stay false until checkpoint 2 completes; the failure message
  // states that expectation (it previously duplicated the assertTrue message below).
  assertFalse(funcWrapper.isAllPartitionsLoaded(),
      "All partitions assume NOT to be loaded into the index state before the checkpoint completes");

  funcWrapper.checkpointComplete(2);

  // the coordinator checkpoint commits the inflight instant.
  checkInstantState(funcWrapper.getWriteClient(), HoodieInstant.State.COMPLETED, instant);
  checkWrittenData(tempFile, EXPECTED2);
  assertTrue(funcWrapper.isAllPartitionsLoaded(),
      "All partitions assume to be loaded into the index state");
}
// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------
@@ -419,4 +483,11 @@ public class StreamWriteFunctionTest {
assertNotNull(dataFiles);
assertThat(dataFiles.length, is(0));
}
/**
 * Asserts that every given key is reported as present by
 * {@code funcWrapper.isKeyInState(key)}, checking the keys in argument order
 * and failing on the first one that is missing.
 *
 * @param keys the keys expected to be in the index state
 */
private void checkIndexLoaded(HoodieKey... keys) {
  for (int i = 0; i < keys.length; i++) {
    final HoodieKey expectedKey = keys[i];
    final String failureMessage = "Key: " + expectedKey + " assumes to be in the index state";
    assertTrue(funcWrapper.isKeyInState(expectedKey), failureMessage);
  }
}
}

View File

@@ -33,7 +33,7 @@ public class MockFunctionInitializationContext implements FunctionInitialization
/**
 * Always reports {@code false}.
 *
 * <p>The earlier {@code throw new UnsupportedOperationException()} has been dropped: it made
 * the following {@code return} unreachable, which does not compile, and callers need a
 * usable answer rather than an exception.
 *
 * @return {@code false} unconditionally
 */
@Override
public boolean isRestored() {
  // NOTE(review): presumably "false" stands for a fresh (non-restored) start in this mock —
  // confirm against the FunctionInitializationContext contract.
  return false;
}
@Override

View File

@@ -19,6 +19,7 @@
package org.apache.hudi.operator.utils;
import org.apache.hudi.client.HoodieFlinkWriteClient;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.operator.StreamWriteFunction;
import org.apache.hudi.operator.StreamWriteOperatorCoordinator;
@@ -162,4 +163,16 @@ public class StreamWriteFunctionWrapper<I> {
/**
 * Returns the coordinator instance held by this wrapper.
 *
 * @return the {@code coordinator} field of this wrapper
 */
public StreamWriteOperatorCoordinator getCoordinator() {
  return this.coordinator;
}
/**
 * Forwards to {@code bucketAssignerFunction.clearIndexState()}.
 */
public void clearIndexState() {
  bucketAssignerFunction.clearIndexState();
}
/**
 * Asks the bucket assigner function whether the given key is in its state.
 *
 * @param hoodieKey the key to look up
 * @return the result of {@code bucketAssignerFunction.isKeyInState(hoodieKey)}
 */
public boolean isKeyInState(HoodieKey hoodieKey) {
  final boolean present = bucketAssignerFunction.isKeyInState(hoodieKey);
  return present;
}
/**
 * Asks the bucket assigner function whether all partitions are loaded.
 *
 * @return the result of {@code bucketAssignerFunction.isAllPartitionsLoaded()}
 */
public boolean isAllPartitionsLoaded() {
  final boolean allLoaded = bucketAssignerFunction.isAllPartitionsLoaded();
  return allLoaded;
}
}