[HUDI-2671] Fix kafka offset handling in Kafka Connect protocol (#4021)
Co-authored-by: Rajesh Mahindra <rmahindra@Rajeshs-MacBook-Pro.local>
This commit is contained in:
@@ -26,7 +26,7 @@ import org.apache.hudi.connect.writers.KafkaConnectConfigs;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.helper.MockKafkaControlAgent;
|
||||
import org.apache.hudi.helper.TestHudiWriterProvider;
|
||||
import org.apache.hudi.helper.TestKafkaConnect;
|
||||
import org.apache.hudi.helper.MockKafkaConnect;
|
||||
|
||||
import org.apache.kafka.common.TopicPartition;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@@ -41,23 +41,24 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
public class TestConnectTransactionParticipant {
|
||||
|
||||
private static final String TOPIC_NAME = "kafka-connect-test-topic";
|
||||
private static final int NUM_RECORDS_BATCH = 5;
|
||||
private static final int PARTITION_NUMBER = 4;
|
||||
|
||||
private ConnectTransactionParticipant participant;
|
||||
private MockCoordinator coordinator;
|
||||
private MockCoordinator mockCoordinator;
|
||||
private TopicPartition partition;
|
||||
private KafkaConnectConfigs configs;
|
||||
private KafkaControlAgent kafkaControlAgent;
|
||||
private TestHudiWriterProvider testHudiWriterProvider;
|
||||
private TestKafkaConnect testKafkaConnect;
|
||||
private MockKafkaConnect mockKafkaConnect;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
partition = new TopicPartition(TOPIC_NAME, PARTITION_NUMBER);
|
||||
kafkaControlAgent = new MockKafkaControlAgent();
|
||||
testKafkaConnect = new TestKafkaConnect(partition);
|
||||
coordinator = new MockCoordinator(kafkaControlAgent);
|
||||
coordinator.start();
|
||||
mockKafkaConnect = new MockKafkaConnect(partition);
|
||||
mockCoordinator = new MockCoordinator(kafkaControlAgent);
|
||||
mockCoordinator.start();
|
||||
configs = KafkaConnectConfigs.newBuilder()
|
||||
.build();
|
||||
initializeParticipant();
|
||||
@@ -66,26 +67,19 @@ public class TestConnectTransactionParticipant {
|
||||
@ParameterizedTest
|
||||
@EnumSource(value = CoordinatorFailureTestScenarios.class)
|
||||
public void testAllCoordinatorFailureScenarios(CoordinatorFailureTestScenarios testScenario) {
|
||||
int expectedRecordsWritten = 0;
|
||||
try {
|
||||
assertTrue(mockKafkaConnect.isPaused());
|
||||
switch (testScenario) {
|
||||
case REGULAR_SCENARIO:
|
||||
expectedRecordsWritten += testKafkaConnect.putRecordsToParticipant();
|
||||
assertTrue(testKafkaConnect.isPaused());
|
||||
break;
|
||||
case COORDINATOR_FAILED_AFTER_START_COMMIT:
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
triggerAndProcessStartCommit();
|
||||
// Coordinator Failed
|
||||
initializeCoordinator();
|
||||
break;
|
||||
case COORDINATOR_FAILED_AFTER_END_COMMIT:
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
|
||||
expectedRecordsWritten += testKafkaConnect.putRecordsToParticipant();
|
||||
triggerAndProcessStartCommit();
|
||||
triggerAndProcessEndCommit();
|
||||
// Coordinator Failed
|
||||
initializeCoordinator();
|
||||
break;
|
||||
@@ -93,18 +87,8 @@ public class TestConnectTransactionParticipant {
|
||||
throw new HoodieException("Unknown test scenario " + testScenario);
|
||||
}
|
||||
|
||||
// Regular Case or Coordinator Recovery Case
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
|
||||
expectedRecordsWritten += testKafkaConnect.putRecordsToParticipant();
|
||||
assertTrue(testKafkaConnect.isResumed());
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
assertTrue(testKafkaConnect.isPaused());
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.ACK_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
assertEquals(testHudiWriterProvider.getLatestNumberWrites(), expectedRecordsWritten);
|
||||
// Ensure Coordinator and participant are in sync in the kafka offsets
|
||||
assertEquals(participant.getLastKafkaCommittedOffset(), coordinator.getCommittedKafkaOffset());
|
||||
// Despite failures in the previous commit, a fresh 2-phase commit should PASS.
|
||||
testTwoPhaseCommit(0);
|
||||
} catch (Exception exception) {
|
||||
throw new HoodieException("Unexpected test failure ", exception);
|
||||
}
|
||||
@@ -114,62 +98,38 @@ public class TestConnectTransactionParticipant {
|
||||
@ParameterizedTest
|
||||
@EnumSource(value = ParticipantFailureTestScenarios.class)
|
||||
public void testAllParticipantFailureScenarios(ParticipantFailureTestScenarios testScenario) {
|
||||
int expectedRecordsWritten = 0;
|
||||
try {
|
||||
int currentKafkaOffset = 0;
|
||||
switch (testScenario) {
|
||||
case FAILURE_BEFORE_START_COMMIT:
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
// Participant fails
|
||||
// Participant failing before START_COMMIT will not write any data in this commit cycle.
|
||||
initializeParticipant();
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
|
||||
expectedRecordsWritten += testKafkaConnect.putRecordsToParticipant();
|
||||
assertTrue(testKafkaConnect.isResumed());
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
assertTrue(testKafkaConnect.isPaused());
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.ACK_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
assertEquals(testHudiWriterProvider.getLatestNumberWrites(), expectedRecordsWritten);
|
||||
// Ensure Coordinator and participant are in sync in the kafka offsets
|
||||
assertEquals(participant.getLastKafkaCommittedOffset(), coordinator.getCommittedKafkaOffset());
|
||||
break;
|
||||
case FAILURE_AFTER_START_COMMIT:
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
// Participant fails
|
||||
triggerAndProcessStartCommit();
|
||||
// Participant failing after START_COMMIT will not write any data in this commit cycle.
|
||||
initializeParticipant();
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
assertTrue(testKafkaConnect.isPaused());
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.ACK_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
assertEquals(testHudiWriterProvider.getLatestNumberWrites(), expectedRecordsWritten);
|
||||
// Ensure Coordinator and participant are in sync in the kafka offsets
|
||||
assertEquals(participant.getLastKafkaCommittedOffset(), coordinator.getCommittedKafkaOffset());
|
||||
triggerAndProcessEndCommit();
|
||||
triggerAndProcessAckCommit();
|
||||
break;
|
||||
case FAILURE_AFTER_END_COMMIT:
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
// Participant fails
|
||||
// Regular Case or Coordinator Recovery Case
|
||||
triggerAndProcessStartCommit();
|
||||
triggerAndProcessEndCommit();
|
||||
initializeParticipant();
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
assertTrue(testKafkaConnect.isPaused());
|
||||
coordinator.sendEventFromCoordinator(ControlMessage.EventType.ACK_COMMIT);
|
||||
testKafkaConnect.putRecordsToParticipant();
|
||||
assertEquals(testHudiWriterProvider.getLatestNumberWrites(), expectedRecordsWritten);
|
||||
// Ensure Coordinator and participant are in sync in the kafka offsets
|
||||
assertEquals(participant.getLastKafkaCommittedOffset(), coordinator.getCommittedKafkaOffset());
|
||||
triggerAndProcessAckCommit();
|
||||
|
||||
// Participant failing after an END_COMMIT should not cause issues with the present commit,
|
||||
// since the data would have been written by previous participant before failing
|
||||
// and hence moved the kafka offset.
|
||||
currentKafkaOffset = NUM_RECORDS_BATCH;
|
||||
break;
|
||||
default:
|
||||
throw new HoodieException("Unknown test scenario " + testScenario);
|
||||
}
|
||||
|
||||
// Despite failures in the previous commit, a fresh 2-phase commit should PASS.
|
||||
testTwoPhaseCommit(currentKafkaOffset);
|
||||
} catch (Exception exception) {
|
||||
throw new HoodieException("Unexpected test failure ", exception);
|
||||
}
|
||||
@@ -180,15 +140,49 @@ public class TestConnectTransactionParticipant {
|
||||
participant = new ConnectTransactionParticipant(
|
||||
partition,
|
||||
kafkaControlAgent,
|
||||
testKafkaConnect,
|
||||
mockKafkaConnect,
|
||||
testHudiWriterProvider);
|
||||
testKafkaConnect.setParticipant(participant);
|
||||
mockKafkaConnect.setParticipant(participant);
|
||||
participant.start();
|
||||
}
|
||||
|
||||
private void initializeCoordinator() {
|
||||
coordinator = new MockCoordinator(kafkaControlAgent);
|
||||
coordinator.start();
|
||||
mockCoordinator = new MockCoordinator(kafkaControlAgent);
|
||||
mockCoordinator.start();
|
||||
}
|
||||
|
||||
// Test and validate result of a single 2 Phase commit from START_COMMIT to ACK_COMMIT.
|
||||
// Validates that NUM_RECORDS_BATCH number of kafka records are written,
|
||||
// and the kafka offset only increments by NUM_RECORDS_BATCH.
// The currentKafkaOffset argument is the offset the mock Kafka Connect is expected
// to be at before this commit cycle starts; it is the baseline for the offset assertion below.
|
||||
private void testTwoPhaseCommit(long currentKafkaOffset) {
|
||||
// Drive one complete cycle: START_COMMIT -> END_COMMIT -> ACK_COMMIT.
triggerAndProcessStartCommit();
|
||||
triggerAndProcessEndCommit();
|
||||
triggerAndProcessAckCommit();
|
||||
|
||||
// Validate records written, current kafka offset and kafka offsets committed across
|
||||
// coordinator and participant are in sync despite failure scenarios.
|
||||
assertEquals(NUM_RECORDS_BATCH, testHudiWriterProvider.getLatestNumberWrites());
|
||||
// The mock's offset must have advanced by exactly one batch from the supplied baseline.
assertEquals((currentKafkaOffset + NUM_RECORDS_BATCH), mockKafkaConnect.getCurrentKafkaOffset());
|
||||
// Ensure Coordinator and participant are in sync in the kafka offsets
|
||||
assertEquals(participant.getLastKafkaCommittedOffset(), mockCoordinator.getCommittedKafkaOffset());
|
||||
}
|
||||
|
||||
// Sends START_COMMIT from the mock coordinator, publishes one batch of NUM_RECORDS_BATCH
// records to the participant, and asserts that the mock Kafka Connect reports itself as resumed.
private void triggerAndProcessStartCommit() {
|
||||
mockCoordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
|
||||
mockKafkaConnect.publishBatchRecordsToParticipant(NUM_RECORDS_BATCH);
|
||||
assertTrue(mockKafkaConnect.isResumed());
|
||||
}
|
||||
|
||||
// Sends END_COMMIT from the mock coordinator, publishes an empty batch (0 records) to the
// participant, and asserts that the mock Kafka Connect reports itself as paused.
private void triggerAndProcessEndCommit() {
|
||||
mockCoordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
|
||||
// Zero records are published here; no new data is handed to the participant in this step.
mockKafkaConnect.publishBatchRecordsToParticipant(0);
|
||||
assertTrue(mockKafkaConnect.isPaused());
|
||||
}
|
||||
|
||||
// Sends ACK_COMMIT from the mock coordinator, publishes an empty batch (0 records) to the
// participant, and asserts that the mock Kafka Connect still reports itself as paused.
private void triggerAndProcessAckCommit() {
|
||||
mockCoordinator.sendEventFromCoordinator(ControlMessage.EventType.ACK_COMMIT);
|
||||
// Zero records are published here; no new data is handed to the participant in this step.
mockKafkaConnect.publishBatchRecordsToParticipant(0);
|
||||
assertTrue(mockKafkaConnect.isPaused());
|
||||
}
|
||||
|
||||
private static class MockCoordinator implements TransactionCoordinator {
|
||||
@@ -279,5 +273,4 @@ public class TestConnectTransactionParticipant {
|
||||
FAILURE_AFTER_START_COMMIT,
|
||||
FAILURE_AFTER_END_COMMIT,
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -25,7 +25,6 @@ import org.apache.kafka.connect.data.Schema;
|
||||
import org.apache.kafka.connect.sink.SinkRecord;
|
||||
import org.apache.kafka.connect.sink.SinkTaskContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@@ -33,20 +32,26 @@ import java.util.Set;
|
||||
/**
|
||||
* Helper class that emulates the Kafka Connect f/w and additionally
|
||||
* implements {@link SinkTaskContext} for testing purposes.
|
||||
*
|
||||
* Every time the consumer (Participant) calls resume, a fixed
|
||||
* batch of kafka records from the current offset is pushed. If
|
||||
* the consumer resets the offsets, then a fresh batch of records
|
||||
* is sent from the new offset.
|
||||
*/
|
||||
public class TestKafkaConnect implements SinkTaskContext {
|
||||
public class MockKafkaConnect implements SinkTaskContext {
|
||||
|
||||
private static final int NUM_RECORDS_BATCH = 5;
|
||||
private final TopicPartition testPartition;
|
||||
|
||||
private TransactionParticipant participant;
|
||||
private long currentKafkaOffset;
|
||||
private boolean isPaused;
|
||||
private boolean isResetOffset;
|
||||
|
||||
public TestKafkaConnect(TopicPartition testPartition) {
|
||||
public MockKafkaConnect(TopicPartition testPartition) {
|
||||
this.testPartition = testPartition;
|
||||
isPaused = false;
|
||||
currentKafkaOffset = 0L;
|
||||
isResetOffset = false;
|
||||
}
|
||||
|
||||
public void setParticipant(TransactionParticipant participant) {
|
||||
@@ -61,23 +66,6 @@ public class TestKafkaConnect implements SinkTaskContext {
|
||||
return !isPaused;
|
||||
}
|
||||
|
||||
public int putRecordsToParticipant() throws IOException {
|
||||
for (int i = 1; i <= NUM_RECORDS_BATCH; i++) {
|
||||
participant.buffer(getNextKafkaRecord());
|
||||
}
|
||||
participant.processRecords();
|
||||
return NUM_RECORDS_BATCH;
|
||||
}
|
||||
|
||||
public SinkRecord getNextKafkaRecord() {
|
||||
return new SinkRecord(testPartition.topic(),
|
||||
testPartition.partition(),
|
||||
Schema.OPTIONAL_BYTES_SCHEMA,
|
||||
("key-" + currentKafkaOffset).getBytes(),
|
||||
Schema.OPTIONAL_BYTES_SCHEMA,
|
||||
"value".getBytes(), currentKafkaOffset++);
|
||||
}
|
||||
|
||||
public long getCurrentKafkaOffset() {
|
||||
return currentKafkaOffset;
|
||||
}
|
||||
@@ -100,7 +88,7 @@ public class TestKafkaConnect implements SinkTaskContext {
|
||||
public void offset(Map<TopicPartition, Long> offsets) {
|
||||
for (TopicPartition tp : offsets.keySet()) {
|
||||
if (tp.equals(testPartition)) {
|
||||
currentKafkaOffset = offsets.get(tp);
|
||||
resetOffset(offsets.get(tp));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -108,7 +96,7 @@ public class TestKafkaConnect implements SinkTaskContext {
|
||||
@Override
|
||||
public void offset(TopicPartition tp, long offset) {
|
||||
if (tp.equals(testPartition)) {
|
||||
currentKafkaOffset = offset;
|
||||
resetOffset(offset);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,6 +117,33 @@ public class TestKafkaConnect implements SinkTaskContext {
|
||||
|
||||
@Override
|
||||
public void requestCommit() {
|
||||
}
|
||||
|
||||
/**
 * Buffers {@code numRecords} freshly generated records into the participant and then calls
 * {@code processRecords()} on it. If the offset-reset flag is raised during a pass, the whole
 * pass is repeated (records are then generated from the reset offset) until a pass completes
 * without a reset.
 *
 * @param numRecords number of records to buffer per pass; with 0 only {@code processRecords()} runs
 * @return {@code numRecords}, unchanged, regardless of how many passes were needed
 */
public int publishBatchRecordsToParticipant(int numRecords) {
|
||||
// Send numRecords records to the participant.
|
||||
// If client resets offset, send another batch starting
|
||||
// from the new reset offset value
|
||||
do {
|
||||
// Clear the flag before each pass so the loop repeats only for a reset raised during this pass.
isResetOffset = false;
|
||||
for (int i = 1; i <= numRecords; i++) {
|
||||
participant.buffer(getNextKafkaRecord());
|
||||
}
|
||||
participant.processRecords();
|
||||
} while (isResetOffset);
|
||||
return numRecords;
|
||||
}
|
||||
|
||||
/**
 * Builds the next record for the test partition and advances the mock's current offset by one.
 * The key is the bytes of {@code "key-" + currentKafkaOffset}, the value is the bytes of
 * {@code "value"}, and both use {@code Schema.OPTIONAL_BYTES_SCHEMA}.
 *
 * NOTE(review): {@code getBytes()} is called without a charset, so the platform default
 * encoding is used; consider passing an explicit charset.
 *
 * @return a new {@link SinkRecord} carrying the pre-increment value of {@code currentKafkaOffset}
 */
private SinkRecord getNextKafkaRecord() {
|
||||
return new SinkRecord(testPartition.topic(),
|
||||
testPartition.partition(),
|
||||
Schema.OPTIONAL_BYTES_SCHEMA,
|
||||
("key-" + currentKafkaOffset).getBytes(),
|
||||
Schema.OPTIONAL_BYTES_SCHEMA,
|
||||
// Post-increment: this record gets the current offset, the next call gets offset + 1.
"value".getBytes(), currentKafkaOffset++);
|
||||
}
|
||||
|
||||
/**
 * Moves the mock's current offset to {@code newOffset} and raises the reset flag.
 * The flag is read by the do/while loop in {@code publishBatchRecordsToParticipant},
 * which sends another batch when it is set.
 *
 * @param newOffset offset from which subsequent records are generated
 */
private void resetOffset(long newOffset) {
|
||||
currentKafkaOffset = newOffset;
|
||||
isResetOffset = true;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user