1
0

[HUDI-2671] Fix kafka offset handling in Kafka Connect protocol (#4021)

Co-authored-by: Rajesh Mahindra <rmahindra@Rajeshs-MacBook-Pro.local>
This commit is contained in:
rmahindra123
2021-11-24 10:03:58 -08:00
committed by GitHub
parent 9af219b7c1
commit 90f2ea2f12
4 changed files with 147 additions and 118 deletions

View File

@@ -26,7 +26,7 @@ import org.apache.hudi.connect.writers.KafkaConnectConfigs;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.helper.MockKafkaControlAgent;
import org.apache.hudi.helper.TestHudiWriterProvider;
import org.apache.hudi.helper.TestKafkaConnect;
import org.apache.hudi.helper.MockKafkaConnect;
import org.apache.kafka.common.TopicPartition;
import org.junit.jupiter.api.BeforeEach;
@@ -41,23 +41,24 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestConnectTransactionParticipant {
private static final String TOPIC_NAME = "kafka-connect-test-topic";
private static final int NUM_RECORDS_BATCH = 5;
private static final int PARTITION_NUMBER = 4;
private ConnectTransactionParticipant participant;
private MockCoordinator coordinator;
private MockCoordinator mockCoordinator;
private TopicPartition partition;
private KafkaConnectConfigs configs;
private KafkaControlAgent kafkaControlAgent;
private TestHudiWriterProvider testHudiWriterProvider;
private TestKafkaConnect testKafkaConnect;
private MockKafkaConnect mockKafkaConnect;
@BeforeEach
public void setUp() throws Exception {
partition = new TopicPartition(TOPIC_NAME, PARTITION_NUMBER);
kafkaControlAgent = new MockKafkaControlAgent();
testKafkaConnect = new TestKafkaConnect(partition);
coordinator = new MockCoordinator(kafkaControlAgent);
coordinator.start();
mockKafkaConnect = new MockKafkaConnect(partition);
mockCoordinator = new MockCoordinator(kafkaControlAgent);
mockCoordinator.start();
configs = KafkaConnectConfigs.newBuilder()
.build();
initializeParticipant();
@@ -66,26 +67,19 @@ public class TestConnectTransactionParticipant {
@ParameterizedTest
@EnumSource(value = CoordinatorFailureTestScenarios.class)
public void testAllCoordinatorFailureScenarios(CoordinatorFailureTestScenarios testScenario) {
int expectedRecordsWritten = 0;
try {
assertTrue(mockKafkaConnect.isPaused());
switch (testScenario) {
case REGULAR_SCENARIO:
expectedRecordsWritten += testKafkaConnect.putRecordsToParticipant();
assertTrue(testKafkaConnect.isPaused());
break;
case COORDINATOR_FAILED_AFTER_START_COMMIT:
testKafkaConnect.putRecordsToParticipant();
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
testKafkaConnect.putRecordsToParticipant();
triggerAndProcessStartCommit();
// Coordinator Failed
initializeCoordinator();
break;
case COORDINATOR_FAILED_AFTER_END_COMMIT:
testKafkaConnect.putRecordsToParticipant();
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
testKafkaConnect.putRecordsToParticipant();
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
expectedRecordsWritten += testKafkaConnect.putRecordsToParticipant();
triggerAndProcessStartCommit();
triggerAndProcessEndCommit();
// Coordinator Failed
initializeCoordinator();
break;
@@ -93,18 +87,8 @@ public class TestConnectTransactionParticipant {
throw new HoodieException("Unknown test scenario " + testScenario);
}
// Regular Case or Coordinator Recovery Case
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
expectedRecordsWritten += testKafkaConnect.putRecordsToParticipant();
assertTrue(testKafkaConnect.isResumed());
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
testKafkaConnect.putRecordsToParticipant();
assertTrue(testKafkaConnect.isPaused());
coordinator.sendEventFromCoordinator(ControlMessage.EventType.ACK_COMMIT);
testKafkaConnect.putRecordsToParticipant();
assertEquals(testHudiWriterProvider.getLatestNumberWrites(), expectedRecordsWritten);
// Ensure Coordinator and participant are in sync in the kafka offsets
assertEquals(participant.getLastKafkaCommittedOffset(), coordinator.getCommittedKafkaOffset());
// Despite failures in the previous commit, a fresh 2-phase commit should PASS.
testTwoPhaseCommit(0);
} catch (Exception exception) {
throw new HoodieException("Unexpected test failure ", exception);
}
@@ -114,62 +98,38 @@ public class TestConnectTransactionParticipant {
@ParameterizedTest
@EnumSource(value = ParticipantFailureTestScenarios.class)
public void testAllParticipantFailureScenarios(ParticipantFailureTestScenarios testScenario) {
int expectedRecordsWritten = 0;
try {
int currentKafkaOffset = 0;
switch (testScenario) {
case FAILURE_BEFORE_START_COMMIT:
testKafkaConnect.putRecordsToParticipant();
// Participant fails
// Participant failing after START_COMMIT will not write any data in this commit cycle.
initializeParticipant();
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
expectedRecordsWritten += testKafkaConnect.putRecordsToParticipant();
assertTrue(testKafkaConnect.isResumed());
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
testKafkaConnect.putRecordsToParticipant();
assertTrue(testKafkaConnect.isPaused());
coordinator.sendEventFromCoordinator(ControlMessage.EventType.ACK_COMMIT);
testKafkaConnect.putRecordsToParticipant();
assertEquals(testHudiWriterProvider.getLatestNumberWrites(), expectedRecordsWritten);
// Ensure Coordinator and participant are in sync in the kafka offsets
assertEquals(participant.getLastKafkaCommittedOffset(), coordinator.getCommittedKafkaOffset());
break;
case FAILURE_AFTER_START_COMMIT:
testKafkaConnect.putRecordsToParticipant();
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
testKafkaConnect.putRecordsToParticipant();
// Participant fails
triggerAndProcessStartCommit();
// Participant failing after START_COMMIT will not write any data in this commit cycle.
initializeParticipant();
testKafkaConnect.putRecordsToParticipant();
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
testKafkaConnect.putRecordsToParticipant();
assertTrue(testKafkaConnect.isPaused());
coordinator.sendEventFromCoordinator(ControlMessage.EventType.ACK_COMMIT);
testKafkaConnect.putRecordsToParticipant();
assertEquals(testHudiWriterProvider.getLatestNumberWrites(), expectedRecordsWritten);
// Ensure Coordinator and participant are in sync in the kafka offsets
assertEquals(participant.getLastKafkaCommittedOffset(), coordinator.getCommittedKafkaOffset());
triggerAndProcessEndCommit();
triggerAndProcessAckCommit();
break;
case FAILURE_AFTER_END_COMMIT:
testKafkaConnect.putRecordsToParticipant();
coordinator.sendEventFromCoordinator(ControlMessage.EventType.START_COMMIT);
testKafkaConnect.putRecordsToParticipant();
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
testKafkaConnect.putRecordsToParticipant();
// Participant fails
// Regular Case or Coordinator Recovery Case
triggerAndProcessStartCommit();
triggerAndProcessEndCommit();
initializeParticipant();
testKafkaConnect.putRecordsToParticipant();
coordinator.sendEventFromCoordinator(ControlMessage.EventType.END_COMMIT);
testKafkaConnect.putRecordsToParticipant();
assertTrue(testKafkaConnect.isPaused());
coordinator.sendEventFromCoordinator(ControlMessage.EventType.ACK_COMMIT);
testKafkaConnect.putRecordsToParticipant();
assertEquals(testHudiWriterProvider.getLatestNumberWrites(), expectedRecordsWritten);
// Ensure Coordinator and participant are in sync in the kafka offsets
assertEquals(participant.getLastKafkaCommittedOffset(), coordinator.getCommittedKafkaOffset());
triggerAndProcessAckCommit();
// Participant failing after an END_COMMIT should not cause issues with the present commit,
// since the data would have been written by the previous participant before failing,
// and hence would have moved the kafka offset.
currentKafkaOffset = NUM_RECORDS_BATCH;
break;
default:
throw new HoodieException("Unknown test scenario " + testScenario);
}
// Despite failures in the previous commit, a fresh 2-phase commit should PASS.
testTwoPhaseCommit(currentKafkaOffset);
} catch (Exception exception) {
throw new HoodieException("Unexpected test failure ", exception);
}
@@ -180,15 +140,49 @@ public class TestConnectTransactionParticipant {
participant = new ConnectTransactionParticipant(
partition,
kafkaControlAgent,
testKafkaConnect,
mockKafkaConnect,
testHudiWriterProvider);
testKafkaConnect.setParticipant(participant);
mockKafkaConnect.setParticipant(participant);
participant.start();
}
private void initializeCoordinator() {
coordinator = new MockCoordinator(kafkaControlAgent);
coordinator.start();
mockCoordinator = new MockCoordinator(kafkaControlAgent);
mockCoordinator.start();
}
/**
 * Drives a single two-phase commit from START_COMMIT through END_COMMIT to ACK_COMMIT
 * and validates the resulting state.
 *
 * <p>Asserts that the latest number of writes reported by the writer provider equals
 * NUM_RECORDS_BATCH, that the mock Kafka Connect offset is exactly NUM_RECORDS_BATCH
 * past {@code currentKafkaOffset}, and that participant and coordinator report the
 * same committed kafka offset.
 *
 * @param currentKafkaOffset the kafka offset the commit cycle is expected to start from
 */
private void testTwoPhaseCommit(long currentKafkaOffset) {
  // The offset must advance by one batch only, regardless of earlier failure scenarios.
  final long expectedKafkaOffset = currentKafkaOffset + NUM_RECORDS_BATCH;

  triggerAndProcessStartCommit();
  triggerAndProcessEndCommit();
  triggerAndProcessAckCommit();

  // Records written in this cycle and the position of the mock kafka stream.
  assertEquals(NUM_RECORDS_BATCH, testHudiWriterProvider.getLatestNumberWrites());
  assertEquals(expectedKafkaOffset, mockKafkaConnect.getCurrentKafkaOffset());

  // Coordinator and participant must agree on the committed kafka offset.
  assertEquals(participant.getLastKafkaCommittedOffset(), mockCoordinator.getCommittedKafkaOffset());
}
/**
 * Sends START_COMMIT from the mock coordinator, publishes one batch of
 * NUM_RECORDS_BATCH records to the participant, and asserts that the
 * mock Kafka Connect is in the resumed state afterwards.
 */
private void triggerAndProcessStartCommit() {
  final ControlMessage.EventType startEvent = ControlMessage.EventType.START_COMMIT;
  mockCoordinator.sendEventFromCoordinator(startEvent);

  mockKafkaConnect.publishBatchRecordsToParticipant(NUM_RECORDS_BATCH);

  assertTrue(mockKafkaConnect.isResumed());
}
/**
 * Sends END_COMMIT from the mock coordinator, lets the participant process
 * with an empty batch (zero new records), and asserts that the mock
 * Kafka Connect is paused afterwards.
 */
private void triggerAndProcessEndCommit() {
  final ControlMessage.EventType endEvent = ControlMessage.EventType.END_COMMIT;
  mockCoordinator.sendEventFromCoordinator(endEvent);

  // Zero records: only gives the participant a processing pass for the event.
  mockKafkaConnect.publishBatchRecordsToParticipant(0);

  assertTrue(mockKafkaConnect.isPaused());
}
/**
 * Sends ACK_COMMIT from the mock coordinator, lets the participant process
 * with an empty batch (zero new records), and asserts that the mock
 * Kafka Connect is still paused afterwards.
 */
private void triggerAndProcessAckCommit() {
  final ControlMessage.EventType ackEvent = ControlMessage.EventType.ACK_COMMIT;
  mockCoordinator.sendEventFromCoordinator(ackEvent);

  // Zero records: only gives the participant a processing pass for the event.
  mockKafkaConnect.publishBatchRecordsToParticipant(0);

  assertTrue(mockKafkaConnect.isPaused());
}
private static class MockCoordinator implements TransactionCoordinator {
@@ -279,5 +273,4 @@ public class TestConnectTransactionParticipant {
FAILURE_AFTER_START_COMMIT,
FAILURE_AFTER_END_COMMIT,
}
}

View File

@@ -25,7 +25,6 @@ import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTaskContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
@@ -33,20 +32,26 @@ import java.util.Set;
/**
* Helper class that emulates the Kafka Connect f/w and additionally
* implements {@link SinkTaskContext} for testing purposes.
*
* Every time the consumer (Participant) calls resume, a fixed
* batch of kafka records from the current offset is pushed. If
* the consumer resets the offsets, then a fresh batch of records
* is sent from the new offset.
*/
public class TestKafkaConnect implements SinkTaskContext {
public class MockKafkaConnect implements SinkTaskContext {
private static final int NUM_RECORDS_BATCH = 5;
private final TopicPartition testPartition;
private TransactionParticipant participant;
private long currentKafkaOffset;
private boolean isPaused;
private boolean isResetOffset;
public TestKafkaConnect(TopicPartition testPartition) {
public MockKafkaConnect(TopicPartition testPartition) {
this.testPartition = testPartition;
isPaused = false;
currentKafkaOffset = 0L;
isResetOffset = false;
}
public void setParticipant(TransactionParticipant participant) {
@@ -61,23 +66,6 @@ public class TestKafkaConnect implements SinkTaskContext {
return !isPaused;
}
public int putRecordsToParticipant() throws IOException {
for (int i = 1; i <= NUM_RECORDS_BATCH; i++) {
participant.buffer(getNextKafkaRecord());
}
participant.processRecords();
return NUM_RECORDS_BATCH;
}
public SinkRecord getNextKafkaRecord() {
return new SinkRecord(testPartition.topic(),
testPartition.partition(),
Schema.OPTIONAL_BYTES_SCHEMA,
("key-" + currentKafkaOffset).getBytes(),
Schema.OPTIONAL_BYTES_SCHEMA,
"value".getBytes(), currentKafkaOffset++);
}
public long getCurrentKafkaOffset() {
return currentKafkaOffset;
}
@@ -100,7 +88,7 @@ public class TestKafkaConnect implements SinkTaskContext {
public void offset(Map<TopicPartition, Long> offsets) {
for (TopicPartition tp : offsets.keySet()) {
if (tp.equals(testPartition)) {
currentKafkaOffset = offsets.get(tp);
resetOffset(offsets.get(tp));
}
}
}
@@ -108,7 +96,7 @@ public class TestKafkaConnect implements SinkTaskContext {
@Override
public void offset(TopicPartition tp, long offset) {
if (tp.equals(testPartition)) {
currentKafkaOffset = offset;
resetOffset(offset);
}
}
@@ -129,6 +117,33 @@ public class TestKafkaConnect implements SinkTaskContext {
@Override
public void requestCommit() {
}
/**
 * Buffers {@code numRecords} records into the participant and then asks it to
 * process them. If an offset reset is recorded during that pass, another
 * batch of {@code numRecords} records is sent, starting from the new offset,
 * and this repeats until a pass completes without a reset.
 *
 * @param numRecords number of records to buffer in each pass
 * @return {@code numRecords}, i.e. the per-pass batch size (not the total sent across retries)
 */
public int publishBatchRecordsToParticipant(int numRecords) {
  boolean resendNeeded = true;
  while (resendNeeded) {
    // Clear the flag first so that a reset raised during this pass is detected below.
    isResetOffset = false;
    for (int recordNum = 1; recordNum <= numRecords; recordNum++) {
      participant.buffer(getNextKafkaRecord());
    }
    participant.processRecords();
    resendNeeded = isResetOffset;
  }
  return numRecords;
}
/**
 * Builds the record for the current kafka offset on the test partition and
 * then advances the current offset by one.
 *
 * <p>The key is the bytes of {@code "key-" + offset} and the value is the bytes
 * of {@code "value"}; both use {@link Schema#OPTIONAL_BYTES_SCHEMA}.
 *
 * @return a new {@link SinkRecord} carrying the offset that was current on entry
 */
private SinkRecord getNextKafkaRecord() {
  final String topic = testPartition.topic();
  final int partitionId = testPartition.partition();
  final byte[] keyBytes = ("key-" + currentKafkaOffset).getBytes();
  final byte[] valueBytes = "value".getBytes();

  // Post-increment semantics: the record gets the current offset, then the offset moves on.
  final long recordOffset = currentKafkaOffset;
  currentKafkaOffset = recordOffset + 1;

  return new SinkRecord(
      topic,
      partitionId,
      Schema.OPTIONAL_BYTES_SCHEMA,
      keyBytes,
      Schema.OPTIONAL_BYTES_SCHEMA,
      valueBytes,
      recordOffset);
}
/**
 * Moves the current kafka offset to {@code newOffset} and raises the reset flag,
 * which {@code publishBatchRecordsToParticipant} checks to decide whether to
 * send another batch.
 *
 * @param newOffset the offset the next generated record should carry
 */
private void resetOffset(long newOffset) {
  this.isResetOffset = true;
  this.currentKafkaOffset = newOffset;
}
}