[HUDI-2890] Kafka Connect: Fix failed writes and avoid table service concurrent operations (#4211)
* Fix kafka connect readme * Fix handling of errors in write records for kafka connect * By default, ensure we skip error records and keep the pipeline alive * Fix indentation Co-authored-by: Rajesh Mahindra <rmahindra@Rajeshs-MacBook-Pro.local>
This commit is contained in:
@@ -50,7 +50,7 @@ import static org.mockito.Mockito.mock;
|
||||
public class TestConnectTransactionCoordinator {
|
||||
|
||||
private static final String TOPIC_NAME = "kafka-connect-test-topic";
|
||||
private static final int NUM_PARTITIONS = 4;
|
||||
private static final int TOTAL_KAFKA_PARTITIONS = 4;
|
||||
private static final int MAX_COMMIT_ROUNDS = 5;
|
||||
private static final int TEST_TIMEOUT_SECS = 60;
|
||||
|
||||
@@ -63,10 +63,6 @@ public class TestConnectTransactionCoordinator {
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
transactionServices = new MockConnectTransactionServices();
|
||||
configs = KafkaConnectConfigs.newBuilder()
|
||||
.withCommitIntervalSecs(1L)
|
||||
.withCoordinatorWriteTimeoutSecs(1L)
|
||||
.build();
|
||||
latch = new CountDownLatch(1);
|
||||
}
|
||||
|
||||
@@ -77,13 +73,22 @@ public class TestConnectTransactionCoordinator {
|
||||
participant = new MockParticipant(kafkaControlAgent, latch, scenario, MAX_COMMIT_ROUNDS);
|
||||
participant.start();
|
||||
|
||||
KafkaConnectConfigs.Builder configBuilder = KafkaConnectConfigs.newBuilder()
|
||||
.withCommitIntervalSecs(1L)
|
||||
.withCoordinatorWriteTimeoutSecs(1L);
|
||||
|
||||
if (scenario.equals(MockParticipant.TestScenarios.SUBSET_WRITE_STATUS_FAILED)) {
|
||||
configBuilder.withAllowCommitOnErrors(false);
|
||||
}
|
||||
configs = configBuilder.build();
|
||||
|
||||
// Test the coordinator using the mock participant
|
||||
TransactionCoordinator coordinator = new ConnectTransactionCoordinator(
|
||||
configs,
|
||||
new TopicPartition(TOPIC_NAME, 0),
|
||||
kafkaControlAgent,
|
||||
transactionServices,
|
||||
(bootstrapServers, topicName) -> NUM_PARTITIONS);
|
||||
(bootstrapServers, topicName) -> TOTAL_KAFKA_PARTITIONS);
|
||||
coordinator.start();
|
||||
|
||||
latch.await(TEST_TIMEOUT_SECS, TimeUnit.SECONDS);
|
||||
@@ -119,7 +124,7 @@ public class TestConnectTransactionCoordinator {
|
||||
this.latch = latch;
|
||||
this.testScenario = testScenario;
|
||||
this.maxNumberCommitRounds = maxNumberCommitRounds;
|
||||
this.partition = new TopicPartition(TOPIC_NAME, (NUM_PARTITIONS - 1));
|
||||
this.partition = new TopicPartition(TOPIC_NAME, (TOTAL_KAFKA_PARTITIONS - 1));
|
||||
this.kafkaOffsetsCommitted = new HashMap<>();
|
||||
expectedMsgType = ControlMessage.EventType.START_COMMIT;
|
||||
numberCommitRounds = 0;
|
||||
@@ -162,39 +167,40 @@ public class TestConnectTransactionCoordinator {
|
||||
|
||||
private void testScenarios(ControlMessage message) {
|
||||
assertEquals(expectedMsgType, message.getType());
|
||||
|
||||
switch (message.getType()) {
|
||||
case START_COMMIT:
|
||||
expectedMsgType = ControlMessage.EventType.END_COMMIT;
|
||||
break;
|
||||
case END_COMMIT:
|
||||
assertEquals(kafkaOffsetsCommitted, message.getCoordinatorInfo().getGlobalKafkaCommitOffsets());
|
||||
int numSuccessPartitions;
|
||||
int numPartitionsThatReportWriteStatus;
|
||||
Map<Integer, Long> kafkaOffsets = new HashMap<>();
|
||||
List<ControlMessage> controlEvents = new ArrayList<>();
|
||||
// Prepare the WriteStatuses for all partitions
|
||||
for (int i = 1; i <= NUM_PARTITIONS; i++) {
|
||||
try {
|
||||
long kafkaOffset = (long) (Math.random() * 10000);
|
||||
kafkaOffsets.put(i, kafkaOffset);
|
||||
ControlMessage event = successWriteStatus(
|
||||
message.getCommitTime(),
|
||||
new TopicPartition(TOPIC_NAME, i),
|
||||
kafkaOffset);
|
||||
controlEvents.add(event);
|
||||
} catch (Exception exception) {
|
||||
throw new HoodieException("Fatal error sending control event to Coordinator");
|
||||
}
|
||||
}
|
||||
|
||||
switch (testScenario) {
|
||||
case ALL_CONNECT_TASKS_SUCCESS:
|
||||
numSuccessPartitions = NUM_PARTITIONS;
|
||||
composeControlEvent(message.getCommitTime(), false, kafkaOffsets, controlEvents);
|
||||
numPartitionsThatReportWriteStatus = TOTAL_KAFKA_PARTITIONS;
|
||||
// This commit round should succeed, and the kafka offsets getting committed
|
||||
kafkaOffsetsCommitted.putAll(kafkaOffsets);
|
||||
expectedMsgType = ControlMessage.EventType.ACK_COMMIT;
|
||||
break;
|
||||
case SUBSET_WRITE_STATUS_FAILED_BUT_IGNORED:
|
||||
composeControlEvent(message.getCommitTime(), true, kafkaOffsets, controlEvents);
|
||||
numPartitionsThatReportWriteStatus = TOTAL_KAFKA_PARTITIONS;
|
||||
// Despite error records, this commit round should succeed, and the kafka offsets getting committed
|
||||
kafkaOffsetsCommitted.putAll(kafkaOffsets);
|
||||
expectedMsgType = ControlMessage.EventType.ACK_COMMIT;
|
||||
break;
|
||||
case SUBSET_WRITE_STATUS_FAILED:
|
||||
composeControlEvent(message.getCommitTime(), true, kafkaOffsets, controlEvents);
|
||||
numPartitionsThatReportWriteStatus = TOTAL_KAFKA_PARTITIONS;
|
||||
// This commit round should fail, and a new commit round should start without kafka offsets getting committed
|
||||
expectedMsgType = ControlMessage.EventType.START_COMMIT;
|
||||
break;
|
||||
case SUBSET_CONNECT_TASKS_FAILED:
|
||||
numSuccessPartitions = NUM_PARTITIONS / 2;
|
||||
composeControlEvent(message.getCommitTime(), false, kafkaOffsets, controlEvents);
|
||||
numPartitionsThatReportWriteStatus = TOTAL_KAFKA_PARTITIONS / 2;
|
||||
// This commit round should fail, and a new commit round should start without kafka offsets getting committed
|
||||
expectedMsgType = ControlMessage.EventType.START_COMMIT;
|
||||
break;
|
||||
default:
|
||||
@@ -202,7 +208,7 @@ public class TestConnectTransactionCoordinator {
|
||||
}
|
||||
|
||||
// Send events based on test scenario
|
||||
for (int i = 0; i < numSuccessPartitions; i++) {
|
||||
for (int i = 0; i < numPartitionsThatReportWriteStatus; i++) {
|
||||
kafkaControlAgent.publishMessage(controlEvents.get(i));
|
||||
}
|
||||
break;
|
||||
@@ -227,18 +233,36 @@ public class TestConnectTransactionCoordinator {
|
||||
|
||||
public enum TestScenarios {
|
||||
SUBSET_CONNECT_TASKS_FAILED,
|
||||
SUBSET_WRITE_STATUS_FAILED,
|
||||
SUBSET_WRITE_STATUS_FAILED_BUT_IGNORED,
|
||||
ALL_CONNECT_TASKS_SUCCESS
|
||||
}
|
||||
|
||||
private static ControlMessage successWriteStatus(String commitTime,
|
||||
TopicPartition partition,
|
||||
long kafkaOffset) throws Exception {
|
||||
// send WS
|
||||
WriteStatus writeStatus = new WriteStatus();
|
||||
WriteStatus status = new WriteStatus(false, 1.0);
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
status.markSuccess(mock(HoodieRecord.class), Option.empty());
|
||||
private static void composeControlEvent(String commitTime, boolean shouldIncludeFailedRecords, Map<Integer, Long> kafkaOffsets, List<ControlMessage> controlEvents) {
|
||||
// Prepare the WriteStatuses for all partitions
|
||||
for (int i = 1; i <= TOTAL_KAFKA_PARTITIONS; i++) {
|
||||
try {
|
||||
long kafkaOffset = (long) (Math.random() * 10000);
|
||||
kafkaOffsets.put(i, kafkaOffset);
|
||||
ControlMessage event = composeWriteStatusResponse(
|
||||
commitTime,
|
||||
new TopicPartition(TOPIC_NAME, i),
|
||||
kafkaOffset,
|
||||
shouldIncludeFailedRecords);
|
||||
controlEvents.add(event);
|
||||
} catch (Exception exception) {
|
||||
throw new HoodieException("Fatal error sending control event to Coordinator");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static ControlMessage composeWriteStatusResponse(String commitTime,
|
||||
TopicPartition partition,
|
||||
long kafkaOffset,
|
||||
boolean includeFailedRecords) throws Exception {
|
||||
// send WS
|
||||
WriteStatus writeStatus = includeFailedRecords ? getSubsetFailedRecordsWriteStatus() : getAllSuccessfulRecordsWriteStatus();
|
||||
|
||||
return ControlMessage.newBuilder()
|
||||
.setType(ControlMessage.EventType.WRITE_STATUS)
|
||||
.setTopicName(partition.topic())
|
||||
@@ -255,4 +279,27 @@ public class TestConnectTransactionCoordinator {
|
||||
).build();
|
||||
}
|
||||
}
|
||||
|
||||
private static WriteStatus getAllSuccessfulRecordsWriteStatus() {
|
||||
// send WS
|
||||
WriteStatus status = new WriteStatus(false, 0.0);
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
status.markSuccess(mock(HoodieRecord.class), Option.empty());
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
private static WriteStatus getSubsetFailedRecordsWriteStatus() {
|
||||
// send WS
|
||||
WriteStatus status = new WriteStatus(false, 0.0);
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
if (i % 10 == 0) {
|
||||
status.markFailure(mock(HoodieRecord.class), new Throwable("Error writing record on disk"), Option.empty());
|
||||
} else {
|
||||
status.markSuccess(mock(HoodieRecord.class), Option.empty());
|
||||
}
|
||||
}
|
||||
status.setGlobalError(new Throwable("More than one records failed to be written to storage"));
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,8 +46,9 @@ public class MockConnectTransactionServices implements ConnectTransactionService
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endCommit(String commitTime, List<WriteStatus> writeStatuses, Map<String, String> extraMetadata) {
|
||||
public boolean endCommit(String commitTime, List<WriteStatus> writeStatuses, Map<String, String> extraMetadata) {
|
||||
assertEquals(String.valueOf(this.commitTime), commitTime);
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
log4j.rootLogger=WARN, CONSOLE
|
||||
log4j.logger.org.apache.hudi=DEBUG
|
||||
log4j.logger.org.apache.hadoop.hbase=ERROR
|
||||
|
||||
# CONSOLE is set to be a ConsoleAppender.
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
# CONSOLE uses PatternLayout.
|
||||
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c %x - %m%n
|
||||
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
|
||||
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
|
||||
log4j.appender.CONSOLE.filter.a.LevelMin=WARN
|
||||
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
|
||||
@@ -0,0 +1,31 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
log4j.rootLogger=WARN, CONSOLE
|
||||
log4j.logger.org.apache=INFO
|
||||
log4j.logger.org.apache.hudi=DEBUG
|
||||
log4j.logger.org.apache.hadoop.hbase=ERROR
|
||||
|
||||
# A1 is set to be a ConsoleAppender.
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
# A1 uses PatternLayout.
|
||||
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
||||
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
|
||||
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
|
||||
log4j.appender.CONSOLE.filter.a.LevelMin=WARN
|
||||
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
|
||||
Reference in New Issue
Block a user