[HUDI-2469] [Kafka Connect] Replace json based payload with protobuf for Transaction protocol. (#3694)
* Substitute Control Event with protobuf * Fix tests * Fix unit tests * Add javadocs * Add javadocs * Address reviewer comments Co-authored-by: Rajesh Mahindra <rmahindra@Rajeshs-MacBook-Pro.local>
This commit is contained in:
@@ -25,6 +25,7 @@ import org.apache.hudi.connect.transaction.TransactionCoordinator;
|
||||
import org.apache.hudi.connect.transaction.TransactionParticipant;
|
||||
import org.apache.hudi.connect.writers.KafkaConnectConfigs;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
|
||||
import org.apache.kafka.common.TopicPartition;
|
||||
@@ -36,7 +37,6 @@ import org.apache.kafka.connect.sink.SinkTask;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
@@ -51,7 +51,6 @@ public class HoodieSinkTask extends SinkTask {
|
||||
|
||||
public static final String TASK_ID_CONFIG_NAME = "task.id";
|
||||
private static final Logger LOG = LogManager.getLogger(HoodieSinkTask.class);
|
||||
private static final int COORDINATOR_KAFKA_PARTITION = 0;
|
||||
|
||||
private final Map<TopicPartition, TransactionCoordinator> transactionCoordinators;
|
||||
private final Map<TopicPartition, TransactionParticipant> transactionParticipants;
|
||||
@@ -113,7 +112,7 @@ public class HoodieSinkTask extends SinkTask {
|
||||
}
|
||||
try {
|
||||
transactionParticipants.get(partition).processRecords();
|
||||
} catch (IOException exception) {
|
||||
} catch (HoodieIOException exception) {
|
||||
throw new RetriableException("Intermittent write errors for Hudi "
|
||||
+ " for the topic/partition: " + partition.topic() + ":" + partition.partition()
|
||||
+ " , ensuring kafka connect will retry ", exception);
|
||||
@@ -164,7 +163,7 @@ public class HoodieSinkTask extends SinkTask {
|
||||
// make sure we apply the WAL, and only reuse the temp file if the starting offset is still
|
||||
// valid. For now, we prefer the simpler solution that may result in a bit of wasted effort.
|
||||
for (TopicPartition partition : partitions) {
|
||||
if (partition.partition() == COORDINATOR_KAFKA_PARTITION) {
|
||||
if (partition.partition() == ConnectTransactionCoordinator.COORDINATOR_KAFKA_PARTITION) {
|
||||
if (transactionCoordinators.containsKey(partition)) {
|
||||
transactionCoordinators.get(partition).stop();
|
||||
transactionCoordinators.remove(partition);
|
||||
@@ -188,7 +187,7 @@ public class HoodieSinkTask extends SinkTask {
|
||||
for (TopicPartition partition : partitions) {
|
||||
try {
|
||||
// If the partition is 0, instantiate the Leader
|
||||
if (partition.partition() == COORDINATOR_KAFKA_PARTITION) {
|
||||
if (partition.partition() == ConnectTransactionCoordinator.COORDINATOR_KAFKA_PARTITION) {
|
||||
ConnectTransactionCoordinator coordinator = new ConnectTransactionCoordinator(
|
||||
connectConfigs,
|
||||
partition,
|
||||
|
||||
@@ -18,17 +18,16 @@
|
||||
|
||||
package org.apache.hudi.connect.kafka;
|
||||
|
||||
import org.apache.hudi.connect.transaction.ControlEvent;
|
||||
import org.apache.hudi.connect.ControlMessage;
|
||||
import org.apache.hudi.connect.transaction.TransactionCoordinator;
|
||||
import org.apache.hudi.connect.transaction.TransactionParticipant;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.kafka.clients.consumer.CommitFailedException;
|
||||
import org.apache.kafka.clients.consumer.ConsumerConfig;
|
||||
import org.apache.kafka.clients.consumer.ConsumerRecord;
|
||||
import org.apache.kafka.clients.consumer.ConsumerRecords;
|
||||
import org.apache.kafka.clients.consumer.KafkaConsumer;
|
||||
import org.apache.kafka.common.serialization.Deserializer;
|
||||
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
|
||||
import org.apache.kafka.common.serialization.StringDeserializer;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
@@ -66,7 +65,7 @@ public class KafkaConnectControlAgent implements KafkaControlAgent {
|
||||
// List of TransactionParticipants per Kafka Topic
|
||||
private final Map<String, ConcurrentLinkedQueue<TransactionParticipant>> partitionWorkers;
|
||||
private final KafkaControlProducer producer;
|
||||
private KafkaConsumer<String, ControlEvent> consumer;
|
||||
private KafkaConsumer<String, byte[]> consumer;
|
||||
|
||||
public KafkaConnectControlAgent(String bootstrapServers,
|
||||
String controlTopicName) {
|
||||
@@ -118,7 +117,7 @@ public class KafkaConnectControlAgent implements KafkaControlAgent {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void publishMessage(ControlEvent message) {
|
||||
public void publishMessage(ControlMessage message) {
|
||||
producer.publishMessage(message);
|
||||
}
|
||||
|
||||
@@ -128,28 +127,28 @@ public class KafkaConnectControlAgent implements KafkaControlAgent {
|
||||
// Todo fetch the worker id or name instead of a uuid.
|
||||
props.put(ConsumerConfig.GROUP_ID_CONFIG, "hudi-control-group" + UUID.randomUUID().toString());
|
||||
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
|
||||
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaJsonDeserializer.class);
|
||||
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
|
||||
|
||||
// Since we are using Kafka Control Topic as a RPC like interface,
|
||||
// we want consumers to only process messages that are sent after they come online
|
||||
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
|
||||
|
||||
consumer = new KafkaConsumer<>(props, new StringDeserializer(),
|
||||
new KafkaJsonDeserializer<>(ControlEvent.class));
|
||||
consumer = new KafkaConsumer<>(props, new StringDeserializer(), new ByteArrayDeserializer());
|
||||
|
||||
consumer.subscribe(Collections.singletonList(controlTopicName));
|
||||
|
||||
executorService.submit(() -> {
|
||||
while (true) {
|
||||
ConsumerRecords<String, ControlEvent> records;
|
||||
ConsumerRecords<String, byte[]> records;
|
||||
records = consumer.poll(Duration.ofMillis(KAFKA_POLL_TIMEOUT_MS));
|
||||
for (ConsumerRecord<String, ControlEvent> record : records) {
|
||||
for (ConsumerRecord<String, byte[]> record : records) {
|
||||
try {
|
||||
LOG.debug(String.format("Kafka consumerGroupId = %s topic = %s, partition = %s, offset = %s, customer = %s, country = %s",
|
||||
"", record.topic(), record.partition(), record.offset(), record.key(), record.value()));
|
||||
ControlEvent message = record.value();
|
||||
String senderTopic = message.senderPartition().topic();
|
||||
if (message.getSenderType().equals(ControlEvent.SenderType.COORDINATOR)) {
|
||||
ControlMessage message = ControlMessage.parseFrom(record.value());
|
||||
String senderTopic = message.getTopicName();
|
||||
|
||||
if (message.getReceiverType().equals(ControlMessage.EntityType.PARTICIPANT)) {
|
||||
if (partitionWorkers.containsKey(senderTopic)) {
|
||||
for (TransactionParticipant partitionWorker : partitionWorkers.get(senderTopic)) {
|
||||
partitionWorker.processControlEvent(message);
|
||||
@@ -157,11 +156,9 @@ public class KafkaConnectControlAgent implements KafkaControlAgent {
|
||||
} else {
|
||||
LOG.warn(String.format("Failed to send message for unregistered participants for topic %s", senderTopic));
|
||||
}
|
||||
} else if (message.getSenderType().equals(ControlEvent.SenderType.PARTICIPANT)) {
|
||||
} else if (message.getReceiverType().equals(ControlMessage.EntityType.COORDINATOR)) {
|
||||
if (topicCoordinators.containsKey(senderTopic)) {
|
||||
topicCoordinators.get(senderTopic).processControlEvent(message);
|
||||
} else {
|
||||
LOG.warn(String.format("Failed to send message for unregistered coordinator for topic %s", senderTopic));
|
||||
}
|
||||
} else {
|
||||
LOG.warn(String.format("Sender type of Control Message unknown %s", message.getSenderType().name()));
|
||||
@@ -200,31 +197,4 @@ public class KafkaConnectControlAgent implements KafkaControlAgent {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Deserializes the incoming Kafka records for the Control Topic.
|
||||
*
|
||||
* @param <T> represents the object that is sent over the Control Topic.
|
||||
*/
|
||||
public static class KafkaJsonDeserializer<T> implements Deserializer<T> {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(KafkaJsonDeserializer.class);
|
||||
private final Class<T> type;
|
||||
|
||||
KafkaJsonDeserializer(Class<T> type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T deserialize(String s, byte[] bytes) {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
T obj = null;
|
||||
try {
|
||||
obj = mapper.readValue(bytes, type);
|
||||
} catch (Exception e) {
|
||||
LOG.error(e.getMessage());
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
package org.apache.hudi.connect.kafka;
|
||||
|
||||
import org.apache.hudi.connect.transaction.ControlEvent;
|
||||
import org.apache.hudi.connect.ControlMessage;
|
||||
import org.apache.hudi.connect.transaction.TransactionCoordinator;
|
||||
import org.apache.hudi.connect.transaction.TransactionParticipant;
|
||||
|
||||
@@ -37,5 +37,5 @@ public interface KafkaControlAgent {
|
||||
|
||||
void deregisterTransactionCoordinator(TransactionCoordinator coordinator);
|
||||
|
||||
void publishMessage(ControlEvent message);
|
||||
void publishMessage(ControlMessage message);
|
||||
}
|
||||
|
||||
@@ -18,16 +18,13 @@
|
||||
|
||||
package org.apache.hudi.connect.kafka;
|
||||
|
||||
import org.apache.hudi.connect.transaction.ControlEvent;
|
||||
import org.apache.hudi.connect.ControlMessage;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||
import com.fasterxml.jackson.annotation.PropertyAccessor;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.kafka.clients.producer.KafkaProducer;
|
||||
import org.apache.kafka.clients.producer.Producer;
|
||||
import org.apache.kafka.clients.producer.ProducerConfig;
|
||||
import org.apache.kafka.clients.producer.ProducerRecord;
|
||||
import org.apache.kafka.common.serialization.Serializer;
|
||||
import org.apache.kafka.common.serialization.ByteArraySerializer;
|
||||
import org.apache.kafka.common.serialization.StringSerializer;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
@@ -45,7 +42,7 @@ public class KafkaControlProducer {
|
||||
|
||||
private final String bootstrapServers;
|
||||
private final String controlTopicName;
|
||||
private Producer<String, ControlEvent> producer;
|
||||
private Producer<String, byte[]> producer;
|
||||
|
||||
public KafkaControlProducer(String bootstrapServers, String controlTopicName) {
|
||||
this.bootstrapServers = bootstrapServers;
|
||||
@@ -57,12 +54,12 @@ public class KafkaControlProducer {
|
||||
Properties props = new Properties();
|
||||
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
|
||||
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
|
||||
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaJsonSerializer.class);
|
||||
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
|
||||
|
||||
producer = new KafkaProducer<>(
|
||||
props,
|
||||
new StringSerializer(),
|
||||
new KafkaJsonSerializer()
|
||||
new ByteArraySerializer()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -70,28 +67,9 @@ public class KafkaControlProducer {
|
||||
producer.close();
|
||||
}
|
||||
|
||||
public void publishMessage(ControlEvent message) {
|
||||
ProducerRecord<String, ControlEvent> record
|
||||
= new ProducerRecord<>(controlTopicName, message.key(), message);
|
||||
public void publishMessage(ControlMessage message) {
|
||||
ProducerRecord<String, byte[]> record
|
||||
= new ProducerRecord<>(controlTopicName, message.getType().name(), message.toByteArray());
|
||||
producer.send(record);
|
||||
}
|
||||
|
||||
public static class KafkaJsonSerializer implements Serializer<ControlEvent> {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(KafkaJsonSerializer.class);
|
||||
|
||||
@Override
|
||||
public byte[] serialize(String topic, ControlEvent data) {
|
||||
byte[] retVal = null;
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
objectMapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
|
||||
|
||||
try {
|
||||
retVal = objectMapper.writeValueAsBytes(data);
|
||||
} catch (Exception e) {
|
||||
LOG.error("Fatal error during serialization of Kafka Control Message ", e);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ package org.apache.hudi.connect.transaction;
|
||||
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.util.StringUtils;
|
||||
import org.apache.hudi.connect.ControlMessage;
|
||||
import org.apache.hudi.connect.kafka.KafkaControlAgent;
|
||||
import org.apache.hudi.connect.utils.KafkaConnectUtils;
|
||||
import org.apache.hudi.connect.writers.ConnectTransactionServices;
|
||||
@@ -53,6 +54,8 @@ import java.util.stream.Collectors;
|
||||
*/
|
||||
public class ConnectTransactionCoordinator implements TransactionCoordinator, Runnable {
|
||||
|
||||
public static final int COORDINATOR_KAFKA_PARTITION = 0;
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(ConnectTransactionCoordinator.class);
|
||||
private static final String BOOTSTRAP_SERVERS_CFG = "bootstrap.servers";
|
||||
private static final String KAFKA_OFFSET_KEY = "kafka.commit.offsets";
|
||||
@@ -158,17 +161,18 @@ public class ConnectTransactionCoordinator implements TransactionCoordinator, Ru
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processControlEvent(ControlEvent message) {
|
||||
public void processControlEvent(ControlMessage message) {
|
||||
CoordinatorEvent.CoordinatorEventType type;
|
||||
if (message.getMsgType().equals(ControlEvent.MsgType.WRITE_STATUS)) {
|
||||
if (message.getType().equals(ControlMessage.EventType.WRITE_STATUS)) {
|
||||
type = CoordinatorEvent.CoordinatorEventType.WRITE_STATUS;
|
||||
} else {
|
||||
LOG.warn(String.format("The Coordinator should not be receiving messages of type %s", message.getMsgType().name()));
|
||||
LOG.warn(String.format("The Coordinator should not be receiving messages of type %s",
|
||||
message.getType().name()));
|
||||
return;
|
||||
}
|
||||
|
||||
CoordinatorEvent event = new CoordinatorEvent(type,
|
||||
message.senderPartition().topic(),
|
||||
message.getTopicName(),
|
||||
message.getCommitTime());
|
||||
event.setMessage(message);
|
||||
submitEvent(event);
|
||||
@@ -242,15 +246,7 @@ public class ConnectTransactionCoordinator implements TransactionCoordinator, Ru
|
||||
partitionsWriteStatusReceived.clear();
|
||||
try {
|
||||
currentCommitTime = transactionServices.startCommit();
|
||||
ControlEvent message = new ControlEvent.Builder(
|
||||
ControlEvent.MsgType.START_COMMIT,
|
||||
ControlEvent.SenderType.COORDINATOR,
|
||||
currentCommitTime,
|
||||
partition)
|
||||
.setCoordinatorInfo(
|
||||
new ControlEvent.CoordinatorInfo(globalCommittedKafkaOffsets))
|
||||
.build();
|
||||
kafkaControlClient.publishMessage(message);
|
||||
kafkaControlClient.publishMessage(buildControlMessage(ControlMessage.EventType.START_COMMIT));
|
||||
currentState = State.STARTED_COMMIT;
|
||||
// schedule a timeout for ending the current commit
|
||||
submitEvent(new CoordinatorEvent(CoordinatorEvent.CoordinatorEventType.END_COMMIT,
|
||||
@@ -268,14 +264,7 @@ public class ConnectTransactionCoordinator implements TransactionCoordinator, Ru
|
||||
|
||||
private void endExistingCommit() {
|
||||
try {
|
||||
ControlEvent message = new ControlEvent.Builder(
|
||||
ControlEvent.MsgType.END_COMMIT,
|
||||
ControlEvent.SenderType.COORDINATOR,
|
||||
currentCommitTime,
|
||||
partition)
|
||||
.setCoordinatorInfo(new ControlEvent.CoordinatorInfo(globalCommittedKafkaOffsets))
|
||||
.build();
|
||||
kafkaControlClient.publishMessage(message);
|
||||
kafkaControlClient.publishMessage(buildControlMessage(ControlMessage.EventType.END_COMMIT));
|
||||
} catch (Exception exception) {
|
||||
LOG.warn(String.format("Could not send END_COMMIT message for partition %s and commitTime %s", partition, currentCommitTime), exception);
|
||||
}
|
||||
@@ -289,13 +278,11 @@ public class ConnectTransactionCoordinator implements TransactionCoordinator, Ru
|
||||
configs.getCoordinatorWriteTimeoutSecs(), TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
private void onReceiveWriteStatus(ControlEvent message) {
|
||||
ControlEvent.ParticipantInfo participantInfo = message.getParticipantInfo();
|
||||
if (participantInfo.getOutcomeType().equals(ControlEvent.OutcomeType.WRITE_SUCCESS)) {
|
||||
int partition = message.senderPartition().partition();
|
||||
partitionsWriteStatusReceived.put(partition, participantInfo.writeStatuses());
|
||||
currentConsumedKafkaOffsets.put(partition, participantInfo.getKafkaCommitOffset());
|
||||
}
|
||||
private void onReceiveWriteStatus(ControlMessage message) {
|
||||
ControlMessage.ParticipantInfo participantInfo = message.getParticipantInfo();
|
||||
int partition = message.getSenderPartition();
|
||||
partitionsWriteStatusReceived.put(partition, KafkaConnectUtils.getWriteStatuses(participantInfo));
|
||||
currentConsumedKafkaOffsets.put(partition, participantInfo.getKafkaOffset());
|
||||
if (partitionsWriteStatusReceived.size() >= numPartitions
|
||||
&& currentState.equals(State.ENDED_COMMIT)) {
|
||||
// Commit the kafka offsets to the commit file
|
||||
@@ -311,7 +298,7 @@ public class ConnectTransactionCoordinator implements TransactionCoordinator, Ru
|
||||
currentState = State.WRITE_STATUS_RCVD;
|
||||
globalCommittedKafkaOffsets.putAll(currentConsumedKafkaOffsets);
|
||||
submitEvent(new CoordinatorEvent(CoordinatorEvent.CoordinatorEventType.ACK_COMMIT,
|
||||
partition.topic(),
|
||||
message.getTopicName(),
|
||||
currentCommitTime));
|
||||
} catch (Exception exception) {
|
||||
LOG.error("Fatal error while committing file", exception);
|
||||
@@ -334,15 +321,7 @@ public class ConnectTransactionCoordinator implements TransactionCoordinator, Ru
|
||||
|
||||
private void submitAckCommit() {
|
||||
try {
|
||||
ControlEvent message = new ControlEvent.Builder(
|
||||
ControlEvent.MsgType.ACK_COMMIT,
|
||||
ControlEvent.SenderType.COORDINATOR,
|
||||
currentCommitTime,
|
||||
partition)
|
||||
.setCoordinatorInfo(
|
||||
new ControlEvent.CoordinatorInfo(globalCommittedKafkaOffsets))
|
||||
.build();
|
||||
kafkaControlClient.publishMessage(message);
|
||||
kafkaControlClient.publishMessage(buildControlMessage(ControlMessage.EventType.ACK_COMMIT));
|
||||
} catch (Exception exception) {
|
||||
LOG.warn(String.format("Could not send ACK_COMMIT message for partition %s and commitTime %s", partition, currentCommitTime), exception);
|
||||
}
|
||||
@@ -397,4 +376,20 @@ public class ConnectTransactionCoordinator implements TransactionCoordinator, Ru
|
||||
public interface KafkaPartitionProvider {
|
||||
int getLatestNumPartitions(String bootstrapServers, String topicName);
|
||||
}
|
||||
|
||||
private ControlMessage buildControlMessage(ControlMessage.EventType eventType) {
|
||||
return ControlMessage.newBuilder()
|
||||
.setProtocolVersion(KafkaConnectConfigs.CURRENT_PROTOCOL_VERSION)
|
||||
.setType(eventType)
|
||||
.setTopicName(partition.topic())
|
||||
.setSenderType(ControlMessage.EntityType.COORDINATOR)
|
||||
.setSenderPartition(partition.partition())
|
||||
.setReceiverType(ControlMessage.EntityType.PARTICIPANT)
|
||||
.setCommitTime(currentCommitTime)
|
||||
.setCoordinatorInfo(
|
||||
ControlMessage.CoordinatorInfo.newBuilder()
|
||||
.putAllGlobalKafkaCommitOffsets(globalCommittedKafkaOffsets)
|
||||
.build()
|
||||
).build();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,11 +19,14 @@
|
||||
package org.apache.hudi.connect.transaction;
|
||||
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.connect.ControlMessage;
|
||||
import org.apache.hudi.connect.kafka.KafkaControlAgent;
|
||||
import org.apache.hudi.connect.utils.KafkaConnectUtils;
|
||||
import org.apache.hudi.connect.writers.ConnectWriterProvider;
|
||||
import org.apache.hudi.connect.writers.KafkaConnectConfigs;
|
||||
import org.apache.hudi.connect.writers.KafkaConnectWriterProvider;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
import org.apache.kafka.common.TopicPartition;
|
||||
import org.apache.kafka.connect.sink.SinkRecord;
|
||||
@@ -46,7 +49,7 @@ public class ConnectTransactionParticipant implements TransactionParticipant {
|
||||
private static final Logger LOG = LogManager.getLogger(ConnectTransactionParticipant.class);
|
||||
|
||||
private final LinkedList<SinkRecord> buffer;
|
||||
private final BlockingQueue<ControlEvent> controlEvents;
|
||||
private final BlockingQueue<ControlMessage> controlEvents;
|
||||
private final TopicPartition partition;
|
||||
private final SinkTaskContext context;
|
||||
private final KafkaControlAgent kafkaControlAgent;
|
||||
@@ -95,7 +98,7 @@ public class ConnectTransactionParticipant implements TransactionParticipant {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processControlEvent(ControlEvent message) {
|
||||
public void processControlEvent(ControlMessage message) {
|
||||
controlEvents.add(message);
|
||||
}
|
||||
|
||||
@@ -110,10 +113,10 @@ public class ConnectTransactionParticipant implements TransactionParticipant {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processRecords() throws IOException {
|
||||
public void processRecords() {
|
||||
while (!controlEvents.isEmpty()) {
|
||||
ControlEvent message = controlEvents.poll();
|
||||
switch (message.getMsgType()) {
|
||||
ControlMessage message = controlEvents.poll();
|
||||
switch (message.getType()) {
|
||||
case START_COMMIT:
|
||||
handleStartCommit(message);
|
||||
break;
|
||||
@@ -127,14 +130,14 @@ public class ConnectTransactionParticipant implements TransactionParticipant {
|
||||
// ignore write status since its only processed by leader
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("HudiTransactionParticipant received incorrect state " + message.getMsgType());
|
||||
throw new IllegalStateException("HudiTransactionParticipant received incorrect state " + message.getType().name());
|
||||
}
|
||||
}
|
||||
|
||||
writeRecords();
|
||||
}
|
||||
|
||||
private void handleStartCommit(ControlEvent message) {
|
||||
private void handleStartCommit(ControlMessage message) {
|
||||
// If there is an existing/ongoing transaction locally
|
||||
// but it failed globally since we received another START_COMMIT instead of an END_COMMIT or ACK_COMMIT,
|
||||
// so close it and start new transaction
|
||||
@@ -152,7 +155,7 @@ public class ConnectTransactionParticipant implements TransactionParticipant {
|
||||
}
|
||||
}
|
||||
|
||||
private void handleEndCommit(ControlEvent message) throws IOException {
|
||||
private void handleEndCommit(ControlMessage message) {
|
||||
if (ongoingTransactionInfo == null) {
|
||||
LOG.warn(String.format("END_COMMIT %s is received while we were NOT in active transaction", message.getCommitTime()));
|
||||
return;
|
||||
@@ -172,21 +175,32 @@ public class ConnectTransactionParticipant implements TransactionParticipant {
|
||||
try {
|
||||
//sendWriterStatus
|
||||
List<WriteStatus> writeStatuses = ongoingTransactionInfo.getWriter().close();
|
||||
ControlEvent writeStatusEvent = new ControlEvent.Builder(ControlEvent.MsgType.WRITE_STATUS,
|
||||
ControlEvent.SenderType.PARTICIPANT, ongoingTransactionInfo.getCommitTime(), partition)
|
||||
.setParticipantInfo(new ControlEvent.ParticipantInfo(
|
||||
writeStatuses,
|
||||
ongoingTransactionInfo.getLastWrittenKafkaOffset(),
|
||||
ControlEvent.OutcomeType.WRITE_SUCCESS))
|
||||
.build();
|
||||
|
||||
ControlMessage writeStatusEvent = ControlMessage.newBuilder()
|
||||
.setProtocolVersion(KafkaConnectConfigs.CURRENT_PROTOCOL_VERSION)
|
||||
.setType(ControlMessage.EventType.WRITE_STATUS)
|
||||
.setTopicName(partition.topic())
|
||||
.setSenderType(ControlMessage.EntityType.PARTICIPANT)
|
||||
.setSenderPartition(partition.partition())
|
||||
.setReceiverType(ControlMessage.EntityType.COORDINATOR)
|
||||
.setReceiverPartition(ConnectTransactionCoordinator.COORDINATOR_KAFKA_PARTITION)
|
||||
.setCommitTime(ongoingTransactionInfo.getCommitTime())
|
||||
.setParticipantInfo(
|
||||
ControlMessage.ParticipantInfo.newBuilder()
|
||||
.setWriteStatus(KafkaConnectUtils.buildWriteStatuses(writeStatuses))
|
||||
.setKafkaOffset(ongoingTransactionInfo.getLastWrittenKafkaOffset())
|
||||
.build()
|
||||
).build();
|
||||
|
||||
kafkaControlAgent.publishMessage(writeStatusEvent);
|
||||
} catch (Exception exception) {
|
||||
LOG.error(String.format("Error writing records and ending commit %s for partition %s", message.getCommitTime(), partition.partition()), exception);
|
||||
throw new IOException(String.format("Error writing records and ending commit %s for partition %s", message.getCommitTime(), partition.partition()), exception);
|
||||
throw new HoodieIOException(String.format("Error writing records and ending commit %s for partition %s", message.getCommitTime(), partition.partition()),
|
||||
new IOException(exception));
|
||||
}
|
||||
}
|
||||
|
||||
private void handleAckCommit(ControlEvent message) {
|
||||
private void handleAckCommit(ControlMessage message) {
|
||||
// Update committedKafkaOffset locally.
|
||||
if (ongoingTransactionInfo != null && committedKafkaOffset < ongoingTransactionInfo.getLastWrittenKafkaOffset()) {
|
||||
committedKafkaOffset = ongoingTransactionInfo.getLastWrittenKafkaOffset();
|
||||
@@ -224,15 +238,15 @@ public class ConnectTransactionParticipant implements TransactionParticipant {
|
||||
try {
|
||||
ongoingTransactionInfo.getWriter().close();
|
||||
ongoingTransactionInfo = null;
|
||||
} catch (IOException exception) {
|
||||
} catch (HoodieIOException exception) {
|
||||
LOG.warn("Error received while trying to cleanup existing transaction", exception);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void syncKafkaOffsetWithLeader(ControlEvent message) {
|
||||
if (message.getCoordinatorInfo() != null) {
|
||||
Long coordinatorCommittedKafkaOffset = message.getCoordinatorInfo().getGlobalKafkaCommitOffsets().get(partition.partition());
|
||||
private void syncKafkaOffsetWithLeader(ControlMessage message) {
|
||||
if (message.getCoordinatorInfo().getGlobalKafkaCommitOffsetsMap().containsKey(partition.partition())) {
|
||||
Long coordinatorCommittedKafkaOffset = message.getCoordinatorInfo().getGlobalKafkaCommitOffsetsMap().get(partition.partition());
|
||||
// Recover kafka committed offsets, treating the commit offset from the coordinator
|
||||
// as the source of truth
|
||||
if (coordinatorCommittedKafkaOffset != null && coordinatorCommittedKafkaOffset >= 0) {
|
||||
|
||||
@@ -1,237 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.connect.transaction;
|
||||
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.util.SerializationUtils;
|
||||
|
||||
import org.apache.kafka.common.TopicPartition;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* The events sent over the Kafka Control Topic between the
|
||||
* coordinator and the followers, in order to ensure
|
||||
* coordination across all the writes.
|
||||
*/
|
||||
@SuppressWarnings("checkstyle:VisibilityModifier")
|
||||
public class ControlEvent implements Serializable {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(ControlEvent.class);
|
||||
private static final int CURRENT_VERSION = 0;
|
||||
|
||||
private final int version = CURRENT_VERSION;
|
||||
private MsgType msgType;
|
||||
private SenderType senderType;
|
||||
private String commitTime;
|
||||
private byte[] senderPartition;
|
||||
private CoordinatorInfo coordinatorInfo;
|
||||
private ParticipantInfo participantInfo;
|
||||
|
||||
public ControlEvent() {
|
||||
}
|
||||
|
||||
public ControlEvent(MsgType msgType,
|
||||
SenderType senderType,
|
||||
String commitTime,
|
||||
byte[] senderPartition,
|
||||
CoordinatorInfo coordinatorInfo,
|
||||
ParticipantInfo participantInfo) {
|
||||
this.msgType = msgType;
|
||||
this.senderType = senderType;
|
||||
this.commitTime = commitTime;
|
||||
this.senderPartition = senderPartition;
|
||||
this.coordinatorInfo = coordinatorInfo;
|
||||
this.participantInfo = participantInfo;
|
||||
}
|
||||
|
||||
public String key() {
|
||||
return msgType.name().toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
public MsgType getMsgType() {
|
||||
return msgType;
|
||||
}
|
||||
|
||||
public SenderType getSenderType() {
|
||||
return senderType;
|
||||
}
|
||||
|
||||
public String getCommitTime() {
|
||||
return commitTime;
|
||||
}
|
||||
|
||||
public byte[] getSenderPartition() {
|
||||
return senderPartition;
|
||||
}
|
||||
|
||||
public TopicPartition senderPartition() {
|
||||
return SerializationUtils.deserialize(senderPartition);
|
||||
}
|
||||
|
||||
public CoordinatorInfo getCoordinatorInfo() {
|
||||
return coordinatorInfo;
|
||||
}
|
||||
|
||||
public ParticipantInfo getParticipantInfo() {
|
||||
return participantInfo;
|
||||
}
|
||||
|
||||
public int getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s %s %s %s %s %s", version, msgType.name(), commitTime,
|
||||
Arrays.toString(senderPartition),
|
||||
(coordinatorInfo == null) ? "" : coordinatorInfo.toString(),
|
||||
(participantInfo == null) ? "" : participantInfo.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Builder that helps build {@link ControlEvent}.
|
||||
*/
|
||||
public static class Builder {
|
||||
|
||||
private final MsgType msgType;
|
||||
private SenderType senderType;
|
||||
private final String commitTime;
|
||||
private final byte[] senderPartition;
|
||||
private CoordinatorInfo coordinatorInfo;
|
||||
private ParticipantInfo participantInfo;
|
||||
|
||||
public Builder(MsgType msgType, SenderType senderType, String commitTime, TopicPartition senderPartition) throws IOException {
|
||||
this.msgType = msgType;
|
||||
this.senderType = senderType;
|
||||
this.commitTime = commitTime;
|
||||
this.senderPartition = SerializationUtils.serialize(senderPartition);
|
||||
}
|
||||
|
||||
public Builder setCoordinatorInfo(CoordinatorInfo coordinatorInfo) {
|
||||
this.coordinatorInfo = coordinatorInfo;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setParticipantInfo(ParticipantInfo participantInfo) {
|
||||
this.participantInfo = participantInfo;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ControlEvent build() {
|
||||
return new ControlEvent(msgType, senderType, commitTime, senderPartition, coordinatorInfo, participantInfo);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The info sent by the {@link TransactionCoordinator} to one or more
|
||||
* {@link TransactionParticipant}s.
|
||||
*/
|
||||
public static class CoordinatorInfo implements Serializable {
|
||||
|
||||
private Map<Integer, Long> globalKafkaCommitOffsets;
|
||||
|
||||
public CoordinatorInfo() {
|
||||
}
|
||||
|
||||
public CoordinatorInfo(Map<Integer, Long> globalKafkaCommitOffsets) {
|
||||
this.globalKafkaCommitOffsets = globalKafkaCommitOffsets;
|
||||
}
|
||||
|
||||
public Map<Integer, Long> getGlobalKafkaCommitOffsets() {
|
||||
return (globalKafkaCommitOffsets == null) ? new HashMap<>() : globalKafkaCommitOffsets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s", globalKafkaCommitOffsets.keySet().stream()
|
||||
.map(key -> key + "=" + globalKafkaCommitOffsets.get(key))
|
||||
.collect(Collectors.joining(", ", "{", "}")));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The info sent by a {@link TransactionParticipant} instances to the
|
||||
* {@link TransactionCoordinator}.
|
||||
*/
|
||||
public static class ParticipantInfo implements Serializable {
|
||||
|
||||
private byte[] writeStatusList;
|
||||
private long kafkaCommitOffset;
|
||||
private OutcomeType outcomeType;
|
||||
|
||||
public ParticipantInfo() {
|
||||
}
|
||||
|
||||
public ParticipantInfo(List<WriteStatus> writeStatuses, long kafkaCommitOffset, OutcomeType outcomeType) throws IOException {
|
||||
this.writeStatusList = SerializationUtils.serialize(writeStatuses);
|
||||
this.kafkaCommitOffset = kafkaCommitOffset;
|
||||
this.outcomeType = outcomeType;
|
||||
}
|
||||
|
||||
public byte[] getWriteStatusList() {
|
||||
return writeStatusList;
|
||||
}
|
||||
|
||||
public List<WriteStatus> writeStatuses() {
|
||||
return SerializationUtils.deserialize(writeStatusList);
|
||||
}
|
||||
|
||||
public long getKafkaCommitOffset() {
|
||||
return kafkaCommitOffset;
|
||||
}
|
||||
|
||||
public OutcomeType getOutcomeType() {
|
||||
return outcomeType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s %s %s", Arrays.toString(writeStatusList), kafkaCommitOffset, outcomeType.name());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Type of Control Event.
 */
public enum MsgType {
  START_COMMIT, END_COMMIT, ACK_COMMIT, WRITE_STATUS
}
|
||||
|
||||
/**
 * Type of the entity that sent a Control Event.
 */
public enum SenderType {
  COORDINATOR, PARTICIPANT
}
|
||||
|
||||
/**
 * Outcome value carried in a {@code ParticipantInfo}.
 */
public enum OutcomeType {
  WRITE_SUCCESS
}
|
||||
}
|
||||
@@ -18,6 +18,8 @@
|
||||
|
||||
package org.apache.hudi.connect.transaction;
|
||||
|
||||
import org.apache.hudi.connect.ControlMessage;
|
||||
|
||||
/**
|
||||
* The events within the Coordinator that trigger
|
||||
* the state changes in the state machine of
|
||||
@@ -28,7 +30,7 @@ public class CoordinatorEvent {
|
||||
private final CoordinatorEventType eventType;
|
||||
private final String topicName;
|
||||
private final String commitTime;
|
||||
private ControlEvent message;
|
||||
private ControlMessage message;
|
||||
|
||||
public CoordinatorEvent(CoordinatorEventType eventType,
|
||||
String topicName,
|
||||
@@ -50,11 +52,11 @@ public class CoordinatorEvent {
|
||||
return commitTime;
|
||||
}
|
||||
|
||||
public ControlEvent getMessage() {
|
||||
public ControlMessage getMessage() {
|
||||
return message;
|
||||
}
|
||||
|
||||
public void setMessage(ControlEvent message) {
|
||||
public void setMessage(ControlMessage message) {
|
||||
this.message = message;
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
|
||||
package org.apache.hudi.connect.transaction;
|
||||
|
||||
import org.apache.hudi.connect.ControlMessage;
|
||||
|
||||
import org.apache.kafka.common.TopicPartition;
|
||||
|
||||
/**
|
||||
@@ -36,5 +38,5 @@ public interface TransactionCoordinator {
|
||||
TopicPartition getPartition();
|
||||
|
||||
/* Called when a control event is received from the Kafka control topic */
|
||||
void processControlEvent(ControlEvent message);
|
||||
void processControlEvent(ControlMessage message);
|
||||
}
|
||||
|
||||
@@ -18,11 +18,11 @@
|
||||
|
||||
package org.apache.hudi.connect.transaction;
|
||||
|
||||
import org.apache.hudi.connect.ControlMessage;
|
||||
|
||||
import org.apache.kafka.common.TopicPartition;
|
||||
import org.apache.kafka.connect.sink.SinkRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Interface for the Participant that
|
||||
* manages Writes for a
|
||||
@@ -37,11 +37,11 @@ public interface TransactionParticipant {
|
||||
|
||||
void buffer(SinkRecord record);
|
||||
|
||||
void processRecords() throws IOException;
|
||||
void processRecords();
|
||||
|
||||
TopicPartition getPartition();
|
||||
|
||||
void processControlEvent(ControlEvent message);
|
||||
void processControlEvent(ControlMessage message);
|
||||
|
||||
long getLastKafkaCommittedOffset();
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.connect.utils;
|
||||
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
@@ -26,7 +27,9 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.SerializationUtils;
|
||||
import org.apache.hudi.common.util.StringUtils;
|
||||
import org.apache.hudi.connect.ControlMessage;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.keygen.BaseKeyGenerator;
|
||||
import org.apache.hudi.keygen.CustomAvroKeyGenerator;
|
||||
@@ -34,6 +37,7 @@ import org.apache.hudi.keygen.CustomKeyGenerator;
|
||||
import org.apache.hudi.keygen.KeyGenerator;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
|
||||
import com.google.protobuf.ByteString;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.kafka.clients.admin.AdminClient;
|
||||
import org.apache.kafka.clients.admin.DescribeTopicsResult;
|
||||
@@ -42,10 +46,12 @@ import org.apache.kafka.common.KafkaFuture;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Properties;
|
||||
@@ -76,6 +82,7 @@ public class KafkaConnectUtils {
|
||||
|
||||
/**
|
||||
* Returns the default Hadoop Configuration.
|
||||
*
|
||||
* @return the default Hadoop {@link Configuration}.
|
||||
*/
|
||||
public static Configuration getDefaultHadoopConf() {
|
||||
@@ -86,6 +93,7 @@ public class KafkaConnectUtils {
|
||||
|
||||
/**
|
||||
* Extract the record fields.
|
||||
*
|
||||
* @param keyGenerator key generator Instance of the keygenerator.
|
||||
* @return Returns the record key columns separated by comma.
|
||||
*/
|
||||
@@ -97,7 +105,7 @@ public class KafkaConnectUtils {
|
||||
* Extract partition columns directly if an instance of class {@link BaseKeyGenerator},
|
||||
* else extract partition columns from the properties.
|
||||
*
|
||||
* @param keyGenerator key generator Instance of the keygenerator.
|
||||
* @param keyGenerator key generator Instance of the keygenerator.
|
||||
* @param typedProperties properties from the config.
|
||||
* @return partition columns Returns the partition columns separated by comma.
|
||||
*/
|
||||
@@ -142,7 +150,7 @@ public class KafkaConnectUtils {
|
||||
return Option.empty();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static String hashDigest(String stringToHash) {
|
||||
MessageDigest md;
|
||||
try {
|
||||
@@ -154,4 +162,32 @@ public class KafkaConnectUtils {
|
||||
byte[] digest = Objects.requireNonNull(md).digest(stringToHash.getBytes(StandardCharsets.UTF_8));
|
||||
return StringUtils.toHexString(digest).toUpperCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* Build Protobuf message containing the Hudi {@link WriteStatus}.
|
||||
*
|
||||
* @param writeStatuses The list of Hudi {@link WriteStatus}.
|
||||
* @return the protobuf message {@link org.apache.hudi.connect.ControlMessage.ConnectWriteStatus}
|
||||
* that wraps the Hudi {@link WriteStatus}.
|
||||
* @throws IOException thrown if the conversion failed.
|
||||
*/
|
||||
public static ControlMessage.ConnectWriteStatus buildWriteStatuses(List<WriteStatus> writeStatuses) throws IOException {
|
||||
return ControlMessage.ConnectWriteStatus.newBuilder()
|
||||
.setSerializedWriteStatus(
|
||||
ByteString.copyFrom(
|
||||
SerializationUtils.serialize(writeStatuses)))
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Unwrap the Hudi {@link WriteStatus} from the received Protobuf message.
|
||||
*
|
||||
* @param participantInfo The {@link ControlMessage.ParticipantInfo} that contains the
|
||||
* underlying {@link WriteStatus} sent by the participants.
|
||||
* @return the list of {@link WriteStatus} returned by Hudi on a write transaction.
|
||||
*/
|
||||
public static List<WriteStatus> getWriteStatuses(ControlMessage.ParticipantInfo participantInfo) {
|
||||
ControlMessage.ConnectWriteStatus connectWriteStatus = participantInfo.getWriteStatus();
|
||||
return SerializationUtils.deserialize(connectWriteStatus.getSerializedWriteStatus().toByteArray());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,11 +91,11 @@ public abstract class AbstractConnectWriter implements ConnectWriter<WriteStatus
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<WriteStatus> close() throws IOException {
|
||||
public List<WriteStatus> close() {
|
||||
return flushRecords();
|
||||
}
|
||||
|
||||
protected abstract void writeHudiRecord(HoodieRecord<?> record);
|
||||
|
||||
protected abstract List<WriteStatus> flushRecords() throws IOException;
|
||||
protected abstract List<WriteStatus> flushRecords();
|
||||
}
|
||||
|
||||
@@ -92,7 +92,7 @@ public class BufferedConnectWriter extends AbstractConnectWriter {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<WriteStatus> flushRecords() throws IOException {
|
||||
public List<WriteStatus> flushRecords() {
|
||||
try {
|
||||
LOG.info("Number of entries in MemoryBasedMap => "
|
||||
+ bufferedRecords.getInMemoryMapNumEntries()
|
||||
@@ -122,7 +122,7 @@ public class BufferedConnectWriter extends AbstractConnectWriter {
|
||||
LOG.info("Flushed hudi records and got writeStatuses: " + writeStatuses);
|
||||
return writeStatuses;
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Write records failed", e);
|
||||
throw new HoodieIOException("Write records failed", new IOException(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,5 +27,5 @@ public interface ConnectWriter<T> {
|
||||
|
||||
void writeRecord(SinkRecord record) throws IOException;
|
||||
|
||||
List<T> close() throws IOException;
|
||||
List<T> close();
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@ import java.util.Properties;
|
||||
description = "Configurations for Kafka Connect Sink Connector for Hudi.")
|
||||
public class KafkaConnectConfigs extends HoodieConfig {
|
||||
|
||||
public static final int CURRENT_PROTOCOL_VERSION = 0;
|
||||
public static final String KAFKA_VALUE_CONVERTER = "value.converter";
|
||||
|
||||
public static final ConfigProperty<String> KAFKA_BOOTSTRAP_SERVERS = ConfigProperty
|
||||
|
||||
65
hudi-kafka-connect/src/main/resources/ControlMessage.proto
Normal file
65
hudi-kafka-connect/src/main/resources/ControlMessage.proto
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
option java_multiple_files = true;
|
||||
option java_package = "org.apache.hudi.connect";
|
||||
option java_outer_classname = "ConnectControl";
|
||||
|
||||
package connect;
|
||||
|
||||
// Control message exchanged between the transaction coordinator and the
// transaction participants of the Hudi Kafka Connect sink.
message ControlMessage {
  // Version of the control protocol.
  // NOTE(review): presumably KafkaConnectConfigs.CURRENT_PROTOCOL_VERSION; confirm against the senders.
  uint32 protocolVersion = 1;
  // Kind of event this message represents.
  EventType type = 2;
  string topic_name = 3;
  // Sender of the message and its partition.
  EntityType sender_type = 4;
  uint32 sender_partition = 5;
  // Intended receiver of the message and its partition.
  EntityType receiver_type = 6;
  uint32 receiver_partition = 7;
  string commitTime = 8;
  // At most one of the two payloads is set on a message.
  oneof payload {
    CoordinatorInfo coordinator_info = 9;
    ParticipantInfo participant_info = 10;
  }

  // Payload carrying a map of int32 keys to int64 Kafka commit offsets.
  // NOTE(review): keys are presumably Kafka partition ids; confirm.
  message CoordinatorInfo {
    map<int32, int64> globalKafkaCommitOffsets = 1;
  }

  // Payload carrying a write status and a Kafka offset.
  message ParticipantInfo {
    ConnectWriteStatus writeStatus = 1;
    uint64 kafkaOffset = 2;
  }

  // Opaque wrapper around serialized Hudi write statuses.
  message ConnectWriteStatus {
    bytes serializedWriteStatus = 1;
  }

  enum EventType {
    START_COMMIT = 0;
    END_COMMIT = 1;
    ACK_COMMIT = 2;
    WRITE_STATUS = 3;
  }

  enum EntityType {
    COORDINATOR = 0;
    PARTICIPANT = 1;
  }
}
|
||||
23
hudi-kafka-connect/src/main/resources/log4j.properties
Normal file
23
hudi-kafka-connect/src/main/resources/log4j.properties
Normal file
@@ -0,0 +1,23 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
log4j.rootLogger=INFO, A1
|
||||
# A1 is set to be a ConsoleAppender.
|
||||
log4j.appender.A1=org.apache.log4j.ConsoleAppender
|
||||
# A1 uses PatternLayout.
|
||||
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
||||
Reference in New Issue
Block a user