[HUDI-2332] Add clustering and compaction in Kafka Connect Sink (#3857)
* [HUDI-2332] Add clustering and compaction in Kafka Connect Sink
* Disable validation check on instant time for compaction and adjust configs
* Add javadocs
* Add clustering and compaction config
* Fix transaction causing missing records in the target table
* Add debugging logs
* Fix kafka offset sync in participant
* Adjust how clustering and compaction are configured in kafka-connect
* Fix clustering strategy
* Remove irrelevant changes from other published PRs
* Update clustering logic and others
* Update README
* Fix test failures
* Fix indentation
* Fix clustering config
* Add JavaCustomColumnsSortPartitioner and make async compaction enabled by default
* Add test for JavaCustomColumnsSortPartitioner
* Add more changes after IDE sync
* Update README with clarification
* Fix clustering logic after rebasing
* Remove unrelated changes
This commit is contained in:
@@ -73,6 +73,11 @@ public class KafkaConnectConfigs extends HoodieConfig {
|
||||
+ "the coordinator will wait for the write statuses from all the partitions"
|
||||
+ "to ignore the current commit and start a new commit.");
|
||||
|
||||
/**
 * Config property keyed {@code hoodie.kafka.compaction.async.enable}, defaulting to {@code "true"}.
 * Per its own documentation string, it controls whether async compaction is turned on
 * for MOR table writing. Read through {@code isAsyncCompactEnabled()}.
 */
public static final ConfigProperty<String> ASYNC_COMPACT_ENABLE = ConfigProperty
|
||||
.key("hoodie.kafka.compaction.async.enable")
|
||||
.defaultValue("true")
|
||||
.withDocumentation("Controls whether async compaction should be turned on for MOR table writing.");
|
||||
|
||||
public static final ConfigProperty<String> META_SYNC_ENABLE = ConfigProperty
|
||||
.key("hoodie.meta.sync.enable")
|
||||
.defaultValue("false")
|
||||
@@ -121,6 +126,10 @@ public class KafkaConnectConfigs extends HoodieConfig {
|
||||
return getString(KAFKA_VALUE_CONVERTER);
|
||||
}
|
||||
|
||||
public Boolean isAsyncCompactEnabled() {
|
||||
return getBoolean(ASYNC_COMPACT_ENABLE);
|
||||
}
|
||||
|
||||
public Boolean isMetaSyncEnabled() {
|
||||
return getBoolean(META_SYNC_ENABLE);
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ import org.apache.hudi.client.HoodieJavaWriteClient;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.client.common.HoodieJavaEngineContext;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.engine.EngineType;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
@@ -54,6 +55,8 @@ public class KafkaConnectTransactionServices implements ConnectTransactionServic
|
||||
|
||||
private final Option<HoodieTableMetaClient> tableMetaClient;
|
||||
private final Configuration hadoopConf;
|
||||
private final HoodieWriteConfig writeConfig;
|
||||
private final KafkaConnectConfigs connectConfigs;
|
||||
private final String tableBasePath;
|
||||
private final String tableName;
|
||||
private final HoodieEngineContext context;
|
||||
@@ -61,8 +64,11 @@ public class KafkaConnectTransactionServices implements ConnectTransactionServic
|
||||
private final HoodieJavaWriteClient<HoodieAvroPayload> javaClient;
|
||||
|
||||
public KafkaConnectTransactionServices(KafkaConnectConfigs connectConfigs) throws HoodieException {
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
|
||||
.withProperties(connectConfigs.getProps()).build();
|
||||
this.connectConfigs = connectConfigs;
|
||||
this.writeConfig = HoodieWriteConfig.newBuilder()
|
||||
.withEngineType(EngineType.JAVA)
|
||||
.withProperties(connectConfigs.getProps())
|
||||
.build();
|
||||
|
||||
tableBasePath = writeConfig.getBasePath();
|
||||
tableName = writeConfig.getTableName();
|
||||
@@ -95,6 +101,7 @@ public class KafkaConnectTransactionServices implements ConnectTransactionServic
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String startCommit() {
|
||||
String newCommitTime = javaClient.startCommit();
|
||||
javaClient.transitionInflight(newCommitTime);
|
||||
@@ -102,11 +109,23 @@ public class KafkaConnectTransactionServices implements ConnectTransactionServic
|
||||
return newCommitTime;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endCommit(String commitTime, List<WriteStatus> writeStatuses, Map<String, String> extraMetadata) {
|
||||
javaClient.commit(commitTime, writeStatuses, Option.of(extraMetadata));
|
||||
LOG.info("Ending Hudi commit " + commitTime);
|
||||
|
||||
// Schedule clustering and compaction as needed.
|
||||
if (writeConfig.isAsyncClusteringEnabled()) {
|
||||
javaClient.scheduleClustering(Option.empty()).ifPresent(
|
||||
instantTs -> LOG.info("Scheduled clustering at instant time:" + instantTs));
|
||||
}
|
||||
if (isAsyncCompactionEnabled()) {
|
||||
javaClient.scheduleCompaction(Option.empty()).ifPresent(
|
||||
instantTs -> LOG.info("Scheduled compaction at instant time:" + instantTs));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, String> fetchLatestExtraCommitMetadata() {
|
||||
if (tableMetaClient.isPresent()) {
|
||||
Option<HoodieCommitMetadata> metadata = KafkaConnectUtils.getCommitMetadataForLatestInstant(tableMetaClient.get());
|
||||
@@ -119,4 +138,10 @@ public class KafkaConnectTransactionServices implements ConnectTransactionServic
|
||||
}
|
||||
throw new HoodieException("Fatal error retrieving Hoodie Extra Metadata since Table Meta Client is absent");
|
||||
}
|
||||
|
||||
private boolean isAsyncCompactionEnabled() {
|
||||
return tableMetaClient.isPresent()
|
||||
&& HoodieTableType.MERGE_ON_READ.equals(tableMetaClient.get().getTableType())
|
||||
&& connectConfigs.isAsyncCompactEnabled();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user