[HUDI-2332] Add clustering and compaction in Kafka Connect Sink (#3857)
* [HUDI-2332] Add clustering and compaction in Kafka Connect Sink
* Disable validation check on instant time for compaction and adjust configs
* Add javadocs
* Add clustering and compaction config
* Fix transaction causing missing records in the target table
* Add debugging logs
* Fix kafka offset sync in participant
* Adjust how clustering and compaction are configured in kafka-connect
* Fix clustering strategy
* Remove irrelevant changes from other published PRs
* Update clustering logic and others
* Update README
* Fix test failures
* Fix indentation
* Fix clustering config
* Add JavaCustomColumnsSortPartitioner and make async compaction enabled by default
* Add test for JavaCustomColumnsSortPartitioner
* Add more changes after IDE sync
* Update README with clarification
* Fix clustering logic after rebasing
* Remove unrelated changes
This commit is contained in:
@@ -20,7 +20,6 @@ package org.apache.hudi.config;
|
||||
|
||||
import org.apache.hudi.common.engine.EngineType;
|
||||
import org.apache.hudi.config.HoodieWriteConfig.Builder;
|
||||
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
@@ -81,6 +80,52 @@ public class TestHoodieWriteConfig {
|
||||
assertEquals(HoodieIndex.IndexType.INMEMORY, writeConfig.getIndexType());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultClusteringPlanStrategyClassAccordingToEngineType() {
|
||||
// Default (as Spark)
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").build();
|
||||
assertEquals(
|
||||
HoodieClusteringConfig.SPARK_SIZED_BASED_CLUSTERING_PLAN_STRATEGY,
|
||||
writeConfig.getClusteringPlanStrategyClass());
|
||||
|
||||
// Spark
|
||||
writeConfig = HoodieWriteConfig.newBuilder().withEngineType(EngineType.SPARK).withPath("/tmp").build();
|
||||
assertEquals(
|
||||
HoodieClusteringConfig.SPARK_SIZED_BASED_CLUSTERING_PLAN_STRATEGY,
|
||||
writeConfig.getClusteringPlanStrategyClass());
|
||||
|
||||
// Flink and Java
|
||||
for (EngineType engineType : new EngineType[] {EngineType.FLINK, EngineType.JAVA}) {
|
||||
writeConfig = HoodieWriteConfig.newBuilder().withEngineType(engineType).withPath("/tmp").build();
|
||||
assertEquals(
|
||||
HoodieClusteringConfig.JAVA_SIZED_BASED_CLUSTERING_PLAN_STRATEGY,
|
||||
writeConfig.getClusteringPlanStrategyClass());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultClusteringExecutionStrategyClassAccordingToEngineType() {
|
||||
// Default (as Spark)
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").build();
|
||||
assertEquals(
|
||||
HoodieClusteringConfig.SPARK_SORT_AND_SIZE_EXECUTION_STRATEGY,
|
||||
writeConfig.getClusteringExecutionStrategyClass());
|
||||
|
||||
// Spark
|
||||
writeConfig = HoodieWriteConfig.newBuilder().withEngineType(EngineType.SPARK).withPath("/tmp").build();
|
||||
assertEquals(
|
||||
HoodieClusteringConfig.SPARK_SORT_AND_SIZE_EXECUTION_STRATEGY,
|
||||
writeConfig.getClusteringExecutionStrategyClass());
|
||||
|
||||
// Flink and Java
|
||||
for (EngineType engineType : new EngineType[] {EngineType.FLINK, EngineType.JAVA}) {
|
||||
writeConfig = HoodieWriteConfig.newBuilder().withEngineType(engineType).withPath("/tmp").build();
|
||||
assertEquals(
|
||||
HoodieClusteringConfig.JAVA_SORT_AND_SIZE_EXECUTION_STRATEGY,
|
||||
writeConfig.getClusteringExecutionStrategyClass());
|
||||
}
|
||||
}
|
||||
|
||||
private ByteArrayOutputStream saveParamsIntoOutputStream(Map<String, String> params) throws IOException {
|
||||
Properties properties = new Properties();
|
||||
properties.putAll(params);
|
||||
|
||||
Reference in New Issue
Block a user