[HUDI-3834] Fixing performance hits in reading Column Stats Index (#5266)
Fixing performance hits in reading Column Stats Index: [HUDI-3834] There is a substantial performance degradation in the Builder classes that Avro 1.10 generates by default: they rely on SpecificData.getForSchema, which loads the corresponding model's class using reflection and takes a hit when executed on the hot path (this was bringing the overall runtime of reading the full Column Stats Index of 800k records to 60s, whereas it now takes a mere 3s). Addressing memory churn caused by overuse of Hadoop's Path creation: the Path constructor is not a lightweight sequence and produces quite a bit of memory churn, adding pressure on the GC. Cleaning up such avoidable allocations to make sure no unnecessary pressure is added on the GC.
This commit is contained in:
@@ -27,7 +27,6 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
||||
@@ -37,7 +36,6 @@ import org.apache.hudi.config.HoodieIndexConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase;
|
||||
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@@ -61,21 +59,18 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
*/
|
||||
public class ITTestClusteringCommand extends AbstractShellIntegrationTest {
|
||||
|
||||
private String tablePath;
|
||||
private String tableName;
|
||||
|
||||
@BeforeEach
|
||||
public void init() throws IOException {
|
||||
tableName = "test_table_" + ITTestClusteringCommand.class.getName();
|
||||
tablePath = Paths.get(basePath, tableName).toString();
|
||||
basePath = Paths.get(basePath, tableName).toString();
|
||||
|
||||
HoodieCLI.conf = jsc.hadoopConfiguration();
|
||||
// Create table and connect
|
||||
new TableCommand().createTable(
|
||||
tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
|
||||
basePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
|
||||
"", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
|
||||
metaClient.setBasePath(tablePath);
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -168,7 +163,7 @@ public class ITTestClusteringCommand extends AbstractShellIntegrationTest {
|
||||
HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
|
||||
|
||||
// Create the write client to write some records in
|
||||
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
|
||||
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
|
||||
.withDeleteParallelism(2).forTable(tableName)
|
||||
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
|
||||
|
||||
@@ -54,16 +54,16 @@ public class ITTestCommitsCommand extends AbstractShellIntegrationTest {
|
||||
|
||||
@BeforeEach
|
||||
public void init() throws IOException {
|
||||
String tableName = "test_table_" + ITTestCommitsCommand.class.getName();
|
||||
String tablePath = Paths.get(basePath, tableName).toString();
|
||||
tableName = "test_table_" + ITTestCommitsCommand.class.getName();
|
||||
basePath = Paths.get(basePath, tableName).toString();
|
||||
|
||||
HoodieCLI.conf = jsc.hadoopConfiguration();
|
||||
// Create table and connect
|
||||
new TableCommand().createTable(
|
||||
tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
|
||||
basePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
|
||||
"", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
|
||||
metaClient.setBasePath(tablePath);
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -32,7 +32,6 @@ import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
||||
@@ -48,7 +47,6 @@ import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.shell.core.CommandResult;
|
||||
@@ -73,21 +71,18 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
*/
|
||||
public class ITTestCompactionCommand extends AbstractShellIntegrationTest {
|
||||
|
||||
private String tablePath;
|
||||
private String tableName;
|
||||
|
||||
@BeforeEach
|
||||
public void init() throws IOException {
|
||||
tableName = "test_table_" + ITTestCompactionCommand.class.getName();
|
||||
tablePath = Paths.get(basePath, tableName).toString();
|
||||
basePath = Paths.get(basePath, tableName).toString();
|
||||
|
||||
HoodieCLI.conf = jsc.hadoopConfiguration();
|
||||
// Create table and connect
|
||||
new TableCommand().createTable(
|
||||
tablePath, tableName, HoodieTableType.MERGE_ON_READ.name(),
|
||||
basePath, tableName, HoodieTableType.MERGE_ON_READ.name(),
|
||||
"", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
|
||||
metaClient.setBasePath(tablePath);
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -298,7 +293,7 @@ public class ITTestCompactionCommand extends AbstractShellIntegrationTest {
|
||||
HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
|
||||
|
||||
// Create the write client to write some records in
|
||||
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
|
||||
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
|
||||
.withDeleteParallelism(2).forTable(tableName)
|
||||
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
|
||||
|
||||
Reference in New Issue
Block a user