[HUDI-2044] Integrate consumers with rocksDB and compression within External Spillable Map (#3318)
@@ -68,7 +68,8 @@ public class EmbeddedTimelineServerHelper {
     Option<String> hostAddr = context.getProperty(EngineProperty.EMBEDDED_SERVER_HOST);
     EmbeddedTimelineService timelineService = new EmbeddedTimelineService(
         context, hostAddr.orElse(null),config.getEmbeddedTimelineServerPort(),
-        config.getMetadataConfig(), config.getClientSpecifiedViewStorageConfig(), config.getBasePath(),
+        config.getMetadataConfig(), config.getCommonConfig(),
+        config.getClientSpecifiedViewStorageConfig(), config.getBasePath(),
         config.getEmbeddedTimelineServerThreads(), config.getEmbeddedTimelineServerCompressOutput(),
         config.getEmbeddedTimelineServerUseAsync());
     timelineService.startServer();

@@ -18,6 +18,7 @@
 
 package org.apache.hudi.client.embedded;
 
+import org.apache.hudi.common.config.HoodieCommonConfig;
 import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.engine.HoodieEngineContext;
 import org.apache.hudi.common.config.SerializableConfiguration;

@@ -46,6 +47,7 @@ public class EmbeddedTimelineService {
   private final SerializableConfiguration hadoopConf;
   private final FileSystemViewStorageConfig config;
   private final HoodieMetadataConfig metadataConfig;
+  private final HoodieCommonConfig commonConfig;
   private final String basePath;
 
   private final int numThreads;

@@ -55,13 +57,14 @@ public class EmbeddedTimelineService {
   private transient TimelineService server;
 
   public EmbeddedTimelineService(HoodieEngineContext context, String embeddedTimelineServiceHostAddr, int embeddedTimelineServerPort,
-                                 HoodieMetadataConfig metadataConfig, FileSystemViewStorageConfig config, String basePath,
+                                 HoodieMetadataConfig metadataConfig, HoodieCommonConfig commonConfig, FileSystemViewStorageConfig config, String basePath,
                                  int numThreads, boolean compressOutput, boolean useAsync) {
     setHostAddr(embeddedTimelineServiceHostAddr);
     this.context = context;
     this.config = config;
     this.basePath = basePath;
     this.metadataConfig = metadataConfig;
+    this.commonConfig = commonConfig;
     this.hadoopConf = context.getHadoopConf();
     this.viewManager = createViewManager();
     this.preferredPort = embeddedTimelineServerPort;

@@ -80,7 +83,7 @@ public class EmbeddedTimelineService {
       // Reset to default if set to Remote
       builder.withStorageType(FileSystemViewStorageType.MEMORY);
     }
-    return FileSystemViewManager.createViewManager(context, metadataConfig, builder.build(), basePath);
+    return FileSystemViewManager.createViewManager(context, metadataConfig, builder.build(), commonConfig, basePath);
   }
 
   public void startServer() throws IOException {

@@ -24,6 +24,7 @@ import org.apache.hudi.client.transaction.ConflictResolutionStrategy;
 import org.apache.hudi.common.config.ConfigClassProperty;
 import org.apache.hudi.common.config.ConfigGroups;
 import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.HoodieCommonConfig;
 import org.apache.hudi.common.config.HoodieConfig;
 import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.engine.EngineType;

@@ -38,7 +39,6 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
 import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
 import org.apache.hudi.common.util.ReflectionUtils;
 import org.apache.hudi.common.util.ValidationUtils;
-import org.apache.hudi.common.util.collection.ExternalSpillableMap;
 import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode;
 import org.apache.hudi.index.HoodieIndex;
 import org.apache.hudi.keygen.constant.KeyGeneratorType;

@@ -59,7 +59,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.Arrays;
 import java.util.List;
-import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Properties;

@@ -312,18 +311,6 @@ public class HoodieWriteConfig extends HoodieConfig {
       .withDocumentation("When enabled, we allow duplicate keys even if inserts are routed to merge with an existing file (for ensuring file sizing)."
           + " This is only relevant for insert operation, since upsert, delete operations will ensure unique key constraints are maintained.");
 
-  public static final ConfigProperty<ExternalSpillableMap.DiskMapType> SPILLABLE_DISK_MAP_TYPE = ConfigProperty
-      .key("hoodie.spillable.diskmap.type")
-      .defaultValue(ExternalSpillableMap.DiskMapType.BITCASK)
-      .withDocumentation("When handling input data that cannot be held in memory, to merge with a file on storage, a spillable diskmap is employed. "
-          + "By default, we use a persistent hashmap based loosely on bitcask, that offers O(1) inserts, lookups. "
-          + "Change this to `ROCKS_DB` to prefer using rocksDB, for handling the spill.");
-
-  public static final ConfigProperty<Boolean> DISK_MAP_BITCASK_COMPRESSION_ENABLED = ConfigProperty
-      .key("hoodie.diskmap.bitcask.compression.enabled")
-      .defaultValue(true)
-      .withDocumentation("Turn on compression for BITCASK disk map used by the External Spillable Map");
-
   public static final ConfigProperty<Integer> CLIENT_HEARTBEAT_INTERVAL_IN_MS_PROP = ConfigProperty
       .key("hoodie.client.heartbeat.interval_in_ms")
       .defaultValue(60 * 1000)

@@ -388,6 +375,7 @@ public class HoodieWriteConfig extends HoodieConfig {
   private FileSystemViewStorageConfig viewStorageConfig;
   private HoodiePayloadConfig hoodiePayloadConfig;
   private HoodieMetadataConfig metadataConfig;
+  private HoodieCommonConfig commonConfig;
   private EngineType engineType;
 
   /**

@@ -409,6 +397,7 @@ public class HoodieWriteConfig extends HoodieConfig {
     this.viewStorageConfig = clientSpecifiedViewStorageConfig;
     this.hoodiePayloadConfig = HoodiePayloadConfig.newBuilder().fromProperties(newProps).build();
     this.metadataConfig = HoodieMetadataConfig.newBuilder().fromProperties(props).build();
+    this.commonConfig = HoodieCommonConfig.newBuilder().fromProperties(props).build();
   }
 
   public static HoodieWriteConfig.Builder newBuilder() {

@@ -597,14 +586,6 @@ public class HoodieWriteConfig extends HoodieConfig {
     return getBoolean(MERGE_ALLOW_DUPLICATE_ON_INSERTS);
   }
 
-  public ExternalSpillableMap.DiskMapType getSpillableDiskMapType() {
-    return ExternalSpillableMap.DiskMapType.valueOf(getString(SPILLABLE_DISK_MAP_TYPE).toUpperCase(Locale.ROOT));
-  }
-
-  public boolean isBitCaskDiskMapCompressionEnabled() {
-    return getBoolean(DISK_MAP_BITCASK_COMPRESSION_ENABLED);
-  }
-
   public EngineType getEngineType() {
     return engineType;
   }

@@ -1159,6 +1140,10 @@ public class HoodieWriteConfig extends HoodieConfig {
     return metadataConfig;
   }
 
+  public HoodieCommonConfig getCommonConfig() {
+    return commonConfig;
+  }
+
   /**
    * Commit call back configs.
    */

@@ -1564,16 +1549,6 @@ public class HoodieWriteConfig extends HoodieConfig {
       return this;
     }
 
-    public Builder withSpillableDiskMapType(ExternalSpillableMap.DiskMapType diskMapType) {
-      writeConfig.setValue(SPILLABLE_DISK_MAP_TYPE, diskMapType.name());
-      return this;
-    }
-
-    public Builder withBitcaskDiskMapCompressionEnabled(boolean bitcaskDiskMapCompressionEnabled) {
-      writeConfig.setValue(DISK_MAP_BITCASK_COMPRESSION_ENABLED, String.valueOf(bitcaskDiskMapCompressionEnabled));
-      return this;
-    }
-
     public Builder withHeartbeatIntervalInMs(Integer heartbeatIntervalInMs) {
       writeConfig.setValue(CLIENT_HEARTBEAT_INTERVAL_IN_MS_PROP, String.valueOf(heartbeatIntervalInMs));
       return this;

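With the two spillable-map options and their Builder setters removed from HoodieWriteConfig, callers now obtain the values from HoodieCommonConfig, which the write config builds from the same Properties. Below is a minimal sketch (not part of this commit; the class name, base path, and chosen values are illustrative) of wiring the relocated keys through plain Properties, mirroring the test changes further down in this diff:

import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.util.collection.ExternalSpillableMap;
import org.apache.hudi.config.HoodieWriteConfig;

import java.util.Properties;

public class SpillableMapConfigExample {
  public static void main(String[] args) {
    // Route the relocated settings through HoodieCommonConfig keys; any Properties handed to
    // the write config builder are picked up by HoodieCommonConfig.newBuilder().fromProperties(...)
    // in the HoodieWriteConfig constructor shown above.
    Properties props = new Properties();
    props.setProperty(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.key(),
        ExternalSpillableMap.DiskMapType.ROCKS_DB.name());
    props.setProperty(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.key(), "false");

    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
        .withPath("/tmp/hoodie/sample-table") // hypothetical base path
        .withProperties(props)
        .build();

    // Consumers such as HoodieMergeHandle read the values back via the common config.
    System.out.println(writeConfig.getCommonConfig().getSpillableDiskMapType());
    System.out.println(writeConfig.getCommonConfig().isBitCaskDiskMapCompressionEnabled());
  }
}
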
@@ -211,7 +211,8 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
       LOG.info("MaxMemoryPerPartitionMerge => " + memoryForMerge);
       this.keyToNewRecords = new ExternalSpillableMap<>(memoryForMerge, config.getSpillableMapBasePath(),
           new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(tableSchema),
-          config.getSpillableDiskMapType(), config.isBitCaskDiskMapCompressionEnabled());
+          config.getCommonConfig().getSpillableDiskMapType(),
+          config.getCommonConfig().isBitCaskDiskMapCompressionEnabled());
     } catch (IOException io) {
       throw new HoodieIOException("Cannot instantiate an ExternalSpillableMap", io);
     }

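For reference, here is a small standalone sketch (not from this commit) of the same ExternalSpillableMap constructor that HoodieMergeHandle calls above: the last two arguments select the on-disk map implementation and the BITCASK compression flag. The size-estimator import path, the spill directory, and the memory budget are assumptions made for the example.

import org.apache.hudi.common.util.DefaultSizeEstimator;
import org.apache.hudi.common.util.collection.ExternalSpillableMap;

import java.io.IOException;

public class ExternalSpillableMapSketch {
  public static void main(String[] args) throws IOException {
    // Same constructor shape as in HoodieMergeHandle: in-memory budget, spill directory,
    // key/value size estimators, then the disk map type and the BITCASK compression flag.
    ExternalSpillableMap<String, String> spillable = new ExternalSpillableMap<>(
        16 * 1024 * 1024L,                          // spill to disk once ~16 MB is held in memory
        "/tmp/hoodie-spill",                        // hypothetical spill directory
        new DefaultSizeEstimator<>(),               // key size estimator
        new DefaultSizeEstimator<>(),               // value size estimator
        ExternalSpillableMap.DiskMapType.ROCKS_DB,  // use RocksDB instead of the bitcask map
        false);                                     // compression flag only applies to BITCASK

    spillable.put("recordKey", "payload");
    System.out.println(spillable.get("recordKey"));
  }
}
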
@@ -113,7 +113,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
     this.metadata = HoodieTableMetadata.create(context, metadataConfig, config.getBasePath(),
         FileSystemViewStorageConfig.FILESYSTEM_VIEW_SPILLABLE_DIR.defaultValue());
 
-    this.viewManager = FileSystemViewManager.createViewManager(context, config.getMetadataConfig(), config.getViewStorageConfig(), () -> metadata);
+    this.viewManager = FileSystemViewManager.createViewManager(context, config.getMetadataConfig(), config.getViewStorageConfig(), config.getCommonConfig(), () -> metadata);
     this.metaClient = metaClient;
     this.index = getIndex(config, context);
     this.taskContextSupplier = context.getTaskContextSupplier();

@@ -123,7 +123,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
 
   private synchronized FileSystemViewManager getViewManager() {
     if (null == viewManager) {
-      viewManager = FileSystemViewManager.createViewManager(getContext(), config.getMetadataConfig(), config.getViewStorageConfig(), () -> metadata);
+      viewManager = FileSystemViewManager.createViewManager(getContext(), config.getMetadataConfig(), config.getViewStorageConfig(), config.getCommonConfig(), () -> metadata);
     }
     return viewManager;
   }

@@ -129,6 +129,8 @@ public class HoodieFlinkMergeOnReadTableCompactor<T extends HoodieRecordPayload>
         .withReverseReader(config.getCompactionReverseLogReadEnabled())
         .withBufferSize(config.getMaxDFSStreamBufferSize())
         .withSpillableMapBasePath(config.getSpillableMapBasePath())
+        .withDiskMapType(config.getCommonConfig().getSpillableDiskMapType())
+        .withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
         .build();
     if (!scanner.iterator().hasNext()) {
       return new ArrayList<>();

@@ -207,6 +207,8 @@ public class SparkExecuteClusteringCommitActionExecutor<T extends HoodieRecordPa
         .withReverseReader(config.getCompactionReverseLogReadEnabled())
         .withBufferSize(config.getMaxDFSStreamBufferSize())
         .withSpillableMapBasePath(config.getSpillableMapBasePath())
+        .withDiskMapType(config.getCommonConfig().getSpillableDiskMapType())
+        .withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
         .build();
 
     recordIterators.add(HoodieFileSliceReader.getFileSliceReader(baseFileReader, scanner, readerSchema,

@@ -151,6 +151,8 @@ public class HoodieSparkMergeOnReadTableCompactor<T extends HoodieRecordPayload>
         .withReverseReader(config.getCompactionReverseLogReadEnabled())
         .withBufferSize(config.getMaxDFSStreamBufferSize())
         .withSpillableMapBasePath(config.getSpillableMapBasePath())
+        .withDiskMapType(config.getCommonConfig().getSpillableDiskMapType())
+        .withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
         .build();
     if (!scanner.iterator().hasNext()) {
       return new ArrayList<>();

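All three call sites above forward the same two common-config values into the log-record-scanner builder. The sketch below shows that wiring as a helper method; it assumes the surrounding builder is HoodieMergedLogRecordScanner (the class name is not visible in these hunks) and that the remaining inputs and accessors (file system, log file paths, schema, instant time, getMaxMemoryPerCompaction, getCompactionLazyBlockReadEnabled) are supplied as in the compactor code. Neither this class nor the method is part of the commit.

import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
import org.apache.hudi.config.HoodieWriteConfig;

import java.util.List;

public class LogScannerSketch {
  // Builds a merged log-record scanner wired to the relocated spillable-map settings,
  // mirroring the compactor/clustering call sites above; the non-spill inputs come from the caller.
  public static HoodieMergedLogRecordScanner buildScanner(HoodieWriteConfig config, FileSystem fs,
      String basePath, List<String> logFilePaths, Schema readerSchema, String latestInstantTime) {
    return HoodieMergedLogRecordScanner.newBuilder()
        .withFileSystem(fs)
        .withBasePath(basePath)
        .withLogFilePaths(logFilePaths)
        .withReaderSchema(readerSchema)
        .withLatestInstantTime(latestInstantTime)
        .withMaxMemorySizeInBytes(config.getMaxMemoryPerCompaction())
        .withReadBlocksLazily(config.getCompactionLazyBlockReadEnabled())
        .withReverseReader(config.getCompactionReverseLogReadEnabled())
        .withBufferSize(config.getMaxDFSStreamBufferSize())
        .withSpillableMapBasePath(config.getSpillableMapBasePath())
        // The two calls introduced by this commit: choose BITCASK vs ROCKS_DB and the
        // BITCASK compression flag from HoodieCommonConfig.
        .withDiskMapType(config.getCommonConfig().getSpillableDiskMapType())
        .withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
        .build();
  }
}
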
@@ -20,6 +20,7 @@ package org.apache.hudi.io;
 
 import org.apache.hudi.client.SparkRDDWriteClient;
 import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.config.HoodieCommonConfig;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieWriteStat;

@@ -27,6 +28,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.common.util.collection.ExternalSpillableMap;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieIndexConfig;
 import org.apache.hudi.config.HoodieStorageConfig;

@@ -41,16 +43,21 @@ import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Properties;
+import java.util.stream.Stream;
 
 import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.params.provider.Arguments.arguments;
 
 @SuppressWarnings("unchecked")
 public class TestHoodieMergeHandle extends HoodieClientTestHarness {

@@ -69,14 +76,23 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
     cleanupResources();
   }
 
-  @Test
-  public void testUpsertsForMultipleRecordsInSameFile() throws Exception {
+  @ParameterizedTest
+  @MethodSource("testArguments")
+  public void testUpsertsForMultipleRecordsInSameFile(ExternalSpillableMap.DiskMapType diskMapType,
+                                                      boolean isCompressionEnabled) throws Exception {
     // Create records in a single partition
     String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0];
     dataGen = new HoodieTestDataGenerator(new String[] {partitionPath});
 
+    // Build a common config with diff configs
+    Properties properties = new Properties();
+    properties.setProperty(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.key(), diskMapType.name());
+    properties.setProperty(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.key(), String.valueOf(isCompressionEnabled));
+
     // Build a write config with bulkinsertparallelism set
-    HoodieWriteConfig cfg = getConfigBuilder().build();
+    HoodieWriteConfig cfg = getConfigBuilder()
+        .withProperties(properties)
+        .build();
     try (SparkRDDWriteClient client = getHoodieWriteClient(cfg);) {
       FileSystem fs = FSUtils.getFs(basePath, hadoopConf);
 

@@ -215,10 +231,19 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
     }
   }
 
-  @Test
-  public void testHoodieMergeHandleWriteStatMetrics() throws Exception {
+  @ParameterizedTest
+  @MethodSource("testArguments")
+  public void testHoodieMergeHandleWriteStatMetrics(ExternalSpillableMap.DiskMapType diskMapType,
+                                                    boolean isCompressionEnabled) throws Exception {
     // insert 100 records
-    HoodieWriteConfig config = getConfigBuilder().build();
+    // Build a common config with diff configs
+    Properties properties = new Properties();
+    properties.setProperty(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.key(), diskMapType.name());
+    properties.setProperty(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.key(), String.valueOf(isCompressionEnabled));
+
+    HoodieWriteConfig config = getConfigBuilder()
+        .withProperties(properties)
+        .build();
     try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config);) {
       String newCommitTime = "100";
       writeClient.startCommitWithTime(newCommitTime);

@@ -317,6 +342,16 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
         .withBulkInsertParallelism(2).withWriteStatusClass(TestWriteStatus.class);
   }
 
+  private static Stream<Arguments> testArguments() {
+    // Arg1: ExternalSpillableMap Type, Arg2: isDiskMapCompressionEnabled
+    return Stream.of(
+        arguments(ExternalSpillableMap.DiskMapType.BITCASK, false),
+        arguments(ExternalSpillableMap.DiskMapType.ROCKS_DB, false),
+        arguments(ExternalSpillableMap.DiskMapType.BITCASK, true),
+        arguments(ExternalSpillableMap.DiskMapType.ROCKS_DB, true)
+    );
+  }
+
   /**
    * Overridden so that we can capture and inspect all success records.
    */