diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java index 0553c86fb..d7c2a20a6 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java @@ -110,6 +110,11 @@ public class HoodieStorageConfig extends HoodieConfig { .defaultValue("gzip") .withDocumentation("Compression Codec for parquet files"); + public static final ConfigProperty PARQUET_DICTIONARY_ENABLED = ConfigProperty + .key("hoodie.parquet.dictionary.enabled") + .defaultValue(true) + .withDocumentation("Whether to use dictionary encoding"); + public static final ConfigProperty HFILE_COMPRESSION_ALGORITHM_NAME = ConfigProperty .key("hoodie.hfile.compression.algorithm") .defaultValue("GZ") diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 7f0ec1076..ead3ef1f0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -1403,10 +1403,6 @@ public class HoodieWriteConfig extends HoodieConfig { return getInt(HoodieStorageConfig.LOGFILE_DATA_BLOCK_MAX_SIZE); } - public long getLogFileMaxSize() { - return getLong(HoodieStorageConfig.LOGFILE_MAX_SIZE); - } - public double getParquetCompressionRatio() { return getDouble(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION); } @@ -1415,6 +1411,14 @@ public class HoodieWriteConfig extends HoodieConfig { return CompressionCodecName.fromConf(getString(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME)); } + public boolean parquetDictionaryEnabled() { + return getBoolean(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED); + } + + public long getLogFileMaxSize() { + return getLong(HoodieStorageConfig.LOGFILE_MAX_SIZE); + } + public double getLogFileToParquetCompressionRatio() { return getDouble(HoodieStorageConfig.LOGFILE_TO_PARQUET_COMPRESSION_RATIO_FRACTION); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java index f934a8a83..1a10e6a71 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java @@ -33,4 +33,10 @@ public class HoodieAvroParquetConfig extends HoodieBaseParquetConfig { private final T writeSupport; - private CompressionCodecName compressionCodecName; - private int blockSize; - private int pageSize; - private long maxFileSize; - private Configuration hadoopConf; - private double compressionRatio; + private final CompressionCodecName compressionCodecName; + private final int blockSize; + private final int pageSize; + private final long maxFileSize; + private final Configuration hadoopConf; + private final double compressionRatio; + private final boolean dictionaryEnabled; public HoodieBaseParquetConfig(T writeSupport, CompressionCodecName compressionCodecName, int blockSize, - int pageSize, long maxFileSize, Configuration hadoopConf, double compressionRatio) { + int pageSize, long maxFileSize, Configuration hadoopConf, double compressionRatio) { + this(writeSupport, compressionCodecName, blockSize, pageSize, maxFileSize, hadoopConf, compressionRatio, false); + } + + public HoodieBaseParquetConfig(T writeSupport, CompressionCodecName compressionCodecName, int blockSize, + int pageSize, long maxFileSize, Configuration hadoopConf, double compressionRatio, boolean dictionaryEnabled) { this.writeSupport = writeSupport; this.compressionCodecName = compressionCodecName; this.blockSize = blockSize; @@ -43,6 +49,7 @@ public class HoodieBaseParquetConfig { this.maxFileSize = maxFileSize; this.hadoopConf = hadoopConf; this.compressionRatio = compressionRatio; + this.dictionaryEnabled = dictionaryEnabled; } public CompressionCodecName getCompressionCodecName() { @@ -72,4 +79,8 @@ public class HoodieBaseParquetConfig { public T getWriteSupport() { return writeSupport; } + + public boolean dictionaryEnabled() { + return dictionaryEnabled; + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index 56022c94e..e88c34f60 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -71,7 +71,7 @@ public class HoodieFileWriterFactory { HoodieAvroParquetConfig parquetConfig = new HoodieAvroParquetConfig(writeSupport, config.getParquetCompressionCodec(), config.getParquetBlockSize(), config.getParquetPageSize(), config.getParquetMaxFileSize(), - hoodieTable.getHadoopConf(), config.getParquetCompressionRatio()); + hoodieTable.getHadoopConf(), config.getParquetCompressionRatio(), config.parquetDictionaryEnabled()); return new HoodieParquetWriter<>(instantTime, path, parquetConfig, schema, taskContextSupplier, populateMetaFields); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java index e7328fb50..4f51de35d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java @@ -56,7 +56,7 @@ public class HoodieParquetWriter