[HUDI-3245] Convert uppercase letters to lowercase in storage configs (#4602)

This commit is contained in:
Thinking Chen
2022-01-19 03:51:09 +08:00
committed by GitHub
parent a09c231911
commit caeea946fb
5 changed files with 13 additions and 12 deletions

View File

@@ -116,14 +116,14 @@ public class HoodieStorageConfig extends HoodieConfig {
       .withDocumentation("Whether to use dictionary encoding");
   public static final ConfigProperty<String> PARQUET_WRITE_LEGACY_FORMAT_ENABLED = ConfigProperty
-      .key("hoodie.parquet.writeLegacyFormat.enabled")
+      .key("hoodie.parquet.writelegacyformat.enabled")
       .defaultValue("false")
       .withDocumentation("Sets spark.sql.parquet.writeLegacyFormat. If true, data will be written in a way of Spark 1.4 and earlier. "
           + "For example, decimal values will be written in Parquet's fixed-length byte array format which other systems such as Apache Hive and Apache Impala use. "
           + "If false, the newer format in Parquet will be used. For example, decimals will be written in int-based format.");
   public static final ConfigProperty<String> PARQUET_OUTPUT_TIMESTAMP_TYPE = ConfigProperty
-      .key("hoodie.parquet.outputTimestampType")
+      .key("hoodie.parquet.outputtimestamptype")
       .defaultValue("TIMESTAMP_MILLIS")
       .withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files.");

View File

@@ -38,6 +38,7 @@ import org.apache.hudi.config.HoodieClusteringConfig;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieIndexConfig;
 import org.apache.hudi.config.HoodiePayloadConfig;
+import org.apache.hudi.config.HoodieStorageConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieNotSupportedException;
@@ -318,12 +319,12 @@ public class DataSourceUtils {
   // Now by default ParquetWriteSupport will write DecimalType to parquet as int32/int64 when the scale of decimalType < Decimal.MAX_LONG_DIGITS(),
   // but AvroParquetReader which used by HoodieParquetReader cannot support read int32/int64 as DecimalType.
-  // try to find current schema whether contains that DecimalType, and auto set the value of "hoodie.parquet.writeLegacyFormat.enabled"
+  // try to find current schema whether contains that DecimalType, and auto set the value of "hoodie.parquet.writelegacyformat.enabled"
   public static void mayBeOverwriteParquetWriteLegacyFormatProp(Map<String, String> properties, StructType schema) {
     if (DataTypeUtils.foundSmallPrecisionDecimalType(schema)
-        && !Boolean.parseBoolean(properties.getOrDefault("hoodie.parquet.writeLegacyFormat.enabled", "false"))) {
-      properties.put("hoodie.parquet.writeLegacyFormat.enabled", "true");
-      LOG.warn("Small Decimal Type found in current schema, auto set the value of hoodie.parquet.writeLegacyFormat.enabled to true");
+        && !Boolean.parseBoolean(properties.getOrDefault(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key(), "false"))) {
+      properties.put(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key(), "true");
+      LOG.warn("Small Decimal Type found in current schema, auto set the value of hoodie.parquet.writelegacyformat.enabled to true");
     }
   }
 }

View File

@@ -299,18 +299,18 @@ public class TestDataSourceUtils {
     StructType structType = StructType$.MODULE$.apply(structFields);
     // create write options
     Map<String, String> options = new HashMap<>();
-    options.put("hoodie.parquet.writeLegacyFormat.enabled", String.valueOf(defaultWriteValue));
+    options.put("hoodie.parquet.writelegacyformat.enabled", String.valueOf(defaultWriteValue));
     // start test
     mayBeOverwriteParquetWriteLegacyFormatProp(options, structType);
     // check result
-    boolean res = Boolean.parseBoolean(options.get("hoodie.parquet.writeLegacyFormat.enabled"));
+    boolean res = Boolean.parseBoolean(options.get("hoodie.parquet.writelegacyformat.enabled"));
     if (smallDecimal) {
-      // should auto modify "hoodie.parquet.writeLegacyFormat.enabled" = "true".
+      // should auto modify "hoodie.parquet.writelegacyformat.enabled" = "true".
       assertEquals(true, res);
     } else {
-      // should not modify the value of "hoodie.parquet.writeLegacyFormat.enabled".
+      // should not modify the value of "hoodie.parquet.writelegacyformat.enabled".
       assertEquals(defaultWriteValue, res);
     }
   }

View File

@@ -68,7 +68,7 @@ public class DefaultSource extends BaseDefaultSource implements DataSourceV2,
     boolean populateMetaFields = options.getBoolean(HoodieTableConfig.POPULATE_META_FIELDS.key(),
         Boolean.parseBoolean(HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()));
     Map<String, String> properties = options.asMap();
-    // Auto set the value of "hoodie.parquet.writeLegacyFormat.enabled"
+    // Auto set the value of "hoodie.parquet.writelegacyformat.enabled"
     mayBeOverwriteParquetWriteLegacyFormatProp(properties, schema);
     // 1st arg to createHoodieConfig is not really required to be set. but passing it anyways.
     HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(options.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()).get(), path, tblName, properties);

View File

@@ -58,7 +58,7 @@ public class DefaultSource extends BaseDefaultSource implements TableProvider {
         Boolean.toString(HoodieInternalConfig.DEFAULT_BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED)));
     // Create a new map as the properties is an unmodifiableMap on Spark 3.2.0
     Map<String, String> newProps = new HashMap<>(properties);
-    // Auto set the value of "hoodie.parquet.writeLegacyFormat.enabled"
+    // Auto set the value of "hoodie.parquet.writelegacyformat.enabled"
     mayBeOverwriteParquetWriteLegacyFormatProp(newProps, schema);
     // 1st arg to createHoodieConfig is not really required to be set. but passing it anyways.
     HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(newProps.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()), path, tblName, newProps);