[HUDI-3245] Convert uppercase letters to lowercase in storage configs (#4602)
This commit is contained in:
@@ -116,14 +116,14 @@ public class HoodieStorageConfig extends HoodieConfig {
|
|||||||
.withDocumentation("Whether to use dictionary encoding");
|
.withDocumentation("Whether to use dictionary encoding");
|
||||||
|
|
||||||
public static final ConfigProperty<String> PARQUET_WRITE_LEGACY_FORMAT_ENABLED = ConfigProperty
|
public static final ConfigProperty<String> PARQUET_WRITE_LEGACY_FORMAT_ENABLED = ConfigProperty
|
||||||
.key("hoodie.parquet.writeLegacyFormat.enabled")
|
.key("hoodie.parquet.writelegacyformat.enabled")
|
||||||
.defaultValue("false")
|
.defaultValue("false")
|
||||||
.withDocumentation("Sets spark.sql.parquet.writeLegacyFormat. If true, data will be written in a way of Spark 1.4 and earlier. "
|
.withDocumentation("Sets spark.sql.parquet.writeLegacyFormat. If true, data will be written in a way of Spark 1.4 and earlier. "
|
||||||
+ "For example, decimal values will be written in Parquet's fixed-length byte array format which other systems such as Apache Hive and Apache Impala use. "
|
+ "For example, decimal values will be written in Parquet's fixed-length byte array format which other systems such as Apache Hive and Apache Impala use. "
|
||||||
+ "If false, the newer format in Parquet will be used. For example, decimals will be written in int-based format.");
|
+ "If false, the newer format in Parquet will be used. For example, decimals will be written in int-based format.");
|
||||||
|
|
||||||
public static final ConfigProperty<String> PARQUET_OUTPUT_TIMESTAMP_TYPE = ConfigProperty
|
public static final ConfigProperty<String> PARQUET_OUTPUT_TIMESTAMP_TYPE = ConfigProperty
|
||||||
.key("hoodie.parquet.outputTimestampType")
|
.key("hoodie.parquet.outputtimestamptype")
|
||||||
.defaultValue("TIMESTAMP_MILLIS")
|
.defaultValue("TIMESTAMP_MILLIS")
|
||||||
.withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files.");
|
.withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files.");
|
||||||
|
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ import org.apache.hudi.config.HoodieClusteringConfig;
|
|||||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||||
import org.apache.hudi.config.HoodieIndexConfig;
|
import org.apache.hudi.config.HoodieIndexConfig;
|
||||||
import org.apache.hudi.config.HoodiePayloadConfig;
|
import org.apache.hudi.config.HoodiePayloadConfig;
|
||||||
|
import org.apache.hudi.config.HoodieStorageConfig;
|
||||||
import org.apache.hudi.config.HoodieWriteConfig;
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
import org.apache.hudi.exception.HoodieException;
|
import org.apache.hudi.exception.HoodieException;
|
||||||
import org.apache.hudi.exception.HoodieNotSupportedException;
|
import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||||
@@ -318,12 +319,12 @@ public class DataSourceUtils {
|
|||||||
|
|
||||||
// By default, ParquetWriteSupport now writes DecimalType to Parquet as int32/int64 when the precision of the DecimalType is <= Decimal.MAX_LONG_DIGITS(),
|
// By default, ParquetWriteSupport now writes DecimalType to Parquet as int32/int64 when the precision of the DecimalType is <= Decimal.MAX_LONG_DIGITS(),
|
||||||
// but AvroParquetReader, which is used by HoodieParquetReader, cannot read int32/int64 as DecimalType.
|
// but AvroParquetReader, which is used by HoodieParquetReader, cannot read int32/int64 as DecimalType.
|
||||||
// Check whether the current schema contains such a DecimalType and, if so, automatically set "hoodie.parquet.writeLegacyFormat.enabled" to true.
|
// Check whether the current schema contains such a DecimalType and, if so, automatically set "hoodie.parquet.writelegacyformat.enabled" to true.
|
||||||
public static void mayBeOverwriteParquetWriteLegacyFormatProp(Map<String, String> properties, StructType schema) {
|
public static void mayBeOverwriteParquetWriteLegacyFormatProp(Map<String, String> properties, StructType schema) {
|
||||||
if (DataTypeUtils.foundSmallPrecisionDecimalType(schema)
|
if (DataTypeUtils.foundSmallPrecisionDecimalType(schema)
|
||||||
&& !Boolean.parseBoolean(properties.getOrDefault("hoodie.parquet.writeLegacyFormat.enabled", "false"))) {
|
&& !Boolean.parseBoolean(properties.getOrDefault(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key(), "false"))) {
|
||||||
properties.put("hoodie.parquet.writeLegacyFormat.enabled", "true");
|
properties.put(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key(), "true");
|
||||||
LOG.warn("Small Decimal Type found in current schema, auto set the value of hoodie.parquet.writeLegacyFormat.enabled to true");
|
LOG.warn("Small Decimal Type found in current schema, auto set the value of hoodie.parquet.writelegacyformat.enabled to true");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -299,18 +299,18 @@ public class TestDataSourceUtils {
|
|||||||
StructType structType = StructType$.MODULE$.apply(structFields);
|
StructType structType = StructType$.MODULE$.apply(structFields);
|
||||||
// create write options
|
// create write options
|
||||||
Map<String, String> options = new HashMap<>();
|
Map<String, String> options = new HashMap<>();
|
||||||
options.put("hoodie.parquet.writeLegacyFormat.enabled", String.valueOf(defaultWriteValue));
|
options.put("hoodie.parquet.writelegacyformat.enabled", String.valueOf(defaultWriteValue));
|
||||||
|
|
||||||
// start test
|
// start test
|
||||||
mayBeOverwriteParquetWriteLegacyFormatProp(options, structType);
|
mayBeOverwriteParquetWriteLegacyFormatProp(options, structType);
|
||||||
|
|
||||||
// check result
|
// check result
|
||||||
boolean res = Boolean.parseBoolean(options.get("hoodie.parquet.writeLegacyFormat.enabled"));
|
boolean res = Boolean.parseBoolean(options.get("hoodie.parquet.writelegacyformat.enabled"));
|
||||||
if (smallDecimal) {
|
if (smallDecimal) {
|
||||||
// should auto modify "hoodie.parquet.writeLegacyFormat.enabled" = "true".
|
// should auto modify "hoodie.parquet.writelegacyformat.enabled" = "true".
|
||||||
assertEquals(true, res);
|
assertEquals(true, res);
|
||||||
} else {
|
} else {
|
||||||
// should not modify the value of "hoodie.parquet.writeLegacyFormat.enabled".
|
// should not modify the value of "hoodie.parquet.writelegacyformat.enabled".
|
||||||
assertEquals(defaultWriteValue, res);
|
assertEquals(defaultWriteValue, res);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ public class DefaultSource extends BaseDefaultSource implements DataSourceV2,
|
|||||||
boolean populateMetaFields = options.getBoolean(HoodieTableConfig.POPULATE_META_FIELDS.key(),
|
boolean populateMetaFields = options.getBoolean(HoodieTableConfig.POPULATE_META_FIELDS.key(),
|
||||||
Boolean.parseBoolean(HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()));
|
Boolean.parseBoolean(HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()));
|
||||||
Map<String, String> properties = options.asMap();
|
Map<String, String> properties = options.asMap();
|
||||||
// Auto set the value of "hoodie.parquet.writeLegacyFormat.enabled"
|
// Auto set the value of "hoodie.parquet.writelegacyformat.enabled"
|
||||||
mayBeOverwriteParquetWriteLegacyFormatProp(properties, schema);
|
mayBeOverwriteParquetWriteLegacyFormatProp(properties, schema);
|
||||||
// The 1st arg to createHoodieConfig is not really required to be set, but it is passed anyway.
|
// The 1st arg to createHoodieConfig is not really required to be set, but it is passed anyway.
|
||||||
HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(options.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()).get(), path, tblName, properties);
|
HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(options.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()).get(), path, tblName, properties);
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ public class DefaultSource extends BaseDefaultSource implements TableProvider {
|
|||||||
Boolean.toString(HoodieInternalConfig.DEFAULT_BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED)));
|
Boolean.toString(HoodieInternalConfig.DEFAULT_BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED)));
|
||||||
// Create a new map as the properties is an unmodifiableMap on Spark 3.2.0
|
// Create a new map as the properties is an unmodifiableMap on Spark 3.2.0
|
||||||
Map<String, String> newProps = new HashMap<>(properties);
|
Map<String, String> newProps = new HashMap<>(properties);
|
||||||
// Auto set the value of "hoodie.parquet.writeLegacyFormat.enabled"
|
// Auto set the value of "hoodie.parquet.writelegacyformat.enabled"
|
||||||
mayBeOverwriteParquetWriteLegacyFormatProp(newProps, schema);
|
mayBeOverwriteParquetWriteLegacyFormatProp(newProps, schema);
|
||||||
// The 1st arg to createHoodieConfig is not really required to be set, but it is passed anyway.
|
// The 1st arg to createHoodieConfig is not really required to be set, but it is passed anyway.
|
||||||
HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(newProps.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()), path, tblName, newProps);
|
HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(newProps.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()), path, tblName, newProps);
|
||||||
|
|||||||
Reference in New Issue
Block a user