1
0

[HUDI-4320] Make sure HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED could be specified by the writer (#5970)

Fixed the sequence that determines whether Parquet's legacy-format writing property should be overridden, so that the override only kicks in when the property has not been explicitly specified by the caller
This commit is contained in:
Alexey Kudinkin
2022-06-28 12:27:32 -07:00
committed by GitHub
parent efb9719018
commit ed823f1c6f
5 changed files with 89 additions and 54 deletions

View File

@@ -34,7 +34,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap;
import java.util.HashMap;
import java.util.Map;
import static org.apache.hudi.DataSourceUtils.mayBeOverwriteParquetWriteLegacyFormatProp;
import static org.apache.hudi.DataSourceUtils.tryOverrideParquetWriteLegacyFormatProperty;
/**
* DataSource V2 implementation for managing internal write logic. Only called internally.
@@ -59,7 +59,7 @@ public class DefaultSource extends BaseDefaultSource implements TableProvider {
// Create a new map as the properties is an unmodifiableMap on Spark 3.2.0
Map<String, String> newProps = new HashMap<>(properties);
// Auto set the value of "hoodie.parquet.writelegacyformat.enabled"
mayBeOverwriteParquetWriteLegacyFormatProp(newProps, schema);
tryOverrideParquetWriteLegacyFormatProperty(newProps, schema);
// 1st arg to createHoodieConfig is not really required to be set. but passing it anyways.
HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(newProps.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()), path, tblName, newProps);
return new HoodieDataSourceInternalTable(instantTime, config, schema, getSparkSession(),