1
0

[HUDI-3204] Fixing partition-values being derived from partition-path instead of source columns (#5364)

- Scaffolded `Spark24HoodieParquetFileFormat` extending `ParquetFileFormat` and overriding the behavior of adding partition columns to every row
 - Amended `SparkAdapter`'s `createHoodieParquetFileFormat` API to be able to configure whether to append partition values or not
 - Fallback to append partition values in cases when the source columns are not persisted in data-file
 - Fixing HoodieBaseRelation incorrectly handling mandatory columns
This commit is contained in:
Alexey Kudinkin
2022-04-20 04:30:27 -07:00
committed by GitHub
parent 408663c42b
commit f7544e23ac
28 changed files with 1156 additions and 686 deletions

View File

@@ -20,6 +20,7 @@ package org.apache.hudi.common.table;
import org.apache.avro.Schema;
import org.apache.hudi.avro.AvroSchemaUtils;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.util.Option;
@@ -57,7 +58,7 @@ public class TestTableSchemaResolver {
assertNotEquals(originSchema, s4);
assertTrue(s4.getFields().stream().anyMatch(f -> f.name().equals("user_partition")));
Schema.Field f = s4.getField("user_partition");
assertEquals(f.schema().getType().getName(), "string");
assertEquals(f.schema(), AvroSchemaUtils.createNullableSchema(Schema.Type.STRING));
// case5: user_partition is not in originSchema, but partition_path is in originSchema
String[] pts4 = {"user_partition", "partition_path"};