1
0

[HUDI-2632] Schema evolution for flink parquet reader (#3872)

This commit is contained in:
Danny Chan
2021-10-27 20:00:24 +08:00
committed by GitHub
parent ae000795d7
commit 909c3ba45e
6 changed files with 131 additions and 28 deletions

View File

@@ -960,6 +960,44 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
assertRowsEquals(result, TestData.dataSetInsert(5, 6));
}
@ParameterizedTest
@EnumSource(value = HoodieTableType.class)
void testReadWithWiderSchema(HoodieTableType tableType) throws Exception {
  TableEnvironment tableEnv = batchTableEnv;

  // Write a batch of data using the default (narrower) write configuration.
  final Configuration writeConf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
  writeConf.setString(FlinkOptions.TABLE_NAME, "t1");
  writeConf.setString(FlinkOptions.TABLE_TYPE, tableType.name());
  TestData.writeData(TestData.DATA_SET_INSERT, writeConf);

  // Declare the table with an extra 'salary' column that the written data set
  // does not carry; the expected rows below show the reader surfacing it as null.
  final String createTableDDL = sql("t1")
      .field("uuid varchar(20)")
      .field("name varchar(10)")
      .field("age int")
      .field("salary double")
      .field("ts timestamp(3)")
      .field("`partition` varchar(10)")
      .pkField("uuid")
      .option(FlinkOptions.PATH, tempFile.getAbsolutePath())
      .option(FlinkOptions.TABLE_TYPE, tableType)
      .end();
  tableEnv.executeSql(createTableDDL);

  // Read everything back through SQL and compare against the literal row dump.
  final List<Row> readRows = CollectionUtil.iterableToList(
      () -> tableEnv.sqlQuery("select * from t1").execute().collect());
  final String expectedRows = "["
      + "+I[id1, Danny, 23, null, 1970-01-01T00:00:00.001, par1], "
      + "+I[id2, Stephen, 33, null, 1970-01-01T00:00:00.002, par1], "
      + "+I[id3, Julian, 53, null, 1970-01-01T00:00:00.003, par2], "
      + "+I[id4, Fabian, 31, null, 1970-01-01T00:00:00.004, par2], "
      + "+I[id5, Sophia, 18, null, 1970-01-01T00:00:00.005, par3], "
      + "+I[id6, Emma, 20, null, 1970-01-01T00:00:00.006, par3], "
      + "+I[id7, Bob, 44, null, 1970-01-01T00:00:00.007, par4], "
      + "+I[id8, Han, 56, null, 1970-01-01T00:00:00.008, par4]]";
  assertRowsEquals(readRows, expectedRows);
}
// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------

View File

@@ -25,6 +25,7 @@ import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.table.HoodieTableSource;
import org.apache.hudi.table.format.cow.CopyOnWriteInputFormat;
import org.apache.hudi.table.format.mor.MergeOnReadInputFormat;
import org.apache.hudi.util.AvroSchemaConverter;
import org.apache.hudi.util.StreamerUtil;
import org.apache.hudi.utils.TestConfigurations;
import org.apache.hudi.utils.TestData;
@@ -445,6 +446,20 @@ public class TestInputFormat {
TestData.assertRowDataEquals(actual4, expected4);
}
@ParameterizedTest
@EnumSource(value = HoodieTableType.class)
void testReadWithWiderSchema(HoodieTableType tableType) throws Exception {
  // Point the source avro schema at the wider row type (extra 'salary' column)
  // before setting up the table source.
  final Map<String, String> extraOptions = new HashMap<>();
  extraOptions.put(FlinkOptions.SOURCE_AVRO_SCHEMA.key(),
      AvroSchemaConverter.convertToSchema(TestConfigurations.ROW_TYPE_WIDER).toString());
  beforeEach(tableType, extraOptions);

  // Write the standard insert data set, then read it back through the
  // table source's input format; the rows should round-trip unchanged.
  TestData.writeData(TestData.DATA_SET_INSERT, conf);
  final InputFormat<RowData, ?> format = this.tableSource.getInputFormat();
  final List<RowData> actual = readData(format);
  TestData.assertRowDataEquals(actual, TestData.DATA_SET_INSERT);
}
// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------

View File

@@ -63,6 +63,17 @@ public class TestConfigurations {
// Summary strings ("name type") for each field of the default ROW_TYPE,
// used when rendering test DDL statements.
private static final List<String> FIELDS = ROW_TYPE.getFields().stream()
.map(RowType.RowField::asSummaryString).collect(Collectors.toList());
// A "wider" variant of the test row type that adds a 'salary' column,
// used by the schema-evolution read tests. NOTE(review): assumes the
// default ROW_TYPE has no 'salary' field — confirm against its definition.
public static final DataType ROW_DATA_TYPE_WIDER = DataTypes.ROW(
DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key
DataTypes.FIELD("name", DataTypes.VARCHAR(10)),
DataTypes.FIELD("age", DataTypes.INT()),
DataTypes.FIELD("salary", DataTypes.DOUBLE()),
DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field
DataTypes.FIELD("partition", DataTypes.VARCHAR(10)))
.notNull();
// The wider row type as a logical RowType, for APIs that take RowType
// (e.g. avro schema conversion in the input-format tests).
public static final RowType ROW_TYPE_WIDER = (RowType) ROW_DATA_TYPE_WIDER.getLogicalType();
/**
 * Returns the CREATE TABLE DDL for a hoodie table, partitioned by the
 * default {@code partition} field.
 *
 * @param tableName the table name
 * @param options   table options to render into the WITH clause
 * @return the CREATE TABLE statement
 */
public static String getCreateHoodieTableDDL(String tableName, Map<String, String> options) {
  // Delegate to the full variant with partitioning enabled on the default field.
  final String defaultPartitionField = "partition";
  return getCreateHoodieTableDDL(tableName, options, true, defaultPartitionField);
}
@@ -92,8 +103,9 @@ public class TestConfigurations {
if (havePartition) {
builder.append("PARTITIONED BY (`").append(partitionField).append("`)\n");
}
final String connector = options.computeIfAbsent("connector", k -> "hudi");
builder.append("with (\n"
+ " 'connector' = 'hudi'");
+ " 'connector' = '").append(connector).append("'");
options.forEach((k, v) -> builder.append(",\n")
.append(" '").append(k).append("' = '").append(v).append("'"));
builder.append("\n)");
@@ -235,6 +247,11 @@ public class TestConfigurations {
return this;
}
/**
 * Adds all the given options to this builder.
 *
 * @param options the options to merge into this builder
 * @return this builder, for chaining
 */
public Sql options(Map<String, String> options) {
  // Merge entry by entry rather than replacing, so options added by
  // earlier single-option calls are preserved.
  options.forEach(this.options::put);
  return this;
}
public Sql noPartition() {
this.withPartition = false;
return this;