[HUDI-2632] Schema evolution for flink parquet reader (#3872)
@@ -960,6 +960,44 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
     assertRowsEquals(result, TestData.dataSetInsert(5, 6));
   }
 
+  @ParameterizedTest
+  @EnumSource(value = HoodieTableType.class)
+  void testReadWithWiderSchema(HoodieTableType tableType) throws Exception {
+    TableEnvironment tableEnv = batchTableEnv;
+    Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
+    conf.setString(FlinkOptions.TABLE_NAME, "t1");
+    conf.setString(FlinkOptions.TABLE_TYPE, tableType.name());
+
+    // write a batch of data set
+    TestData.writeData(TestData.DATA_SET_INSERT, conf);
+
+    String hoodieTableDDL = sql("t1")
+        .field("uuid varchar(20)")
+        .field("name varchar(10)")
+        .field("age int")
+        .field("salary double")
+        .field("ts timestamp(3)")
+        .field("`partition` varchar(10)")
+        .pkField("uuid")
+        .option(FlinkOptions.PATH, tempFile.getAbsolutePath())
+        .option(FlinkOptions.TABLE_TYPE, tableType)
+        .end();
+    tableEnv.executeSql(hoodieTableDDL);
+
+    List<Row> result = CollectionUtil.iterableToList(
+        () -> tableEnv.sqlQuery("select * from t1").execute().collect());
+    final String expected = "["
+        + "+I[id1, Danny, 23, null, 1970-01-01T00:00:00.001, par1], "
+        + "+I[id2, Stephen, 33, null, 1970-01-01T00:00:00.002, par1], "
+        + "+I[id3, Julian, 53, null, 1970-01-01T00:00:00.003, par2], "
+        + "+I[id4, Fabian, 31, null, 1970-01-01T00:00:00.004, par2], "
+        + "+I[id5, Sophia, 18, null, 1970-01-01T00:00:00.005, par3], "
+        + "+I[id6, Emma, 20, null, 1970-01-01T00:00:00.006, par3], "
+        + "+I[id7, Bob, 44, null, 1970-01-01T00:00:00.007, par4], "
+        + "+I[id8, Han, 56, null, 1970-01-01T00:00:00.008, par4]]";
+    assertRowsEquals(result, expected);
+  }
+
   // -------------------------------------------------------------------------
   //  Utilities
   // -------------------------------------------------------------------------
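The test above reads through a DDL that declares a salary double column even though TestData.DATA_SET_INSERT was written without one, and expects null in that slot for every row. A minimal sketch of that contract in plain Java, not Hudi's actual reader code: the columns requested by the (wider) read schema are resolved by name against the columns the parquet file actually contains, and anything the file lacks is null-padded.

    import java.util.List;
    import java.util.Map;

    // Plain-Java sketch of null-padding by field name; illustrative only.
    final class NullPaddingSketch {
      // fileRow: column name -> value as stored in the parquet file.
      // readFields: field names of the (possibly wider) read schema.
      static Object[] project(Map<String, Object> fileRow, List<String> readFields) {
        Object[] out = new Object[readFields.size()];
        for (int i = 0; i < readFields.size(); i++) {
          out[i] = fileRow.get(readFields.get(i)); // null when the file lacks the column
        }
        return out;
      }
    }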
@@ -25,6 +25,7 @@ import org.apache.hudi.configuration.FlinkOptions;
 import org.apache.hudi.table.HoodieTableSource;
 import org.apache.hudi.table.format.cow.CopyOnWriteInputFormat;
 import org.apache.hudi.table.format.mor.MergeOnReadInputFormat;
+import org.apache.hudi.util.AvroSchemaConverter;
 import org.apache.hudi.util.StreamerUtil;
 import org.apache.hudi.utils.TestConfigurations;
 import org.apache.hudi.utils.TestData;
@@ -445,6 +446,20 @@ public class TestInputFormat {
     TestData.assertRowDataEquals(actual4, expected4);
   }
 
+  @ParameterizedTest
+  @EnumSource(value = HoodieTableType.class)
+  void testReadWithWiderSchema(HoodieTableType tableType) throws Exception {
+    Map<String, String> options = new HashMap<>();
+    options.put(FlinkOptions.SOURCE_AVRO_SCHEMA.key(),
+        AvroSchemaConverter.convertToSchema(TestConfigurations.ROW_TYPE_WIDER).toString());
+    beforeEach(tableType, options);
+
+    TestData.writeData(TestData.DATA_SET_INSERT, conf);
+    InputFormat<RowData, ?> inputFormat = this.tableSource.getInputFormat();
+    List<RowData> result = readData(inputFormat);
+    TestData.assertRowDataEquals(result, TestData.DATA_SET_INSERT);
+  }
+
   // -------------------------------------------------------------------------
   //  Utilities
   // -------------------------------------------------------------------------
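This variant drives the same wider-schema read through the InputFormat directly: FlinkOptions.SOURCE_AVRO_SCHEMA pins the reader's source schema to an Avro JSON string, and AvroSchemaConverter.convertToSchema derives that string from the Flink RowType so the test never hand-writes it. A hedged sketch of the same hand-off packaged as a helper, using only the classes imported in the hunk above (the standalone wrapper class is illustrative):

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hudi.configuration.FlinkOptions;
    import org.apache.hudi.util.AvroSchemaConverter;
    import org.apache.hudi.utils.TestConfigurations;

    // Derive the Avro schema string from the wider Flink row type and hand
    // it to the source as a table option.
    final class WiderSchemaOption {
      static Map<String, String> widerSchemaOptions() {
        Map<String, String> options = new HashMap<>();
        options.put(FlinkOptions.SOURCE_AVRO_SCHEMA.key(),
            AvroSchemaConverter.convertToSchema(TestConfigurations.ROW_TYPE_WIDER).toString());
        return options;
      }
    }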
@@ -63,6 +63,17 @@ public class TestConfigurations {
   private static final List<String> FIELDS = ROW_TYPE.getFields().stream()
       .map(RowType.RowField::asSummaryString).collect(Collectors.toList());
 
+  public static final DataType ROW_DATA_TYPE_WIDER = DataTypes.ROW(
+      DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)), // record key
+      DataTypes.FIELD("name", DataTypes.VARCHAR(10)),
+      DataTypes.FIELD("age", DataTypes.INT()),
+      DataTypes.FIELD("salary", DataTypes.DOUBLE()),
+      DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field
+      DataTypes.FIELD("partition", DataTypes.VARCHAR(10)))
+      .notNull();
+
+  public static final RowType ROW_TYPE_WIDER = (RowType) ROW_DATA_TYPE_WIDER.getLogicalType();
+
   public static String getCreateHoodieTableDDL(String tableName, Map<String, String> options) {
     return getCreateHoodieTableDDL(tableName, options, true, "partition");
   }
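For comparison, the fixture is "wider" relative to the pre-existing base row type, which carries the same fields minus salary. The base type is not shown in this commit, so the reproduction below is an assumption inferred from the DDL in the first hunk:

    // Assumed pre-existing base fixture; ROW_DATA_TYPE_WIDER above adds salary.
    public static final DataType ROW_DATA_TYPE = DataTypes.ROW(
        DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)), // record key
        DataTypes.FIELD("name", DataTypes.VARCHAR(10)),
        DataTypes.FIELD("age", DataTypes.INT()),
        DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field
        DataTypes.FIELD("partition", DataTypes.VARCHAR(10)))
        .notNull();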
@@ -92,8 +103,9 @@ public class TestConfigurations {
     if (havePartition) {
       builder.append("PARTITIONED BY (`").append(partitionField).append("`)\n");
     }
+    final String connector = options.computeIfAbsent("connector", k -> "hudi");
     builder.append("with (\n"
-        + "  'connector' = 'hudi'");
+        + "  'connector' = '").append(connector).append("'");
     options.forEach((k, v) -> builder.append(",\n")
         .append("  '").append(k).append("' = '").append(v).append("'"));
     builder.append("\n)");
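The computeIfAbsent change stops hardcoding the hudi connector in generated DDL: a caller can pre-seed the options map to target another connector, and otherwise gets the old default. A hypothetical call (the 'datagen' value is illustrative, not from the commit):

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hudi.utils.TestConfigurations;

    final class ConnectorOverrideExample {
      static String sourceDdl() {
        Map<String, String> options = new HashMap<>();
        options.put("connector", "datagen"); // overrides the 'hudi' default
        return TestConfigurations.getCreateHoodieTableDDL("source", options);
      }
    }

Note that computeIfAbsent also writes the resolved value back into the map, so the unchanged forEach emits the same 'connector' pair a second time; both occurrences always carry an identical value.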
@@ -235,6 +247,11 @@ public class TestConfigurations {
       return this;
     }
 
+    public Sql options(Map<String, String> options) {
+      this.options.putAll(options);
+      return this;
+    }
+
     public Sql noPartition() {
       this.withPartition = false;
       return this;
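The new bulk setter lets parameterized tests merge a whole map of overrides into the fluent DDL builder instead of repeating .option(...) calls; since it delegates to putAll on the same underlying map, a later call wins for a duplicated key. A hedged usage sketch (the field list is abbreviated, the path is hypothetical, and sql(...) is the test helper used in the first hunk):

    Map<String, String> overrides = new HashMap<>();
    overrides.put(FlinkOptions.TABLE_TYPE.key(), "MERGE_ON_READ");

    String hoodieTableDDL = sql("t1")
        .field("uuid varchar(20)")
        .field("ts timestamp(3)")
        .pkField("uuid")
        .options(overrides) // bulk merge of the map above
        .option(FlinkOptions.PATH, "/tmp/hudi/t1") // hypothetical path
        .end();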