1
0

[HUDI-1818] Validate required fields for Flink HoodieTable (#2930)

This commit is contained in:
hiscat
2021-05-11 11:11:19 +08:00
committed by GitHub
parent 42ec7e30d7
commit 7a5af806cf
2 changed files with 77 additions and 0 deletions

View File

@@ -40,9 +40,11 @@ import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Hoodie data source/sink factory.
@@ -59,6 +61,7 @@ public class HoodieTableFactory implements DynamicTableSourceFactory, DynamicTab
Configuration conf = (Configuration) helper.getOptions();
TableSchema schema = TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema());
validateRequiredFields(conf, schema);
setupConfOptions(conf, context.getObjectIdentifier().getObjectName(), context.getCatalogTable(), schema);
Path path = new Path(conf.getOptional(FlinkOptions.PATH).orElseThrow(() ->
@@ -75,6 +78,7 @@ public class HoodieTableFactory implements DynamicTableSourceFactory, DynamicTab
public DynamicTableSink createDynamicTableSink(Context context) {
    // Resolve the table options and the physical (non-computed) schema from the catalog table.
    final Configuration options = FlinkOptions.fromMap(context.getCatalogTable().getOptions());
    final TableSchema physicalSchema = TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema());
    // Fail fast on missing record-key / pre-combine fields before any further setup.
    validateRequiredFields(options, physicalSchema);
    setupConfOptions(options, context.getObjectIdentifier().getObjectName(), context.getCatalogTable(), physicalSchema);
    return new HoodieTableSink(options, physicalSchema);
}
@@ -98,6 +102,33 @@ public class HoodieTableFactory implements DynamicTableSourceFactory, DynamicTab
// Utilities
// -------------------------------------------------------------------------
/**
 * Validates the required table options, e.g. the record key and pre-combine key fields.
 *
 * @param conf   The table options
 * @param schema The table schema
 * @throws ValidationException if a configured record key or pre-combine field
 *                             does not exist in the table schema
 */
private void validateRequiredFields(Configuration conf, TableSchema schema) {
    // Arrays.asList is a fixed-size view over the field names; we only read from it,
    // so no copy via Collectors.toList() is needed.
    List<String> fields = Arrays.asList(schema.getFieldNames());

    // Validate the record key fields only when no primary key is declared:
    // a declared primary key already implies valid key columns.
    if (!schema.getPrimaryKey().isPresent()) {
        Arrays.stream(conf.get(FlinkOptions.RECORD_KEY_FIELD).split(","))
            .filter(field -> !fields.contains(field))
            .findAny()
            .ifPresent(f -> {
                throw new ValidationException("Field " + f + " does not exist in the table schema. "
                    + "Please define primary key or modify hoodie.datasource.write.recordkey.field option.");
            });
    }

    // Validate the pre-combine field.
    // NOTE(review): this also rejects the option's default value when the user never set it
    // and the schema lacks that field — confirm this strictness is intended.
    String preCombineField = conf.get(FlinkOptions.PRECOMBINE_FIELD);
    if (!fields.contains(preCombineField)) {
        throw new ValidationException("Field " + preCombineField + " does not exist in the table schema. "
            + "Please check write.precombine.field option.");
    }
}
/**
* Setup the config options based on the table definition, for e.g the table name, primary key.
*