[HUDI-2255] Refactor Datasource options (#3373)
Co-authored-by: Wenning Ding <wenningd@amazon.com>
This commit is contained in:
@@ -79,8 +79,8 @@ public class ValidateDatasetNode extends DagNode<Boolean> {
|
||||
log.debug("Listing all Micro batches to be validated :: " + fileStatus.getPath().toString());
|
||||
}
|
||||
|
||||
String recordKeyField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key());
|
||||
String partitionPathField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key());
|
||||
String recordKeyField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.RECORDKEY_FIELD().key());
|
||||
String partitionPathField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.PARTITIONPATH_FIELD().key());
|
||||
// todo: fix hard coded fields from configs.
|
||||
// read input and resolve insert, updates, etc.
|
||||
Dataset<Row> inputDf = session.read().format("avro").load(inputPath);
|
||||
@@ -112,8 +112,8 @@ public class ValidateDatasetNode extends DagNode<Boolean> {
|
||||
}
|
||||
|
||||
if (config.isValidateHive()) {
|
||||
String database = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key());
|
||||
String tableName = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key());
|
||||
String database = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_DATABASE().key());
|
||||
String tableName = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_TABLE().key());
|
||||
log.warn("Validating hive table with db : " + database + " and table : " + tableName);
|
||||
Dataset<Row> cowDf = session.sql("SELECT * FROM " + database + "." + tableName);
|
||||
Dataset<Row> trimmedCowDf = cowDf.drop(HoodieRecord.COMMIT_TIME_METADATA_FIELD).drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD).drop(HoodieRecord.RECORD_KEY_METADATA_FIELD)
|
||||
|
||||
@@ -53,11 +53,11 @@ class SparkBulkInsertNode(config1: Config) extends DagNode[RDD[WriteStatus]] {
|
||||
val saveMode = if(curItrCount == 0) SaveMode.Overwrite else SaveMode.Append
|
||||
inputDF.write.format("hudi")
|
||||
.options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap))
|
||||
.option(DataSourceWriteOptions.TABLE_NAME_OPT_KEY.key(), context.getHoodieTestSuiteWriter.getCfg.targetTableName)
|
||||
.option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key(), context.getHoodieTestSuiteWriter.getCfg.tableType)
|
||||
.option(DataSourceWriteOptions.OPERATION_OPT_KEY.key(), DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL)
|
||||
.option(DataSourceWriteOptions.ENABLE_ROW_WRITER_OPT_KEY.key(), String.valueOf(config.enableRowWriting()))
|
||||
.option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY.key(), "deltastreamer.checkpoint.key")
|
||||
.option(DataSourceWriteOptions.TABLE_NAME.key(), context.getHoodieTestSuiteWriter.getCfg.targetTableName)
|
||||
.option(DataSourceWriteOptions.TABLE_TYPE.key(), context.getHoodieTestSuiteWriter.getCfg.tableType)
|
||||
.option(DataSourceWriteOptions.OPERATION.key(), DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL)
|
||||
.option(DataSourceWriteOptions.ENABLE_ROW_WRITER.key(), String.valueOf(config.enableRowWriting()))
|
||||
.option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key(), "deltastreamer.checkpoint.key")
|
||||
.option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse(""))
|
||||
.option(HoodieWriteConfig.TABLE_NAME.key(), context.getHoodieTestSuiteWriter.getCfg.targetTableName)
|
||||
.mode(saveMode)
|
||||
|
||||
@@ -53,10 +53,10 @@ class SparkInsertNode(config1: Config) extends DagNode[RDD[WriteStatus]] {
|
||||
context.getWriterContext.getSparkSession)
|
||||
inputDF.write.format("hudi")
|
||||
.options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap))
|
||||
.option(DataSourceWriteOptions.TABLE_NAME_OPT_KEY.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName)
|
||||
.option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, context.getHoodieTestSuiteWriter.getCfg.tableType)
|
||||
.option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
|
||||
.option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY.key, "deltastreamer.checkpoint.key")
|
||||
.option(DataSourceWriteOptions.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName)
|
||||
.option(DataSourceWriteOptions.TABLE_TYPE.key, context.getHoodieTestSuiteWriter.getCfg.tableType)
|
||||
.option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
|
||||
.option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key, "deltastreamer.checkpoint.key")
|
||||
.option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse(""))
|
||||
.option(HoodieWriteConfig.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName)
|
||||
.mode(SaveMode.Overwrite)
|
||||
|
||||
@@ -53,10 +53,10 @@ class SparkUpsertNode(config1: Config) extends DagNode[RDD[WriteStatus]] {
|
||||
context.getWriterContext.getSparkSession)
|
||||
inputDF.write.format("hudi")
|
||||
.options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap))
|
||||
.option(DataSourceWriteOptions.TABLE_NAME_OPT_KEY.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName)
|
||||
.option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, context.getHoodieTestSuiteWriter.getCfg.tableType)
|
||||
.option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
|
||||
.option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY.key, "deltastreamer.checkpoint.key")
|
||||
.option(DataSourceWriteOptions.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName)
|
||||
.option(DataSourceWriteOptions.TABLE_TYPE.key, context.getHoodieTestSuiteWriter.getCfg.tableType)
|
||||
.option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
|
||||
.option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key, "deltastreamer.checkpoint.key")
|
||||
.option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse(""))
|
||||
.option(HoodieWriteConfig.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName)
|
||||
.mode(SaveMode.Append)
|
||||
|
||||
Reference in New Issue
Block a user