1
0

[HUDI-1991] Fixing drop dups exception in bulk insert row writer path (#3055)

This commit is contained in:
Sivabalan Narayanan
2021-06-13 21:55:52 -04:00
committed by GitHub
parent 6e78682cea
commit 7d9f9d7d82
2 changed files with 47 additions and 11 deletions

View File

@@ -333,6 +333,9 @@ object HoodieSparkSqlWriter {
val schema = AvroConversionUtils.convertStructTypeToAvroSchema(df.schema, structName, nameSpace)
sparkContext.getConf.registerAvroSchemas(schema)
log.info(s"Registered avro schema : ${schema.toString(true)}")
if (parameters(INSERT_DROP_DUPS_OPT_KEY).toBoolean) {
throw new HoodieException("Dropping duplicates with bulk_insert in row writer path is not supported yet")
}
val params = parameters.updated(HoodieWriteConfig.AVRO_SCHEMA, schema.toString)
val writeConfig = DataSourceUtils.createHoodieConfig(schema.toString, path.get, tblName, mapAsJavaMap(params))
val hoodieDF = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, writeConfig, df, structName, nameSpace)