1
0

Making DataSource/DeltaStreamer use defaults for combining

- Addresses an issue where insert would combine and remove duplicates within a batch
 - Sets the default for insert combining to false (the write client default)
 - Sets it to true if filtering duplicates on insert/bulk_insert
This commit is contained in:
vinothchandar
2019-05-01 05:06:34 -07:00
committed by Balaji Varadarajan
parent ea20d47248
commit 57a8b9cc8c
3 changed files with 12 additions and 11 deletions

View File

@@ -138,10 +138,7 @@ private[hoodie] object HoodieSparkSqlWriter {
}
// Create a HoodieWriteClient & issue the write.
val client = DataSourceUtils.createHoodieClient(jsc,
schema.toString,
path.get,
tblName.get,
val client = DataSourceUtils.createHoodieClient(jsc, schema.toString, path.get, tblName.get,
mapAsJavaMap(parameters)
)
val commitTime = client.startCommit()
@@ -257,4 +254,4 @@ private[hoodie] object HoodieSparkSqlWriter {
hiveSyncConfig.partitionValueExtractorClass = parameters(HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY)
hiveSyncConfig
}
}
}