1
0

Making DataSource/DeltaStreamer use defaults for combining

- Addresses an issue where insert would combine and remove duplicates within a batch
 - Sets default insert combining to false (the write client default)
 - Sets it to true only when filtering duplicates on insert/bulk_insert
This commit is contained in:
vinothchandar
2019-05-01 05:06:34 -07:00
committed by Balaji Varadarajan
parent ea20d47248
commit 57a8b9cc8c
3 changed files with 12 additions and 11 deletions

View File

@@ -326,8 +326,8 @@ public class HoodieDeltaStreamer implements Serializable {
private HoodieWriteConfig getHoodieClientConfig(SchemaProvider schemaProvider) {
HoodieWriteConfig.Builder builder =
-HoodieWriteConfig.newBuilder().combineInput(true, true).withPath(cfg.targetBasePath)
-.withAutoCommit(false)
+HoodieWriteConfig.newBuilder().withPath(cfg.targetBasePath)
+.withAutoCommit(false).combineInput(cfg.filterDupes, true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withPayloadClass(cfg.payloadClassName)
// turn on inline compaction by default, for MOR tables