Making DataSource/DeltaStreamer use defaults for combining
- Addresses an issue where insert would combine records and remove duplicates within a batch
- Sets the default for insert combining to false (the write client default)
- Sets insert combining to true only when filtering duplicates on insert/bulk_insert
This commit is contained in:
committed by
Balaji Varadarajan
parent
ea20d47248
commit
57a8b9cc8c
@@ -326,8 +326,8 @@ public class HoodieDeltaStreamer implements Serializable {
|
||||
|
||||
private HoodieWriteConfig getHoodieClientConfig(SchemaProvider schemaProvider) {
|
||||
HoodieWriteConfig.Builder builder =
|
||||
HoodieWriteConfig.newBuilder().combineInput(true, true).withPath(cfg.targetBasePath)
|
||||
.withAutoCommit(false)
|
||||
HoodieWriteConfig.newBuilder().withPath(cfg.targetBasePath)
|
||||
.withAutoCommit(false).combineInput(cfg.filterDupes, true)
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
|
||||
.withPayloadClass(cfg.payloadClassName)
|
||||
// turn on inline compaction by default, for MOR tables
|
||||
|
||||
Reference in New Issue
Block a user