1
0

Making DataSource/DeltaStreamer use defaults for combining

- Addresses an issue where insert combines and removes duplicates within a batch
 - Sets the default insert combining to false (the write client default)
 - Sets insert combining to true if filtering duplicates on insert/bulk_insert
This commit is contained in:
vinothchandar
2019-05-01 05:06:34 -07:00
committed by Balaji Varadarajan
parent ea20d47248
commit 57a8b9cc8c
3 changed files with 12 additions and 11 deletions

View File

@@ -128,12 +128,16 @@ public class DataSourceUtils {
String basePath, String tblName, Map<String, String> parameters) throws Exception {
// inline compaction is on by default for MOR
boolean inlineCompact = parameters.containsKey(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY())
&& parameters.get(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY()).equals(DataSourceWriteOptions
.MOR_STORAGE_TYPE_OPT_VAL());
boolean inlineCompact = parameters.get(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY())
.equals(DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL());
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().combineInput(true, true)
// insert/bulk-insert combining to be true, if filtering for duplicates
boolean combineInserts = Boolean.parseBoolean(parameters.get(
DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY()));
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
.withPath(basePath).withAutoCommit(false)
.combineInput(combineInserts, true)
.withSchema(schemaStr).forTable(tblName).withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder()