From 4f217fe718b0b4e9656c2a45f7b89cb5df15a4f2 Mon Sep 17 00:00:00 2001
From: Sivabalan Narayanan
Date: Fri, 12 Nov 2021 07:29:37 -0500
Subject: [PATCH] [HUDI-2151] Part1 Setting default parallelism to 200 for some of write configs (#3948)

---
 .../java/org/apache/hudi/config/HoodieWriteConfig.java | 10 +++++-----
 .../hudi/common/config/HoodieMetadataConfig.java       |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index 4eaff7e5f..e167b12c8 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -159,12 +159,12 @@ public class HoodieWriteConfig extends HoodieConfig {
 
   public static final ConfigProperty INSERT_PARALLELISM_VALUE = ConfigProperty
       .key("hoodie.insert.shuffle.parallelism")
-      .defaultValue("1500")
+      .defaultValue("200")
       .withDocumentation("Parallelism for inserting records into the table. Inserts can shuffle data before writing to tune file sizes and optimize the storage layout.");
 
   public static final ConfigProperty BULKINSERT_PARALLELISM_VALUE = ConfigProperty
       .key("hoodie.bulkinsert.shuffle.parallelism")
-      .defaultValue("1500")
+      .defaultValue("200")
       .withDocumentation("For large initial imports using bulk_insert operation, controls the parallelism to use for sort modes or custom partitioning done"
           + "before writing records to the table.");
 
@@ -183,13 +183,13 @@ public class HoodieWriteConfig extends HoodieConfig {
 
   public static final ConfigProperty UPSERT_PARALLELISM_VALUE = ConfigProperty
       .key("hoodie.upsert.shuffle.parallelism")
-      .defaultValue("1500")
+      .defaultValue("200")
       .withDocumentation("Parallelism to use for upsert operation on the table. Upserts can shuffle data to perform index lookups, file sizing, bin packing records optimally"
           + "into file groups.");
 
   public static final ConfigProperty DELETE_PARALLELISM_VALUE = ConfigProperty
       .key("hoodie.delete.shuffle.parallelism")
-      .defaultValue("1500")
+      .defaultValue("200")
       .withDocumentation("Parallelism used for “delete” operation. Delete operations also performs shuffles, similar to upsert operation.");
 
   public static final ConfigProperty ROLLBACK_PARALLELISM_VALUE = ConfigProperty
@@ -241,7 +241,7 @@ public class HoodieWriteConfig extends HoodieConfig {
 
   public static final ConfigProperty FINALIZE_WRITE_PARALLELISM_VALUE = ConfigProperty
       .key("hoodie.finalize.write.parallelism")
-      .defaultValue("1500")
+      .defaultValue("200")
       .withDocumentation("Parallelism for the write finalization internal operation, which involves removing any partially written "
           + "files from lake storage, before committing the write. Reduce this value, if the high number of tasks incur delays for smaller tables "
           + "or low latency writes.");
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
index d52629440..75d014521 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
@@ -111,7 +111,7 @@ public final class HoodieMetadataConfig extends HoodieConfig {
 
   public static final ConfigProperty FILE_LISTING_PARALLELISM_VALUE = ConfigProperty
       .key("hoodie.file.listing.parallelism")
-      .defaultValue(1500)
+      .defaultValue(200)
       .sinceVersion("0.7.0")
       .withDocumentation("Parallelism to use, when listing the table on lake storage.");
 