From 99b14a78e38b081efbdda59243626c244335ea7c Mon Sep 17 00:00:00 2001 From: zhangminglei <18717838093@126.com> Date: Fri, 21 May 2021 21:59:47 +0800 Subject: [PATCH] [HUDI-1918] Fix incorrect keyBy field cause serious data skew, to avoid multiple subtasks write to a partition at the same time (#2972) --- .../java/org/apache/hudi/streamer/HoodieFlinkStreamer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java b/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java index 88ab2b658..bd485634d 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java +++ b/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java @@ -88,8 +88,8 @@ public class HoodieFlinkStreamer { .name("kafka_source") .uid("uid_kafka_source") .map(new RowDataToHoodieFunction<>(rowType, conf), TypeInformation.of(HoodieRecord.class)) - // Key-by partition path, to avoid multiple subtasks write to a partition at the same time - .keyBy(HoodieRecord::getPartitionPath) + // Key-by record key, to avoid multiple subtasks write to a partition at the same time + .keyBy(HoodieRecord::getRecordKey) .transform( "bucket_assigner", TypeInformation.of(HoodieRecord.class),