1
0

[HUDI-1918] Fix incorrect keyBy field that causes serious data skew, and avoid multiple subtasks writing to a partition at the same time (#2972)

This commit is contained in:
zhangminglei
2021-05-21 21:59:47 +08:00
committed by GitHub
parent a96034d38d
commit 99b14a78e3

View File

@@ -88,8 +88,8 @@ public class HoodieFlinkStreamer {
.name("kafka_source")
.uid("uid_kafka_source")
.map(new RowDataToHoodieFunction<>(rowType, conf), TypeInformation.of(HoodieRecord.class))
// Key by partition path, to avoid multiple subtasks writing to a partition at the same time
.keyBy(HoodieRecord::getPartitionPath)
// Key by record key, to avoid multiple subtasks writing to a partition at the same time
.keyBy(HoodieRecord::getRecordKey)
.transform(
"bucket_assigner",
TypeInformation.of(HoodieRecord.class),