perf(executor-task): 减少数据流转

通过设置相同的并行度，让数据读取和数据过滤合并到同一个算子里，避免大表 base 文件扫描失败
This commit is contained in:
v-zhangjc9
2024-05-13 08:44:40 +08:00
parent 80fae0be38
commit e5f945c74b
4 changed files with 223 additions and 6 deletions

View File

@@ -78,8 +78,7 @@ public class DataScanner {
DataStream<String> stream = filterKeys.apply(
environment
.fromSource(new ReadPulsarSource(taskContext, pulsarUrl, pulsarTopic), WatermarkStrategy.noWatermarks(), "Read pulsar")
.setParallelism(totalParallelism)
.disableChaining(),
.setParallelism(totalParallelism),
totalParallelism
);
if (ObjectUtil.isNull(source)) {
@@ -110,7 +109,8 @@ public class DataScanner {
.flatMap(new ReadHudiFile(filterFields))
.name("Read log file")
.setParallelism(parallelism),
parallelismPredict.apply(totalParallelism)
// parallelismPredict.apply(totalParallelism)
totalParallelism
);
if (ObjectUtil.isNull(source)) {
source = stream;
@@ -130,7 +130,8 @@ public class DataScanner {
.flatMap(new ReadHudiFile(filterFields))
.name("Read base file")
.setParallelism(parallelism),
parallelismPredict.apply(totalParallelism)
// parallelismPredict.apply(totalParallelism)
totalParallelism
);
if (ObjectUtil.isNull(source)) {
source = stream;