feat(executor-task): 完成动态调整的pulsar扫描
对于pulsar在时间段上资源分配不均,通过pulsar reader中重新分配split解决,保证整个读取的流程中都能尽可能利用多线程的优势
This commit is contained in:
200
.idea/httpRequests/http-requests-log.http
generated
200
.idea/httpRequests/http-requests-log.http
generated
@@ -1,3 +1,103 @@
|
|||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=F957FA9C224455994EAC8E1880C973EC
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T162824.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=F957FA9C224455994EAC8E1880C973EC
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T161759.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=87BC302EE6B883F529C227E8450A4153
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T155607.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=87BC302EE6B883F529C227E8450A4153
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T141449.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=5B43FB1FE3B1826286434211887CF8F3
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T140729.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=5B43FB1FE3B1826286434211887CF8F3
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T112612.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=5B43FB1FE3B1826286434211887CF8F3
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T111740.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=5B43FB1FE3B1826286434211887CF8F3
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T111548.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=D47906EE784653B11896CB69773F0979
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T110457.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
|
Connection: Keep-Alive
|
||||||
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
|
Cookie: JSESSIONID=D47906EE784653B11896CB69773F0979
|
||||||
|
Accept-Encoding: br,deflate,gzip,x-gzip
|
||||||
|
|
||||||
|
<> 2024-06-05T094747.200.txt
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:33535/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:33535/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
Connection: Keep-Alive
|
Connection: Keep-Alive
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
||||||
@@ -418,103 +518,3 @@ Hello world
|
|||||||
|
|
||||||
###
|
###
|
||||||
|
|
||||||
POST http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s8.hdp.dc:34469/hdfs/write?root=hdfs://b2/apps/datalake/test/test.txt
|
|
||||||
Content-Length: 11
|
|
||||||
Content-Type: */*; charset=UTF-8
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
|
||||||
Cookie: JSESSIONID=21482112F88BCF63D4FE4F5D2A6681FF
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
Hello world
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
POST http://b12s8.hdp.dc:34469/hdfs/write?root=hdfs://b2/apps/datalake/test/test.txt
|
|
||||||
Content-Length: 11
|
|
||||||
Content-Type: */*; charset=UTF-8
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
Hello world
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:16695/hdfs/list?root=hdfs://b2/apps/datalake/hive/dws_test/external_table_hudi/dws_ord_prod_inst_attr
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
|
||||||
Cookie: JSESSIONID=C23877E9843F4E9C87FC2787EC5EA701
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
<> 2024-04-26T172547.200.json
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@http:/b12s8.hdp.dc:33681/hdfs/list?root=hdfs://b2/apps/datalake/hive/dws_test/external_table_hudi/dws_ord_prod_inst_attr
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
<> 2024-04-26T172511.503.html
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:30943/hdfs/list?root=hdfs://b2/apps/datalake/hive/dws_test/external_table_hudi/dws_ord_prod_inst_attr/.hoodie
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
|
||||||
Cookie: JSESSIONID=C23877E9843F4E9C87FC2787EC5EA701
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
<> 2024-04-26T162856.200.json
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:30943/hdfs/list?root=hdfs://b2/apps/datalake/hive/dws_test/external_table_hudi/dws_ord_prod_inst_attr
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
<> 2024-04-26T162825.200.json
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s15.hdp.dc:21685/pulsar/backlog?name=main&topic=persistent://odcp/grid/grid_serv_staff&subscription=Hudi_Sync_Pulsar_Reader_1552408245762723840_grid_grid_serv_staff_b_20230425
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10)
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
<> 2024-03-05T111533.200.json
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service-exporter/exporter/un_running_flink_job
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
|
|
||||||
Cookie: JSESSIONID=E12F3C1D9B6AE9937CA57DE2EE8656C7
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
<> 2024-02-04T153541.200.json
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service-exporter/exporter/un_running_flink_job
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
|
|
||||||
Cookie: JSESSIONID=606B213C2F4A113AC3CCC2A0614BB558
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
<> 2024-02-04T152955.200.json
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service-exporter/exporter/un_running_flink_job
|
|
||||||
Connection: Keep-Alive
|
|
||||||
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
|
|
||||||
Cookie: JSESSIONID=5569F2E918CF60BB8B439404BCD2255A
|
|
||||||
Accept-Encoding: br,deflate,gzip,x-gzip
|
|
||||||
|
|
||||||
<> 2024-02-04T152612.200.json
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
|
|||||||
@@ -48,12 +48,6 @@ public class HoodiePolice {
|
|||||||
.filter(ObjectUtil::isNotNull)
|
.filter(ObjectUtil::isNotNull)
|
||||||
.keyBy(Prisoner::getPartition)
|
.keyBy(Prisoner::getPartition)
|
||||||
.keyBy(Prisoner::getKey)
|
.keyBy(Prisoner::getKey)
|
||||||
.reduce(new RichReduceFunction<Prisoner>() {
|
|
||||||
@Override
|
|
||||||
public Prisoner reduce(Prisoner value1, Prisoner value2) throws Exception {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.addSink(new PrisonerSink(taskContext));
|
.addSink(new PrisonerSink(taskContext));
|
||||||
environment.execute();
|
environment.execute();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ public class ReadPulsarSource implements Source<RecordView, ReadPulsarSplit, Col
|
|||||||
latestMessageId.toString(),
|
latestMessageId.toString(),
|
||||||
range.getStart(),
|
range.getStart(),
|
||||||
range.getEnd(),
|
range.getEnd(),
|
||||||
TASK_GAP
|
range.getGap()
|
||||||
));
|
));
|
||||||
logger.info("Gap: {}, Splits: {}", TASK_GAP, tasks.size());
|
logger.info("Gap: {}, Splits: {}", TASK_GAP, tasks.size());
|
||||||
for (ReadPulsarSplit split : tasks) {
|
for (ReadPulsarSplit split : tasks) {
|
||||||
|
|||||||
@@ -1,15 +1,22 @@
|
|||||||
package com.lanyuanxiaoyao.service.executor.task.functions.pulsar;
|
package com.lanyuanxiaoyao.service.executor.task.functions.pulsar;
|
||||||
|
|
||||||
import cn.hutool.core.util.ObjectUtil;
|
import cn.hutool.core.util.ObjectUtil;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.functions.pulsar.event.AddEvent;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.functions.pulsar.event.EndEvent;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.functions.pulsar.event.StartEvent;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayDeque;
|
import java.util.ArrayDeque;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
|
import java.util.Set;
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
import org.apache.flink.api.connector.source.SourceEvent;
|
||||||
import org.apache.flink.api.connector.source.SplitEnumerator;
|
import org.apache.flink.api.connector.source.SplitEnumerator;
|
||||||
import org.apache.flink.api.connector.source.SplitEnumeratorContext;
|
import org.apache.flink.api.connector.source.SplitEnumeratorContext;
|
||||||
|
import org.eclipse.collections.api.factory.Lists;
|
||||||
|
import org.eclipse.collections.api.factory.Sets;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@@ -21,6 +28,7 @@ public class ReadPulsarSourceEnumerator implements SplitEnumerator<ReadPulsarSpl
|
|||||||
private static final Logger logger = LoggerFactory.getLogger(ReadPulsarSourceEnumerator.class);
|
private static final Logger logger = LoggerFactory.getLogger(ReadPulsarSourceEnumerator.class);
|
||||||
private final SplitEnumeratorContext<ReadPulsarSplit> context;
|
private final SplitEnumeratorContext<ReadPulsarSplit> context;
|
||||||
private final Queue<ReadPulsarSplit> readQueue;
|
private final Queue<ReadPulsarSplit> readQueue;
|
||||||
|
private final Set<String> runningTasks = Sets.mutable.<String>empty().asSynchronized();
|
||||||
|
|
||||||
public ReadPulsarSourceEnumerator(SplitEnumeratorContext<ReadPulsarSplit> context, Collection<ReadPulsarSplit> splits) {
|
public ReadPulsarSourceEnumerator(SplitEnumeratorContext<ReadPulsarSplit> context, Collection<ReadPulsarSplit> splits) {
|
||||||
this.context = context;
|
this.context = context;
|
||||||
@@ -31,15 +39,40 @@ public class ReadPulsarSourceEnumerator implements SplitEnumerator<ReadPulsarSpl
|
|||||||
public void start() {
|
public void start() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void handleSourceEvent(int subtaskId, SourceEvent sourceEvent) {
|
||||||
|
if (sourceEvent instanceof AddEvent) {
|
||||||
|
AddEvent event = (AddEvent) sourceEvent;
|
||||||
|
logger.info("t{} Add: {}", subtaskId, event.getSplits());
|
||||||
|
readQueue.addAll(event.getSplits());
|
||||||
|
|
||||||
|
logger.info("t{} ReSend task, running tasks: {}", subtaskId, runningTasks);
|
||||||
|
context.registeredReaders()
|
||||||
|
.values()
|
||||||
|
.forEach(info -> handleSourceEvent(info.getSubtaskId(), null));
|
||||||
|
} else if (sourceEvent instanceof EndEvent) {
|
||||||
|
EndEvent event = (EndEvent) sourceEvent;
|
||||||
|
logger.info("t{} End: {}", subtaskId, event.getSplit());
|
||||||
|
if (ObjectUtil.isNotNull(event.getSplit())) {
|
||||||
|
runningTasks.remove(event.getSplit().getSplitId());
|
||||||
|
}
|
||||||
|
logger.info("t{} Queue: {} running tasks: {}", subtaskId, readQueue.size(), runningTasks);
|
||||||
|
if (ObjectUtil.isEmpty(readQueue) && ObjectUtil.isEmpty(runningTasks)) {
|
||||||
|
logger.info("t{} No more", subtaskId);
|
||||||
|
context.registeredReaders()
|
||||||
|
.values()
|
||||||
|
.forEach(info -> context.signalNoMoreSplits(info.getSubtaskId()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleSplitRequest(int subtaskId, @Nullable String requesterHostname) {
|
public void handleSplitRequest(int subtaskId, @Nullable String requesterHostname) {
|
||||||
final ReadPulsarSplit split = readQueue.poll();
|
final ReadPulsarSplit split = readQueue.poll();
|
||||||
if (ObjectUtil.isNotNull(split)) {
|
if (ObjectUtil.isNotNull(split)) {
|
||||||
logger.info("t{} Assign split for {}, Queue rest: {}", subtaskId, subtaskId, readQueue.size());
|
logger.info("t{} Assign split for {}, Queue rest: {}", subtaskId, subtaskId, readQueue.size());
|
||||||
context.assignSplit(split, subtaskId);
|
context.assignSplit(split, subtaskId);
|
||||||
} else {
|
runningTasks.add(split.getSplitId());
|
||||||
logger.info("t{} No more split for {}", subtaskId, subtaskId);
|
|
||||||
context.signalNoMoreSplits(subtaskId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,14 @@
|
|||||||
package com.lanyuanxiaoyao.service.executor.task.functions.pulsar;
|
package com.lanyuanxiaoyao.service.executor.task.functions.pulsar;
|
||||||
|
|
||||||
import cn.hutool.core.collection.ListUtil;
|
import cn.hutool.core.collection.ListUtil;
|
||||||
|
import cn.hutool.core.util.IdUtil;
|
||||||
import cn.hutool.core.util.ObjectUtil;
|
import cn.hutool.core.util.ObjectUtil;
|
||||||
import cn.hutool.core.util.StrUtil;
|
import cn.hutool.core.util.StrUtil;
|
||||||
import com.lanyuanxiaoyao.service.executor.task.entity.RecordView;
|
import com.lanyuanxiaoyao.service.executor.task.entity.RecordView;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.functions.pulsar.event.AddEvent;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.functions.pulsar.event.EndEvent;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.functions.pulsar.event.StartEvent;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.helper.TimeRangeHelper;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.time.ZoneId;
|
import java.time.ZoneId;
|
||||||
@@ -19,14 +24,12 @@ import org.apache.flink.api.connector.source.ReaderOutput;
|
|||||||
import org.apache.flink.api.connector.source.SourceReader;
|
import org.apache.flink.api.connector.source.SourceReader;
|
||||||
import org.apache.flink.api.connector.source.SourceReaderContext;
|
import org.apache.flink.api.connector.source.SourceReaderContext;
|
||||||
import org.apache.flink.core.io.InputStatus;
|
import org.apache.flink.core.io.InputStatus;
|
||||||
import org.apache.pulsar.client.api.Consumer;
|
|
||||||
import org.apache.pulsar.client.api.Message;
|
import org.apache.pulsar.client.api.Message;
|
||||||
import org.apache.pulsar.client.api.MessageId;
|
import org.apache.pulsar.client.api.MessageId;
|
||||||
import org.apache.pulsar.client.api.PulsarClient;
|
import org.apache.pulsar.client.api.PulsarClient;
|
||||||
import org.apache.pulsar.client.api.Reader;
|
import org.apache.pulsar.client.api.Reader;
|
||||||
import org.apache.pulsar.client.api.SubscriptionInitialPosition;
|
|
||||||
import org.apache.pulsar.client.api.SubscriptionType;
|
|
||||||
import org.apache.pulsar.client.impl.schema.StringSchema;
|
import org.apache.pulsar.client.impl.schema.StringSchema;
|
||||||
|
import org.eclipse.collections.api.list.ImmutableList;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@@ -51,6 +54,9 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void start() {
|
public void start() {
|
||||||
|
if (readQueue.isEmpty()) {
|
||||||
|
readerContext.sendSplitRequest();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private RecordView parsePulsarMessage(Message<String> message) {
|
private RecordView parsePulsarMessage(Message<String> message) {
|
||||||
@@ -65,6 +71,7 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
|
|||||||
@Override
|
@Override
|
||||||
public InputStatus pollNext(ReaderOutput<RecordView> output) throws Exception {
|
public InputStatus pollNext(ReaderOutput<RecordView> output) throws Exception {
|
||||||
logger.info("t{} Poll Next", readerContext.getIndexOfSubtask());
|
logger.info("t{} Poll Next", readerContext.getIndexOfSubtask());
|
||||||
|
readerContext.sendSourceEventToCoordinator(new StartEvent());
|
||||||
if (ObjectUtil.isNotNull(currentSplit)) {
|
if (ObjectUtil.isNotNull(currentSplit)) {
|
||||||
logger.info("t{} Read split: {}", readerContext.getIndexOfSubtask(), currentSplit.getSplitId());
|
logger.info("t{} Read split: {}", readerContext.getIndexOfSubtask(), currentSplit.getSplitId());
|
||||||
long startTimestamp = currentSplit.getStartTimestamp();
|
long startTimestamp = currentSplit.getStartTimestamp();
|
||||||
@@ -86,6 +93,7 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
|
|||||||
.startMessageId(MessageId.earliest)
|
.startMessageId(MessageId.earliest)
|
||||||
.create()) {
|
.create()) {
|
||||||
reader.seek(startTimestamp);
|
reader.seek(startTimestamp);
|
||||||
|
long count = 0;
|
||||||
Message<String> message = reader.readNext(10, TimeUnit.SECONDS);
|
Message<String> message = reader.readNext(10, TimeUnit.SECONDS);
|
||||||
while (ObjectUtil.isNotNull(message)) {
|
while (ObjectUtil.isNotNull(message)) {
|
||||||
long publishTime = message.getPublishTime();
|
long publishTime = message.getPublishTime();
|
||||||
@@ -93,11 +101,32 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
|
|||||||
logger.info("t{} Break for {} -> {}, Queue rest: {}", readerContext.getIndexOfSubtask(), message.getPublishTime(), currentSplit.getEndTimestamp(), readQueue.size());
|
logger.info("t{} Break for {} -> {}, Queue rest: {}", readerContext.getIndexOfSubtask(), message.getPublishTime(), currentSplit.getEndTimestamp(), readQueue.size());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (++count > 500000) {
|
||||||
|
ImmutableList<TimeRangeHelper.TimeRange> range = TimeRangeHelper.range(publishTime, endTimestamp, currentSplit.getGap() / 5);
|
||||||
|
if (ObjectUtil.isNotEmpty(range)) {
|
||||||
|
readerContext.sendSourceEventToCoordinator(new AddEvent(
|
||||||
|
range.collect(r -> new ReadPulsarSplit(
|
||||||
|
currentSplit.getTaskId(),
|
||||||
|
IdUtil.nanoId(10),
|
||||||
|
currentSplit.getPulsarUrl(),
|
||||||
|
currentSplit.getPulsarTopic(),
|
||||||
|
currentSplit.getLatestMessageId(),
|
||||||
|
r.getStart(),
|
||||||
|
r.getEnd(),
|
||||||
|
r.getGap()
|
||||||
|
)).toList()
|
||||||
|
));
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
count = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
output.collect(parsePulsarMessage(message));
|
output.collect(parsePulsarMessage(message));
|
||||||
message = reader.readNext(10, TimeUnit.SECONDS);
|
message = reader.readNext(10, TimeUnit.SECONDS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
readerContext.sendSourceEventToCoordinator(new EndEvent(currentSplit));
|
||||||
}
|
}
|
||||||
|
|
||||||
currentSplit = null;
|
currentSplit = null;
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.executor.task.functions.pulsar.event;
|
||||||
|
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.functions.pulsar.ReadPulsarSplit;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.flink.api.connector.source.SourceEvent;
|
||||||
|
import org.eclipse.collections.api.list.ImmutableList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
*/
|
||||||
|
public class AddEvent implements SourceEvent {
|
||||||
|
private final List<ReadPulsarSplit> splits;
|
||||||
|
|
||||||
|
public AddEvent(List<ReadPulsarSplit> splits) {
|
||||||
|
this.splits = splits;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<ReadPulsarSplit> getSplits() {
|
||||||
|
return splits;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "AddSplitEvent{" +
|
||||||
|
"splits=" + splits +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.executor.task.functions.pulsar.event;
|
||||||
|
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.functions.pulsar.ReadPulsarSplit;
|
||||||
|
import org.apache.flink.api.connector.source.SourceEvent;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
*/
|
||||||
|
public class EndEvent implements SourceEvent {
|
||||||
|
private final ReadPulsarSplit split;
|
||||||
|
|
||||||
|
public EndEvent(ReadPulsarSplit split) {
|
||||||
|
this.split = split;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReadPulsarSplit getSplit() {
|
||||||
|
return split;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "EndEvent{" +
|
||||||
|
"split=" + split +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.executor.task.functions.pulsar.event;
|
||||||
|
|
||||||
|
import org.apache.flink.api.connector.source.SourceEvent;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
*/
|
||||||
|
public class StartEvent implements SourceEvent {
|
||||||
|
}
|
||||||
@@ -12,10 +12,12 @@ public class TimeRangeHelper {
|
|||||||
public static final class TimeRange {
|
public static final class TimeRange {
|
||||||
private final long start;
|
private final long start;
|
||||||
private final long end;
|
private final long end;
|
||||||
|
private final long gap;
|
||||||
|
|
||||||
public TimeRange(long start, long end) {
|
public TimeRange(long start, long end, long gap) {
|
||||||
this.start = start;
|
this.start = start;
|
||||||
this.end = end;
|
this.end = end;
|
||||||
|
this.gap = gap;
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getStart() {
|
public long getStart() {
|
||||||
@@ -25,13 +27,19 @@ public class TimeRangeHelper {
|
|||||||
public long getEnd() {
|
public long getEnd() {
|
||||||
return end;
|
return end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getGap() {
|
||||||
|
return gap;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ImmutableList<TimeRange> range(long start, long end, long gap) {
|
public static ImmutableList<TimeRange> range(long start, long end, long gap) {
|
||||||
gap = Math.max(TimeUnit.MINUTES.toMillis(1), gap);
|
if (gap < TimeUnit.MINUTES.toMillis(5)) {
|
||||||
|
return Lists.immutable.empty();
|
||||||
|
}
|
||||||
MutableList<TimeRange> ranges = Lists.mutable.empty();
|
MutableList<TimeRange> ranges = Lists.mutable.empty();
|
||||||
while (start <= end) {
|
while (start <= end) {
|
||||||
ranges.add(new TimeRange(start, Math.min(end, start + gap)));
|
ranges.add(new TimeRange(start, Math.min(end, start + gap), gap));
|
||||||
start += gap;
|
start += gap;
|
||||||
}
|
}
|
||||||
return ranges.toImmutable();
|
return ranges.toImmutable();
|
||||||
|
|||||||
@@ -130,7 +130,7 @@ hoodie.datasource.write.hive_style_partitioning=false
|
|||||||
hoodie.table.checksum=989688289
|
hoodie.table.checksum=989688289
|
||||||
|
|
||||||
### Test police
|
### Test police
|
||||||
GET http://{{username}}:{{password}}@b12s10.hdp.dc:33535/task/law_enforcement?
|
GET http://{{username}}:{{password}}@b12s10.hdp.dc:31719/task/law_enforcement?
|
||||||
pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&
|
pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&
|
||||||
pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&
|
pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&
|
||||||
primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID
|
||||||
Reference in New Issue
Block a user