refactor(executor-task): 优化pulsar扫描任务

调整pulsar source并行度设置,优化pulsar时间分段长度
This commit is contained in:
2024-01-22 09:43:57 +08:00
parent 99e636d55d
commit ff72583d5d
8 changed files with 155 additions and 60 deletions

View File

@@ -1,3 +1,21 @@
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/all
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=048A49CB9FD03402D9AA27CD2726B892
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_web/cloud/list
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=360A7282DB9D80CB6448B7D777A775FB
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2024-01-17T120430.200.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/hudi_api/api/sync_checkpoint_state?flink_job_id=1542097996099055616&alias=acct_acct_item_zs&message_id=861976:46933:-1&publish_time=1705373846898
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
@@ -464,23 +482,3 @@ Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.122.116.142:14041/queue/poll/compaction-queue
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.13 (Java/17.0.5)
Cookie: JSESSIONID=98CC709B9ED7F9CC70C5138E6350AB73
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2023-05-07T174749.200.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.122.116.142:14041/queue/poll/compaction-queue
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.13 (Java/17.0.5)
Cookie: JSESSIONID=98CC709B9ED7F9CC70C5138E6350AB73
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2023-05-07T174739.200.json
###

View File

@@ -27,6 +27,11 @@
<artifactId>service-configuration</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.lanyuanxiaoyao</groupId>
<artifactId>service-forest</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>

View File

@@ -15,6 +15,8 @@ import java.io.IOException;
import java.nio.charset.Charset;
import java.time.Duration;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.flink.client.cli.ClientOptions;
import org.apache.flink.configuration.*;
import org.apache.flink.yarn.configuration.YarnConfigOptions;
@@ -176,9 +178,17 @@ public class TaskService {
}
}
}
Pattern dateRegex = Pattern.compile("(\\w+) (\\d{17}) (.+)");
return results
.reject(StrUtil::isBlank)
.collect(StrUtil::trim)
.sortThisBy(line -> {
Matcher matcher = dateRegex.matcher(line);
if (matcher.matches()) {
return Long.valueOf(matcher.group(2));
}
return 0L;
})
.toImmutable();
}
}

View File

@@ -82,7 +82,7 @@ public class DataScanner {
StreamExecutionEnvironment environment = FlinkHelper.getBatchEnvironment();
DataStream<RecordView> source = null;
int totalParallelism = 20;
int totalParallelism = 30;
if (scanQueue) {
ArgumentsHelper.checkMetadata(taskContext, "pulsar");
String pulsarUrl = (String) metadata.get("pulsar");
@@ -90,8 +90,8 @@ public class DataScanner {
String pulsarTopic = (String) metadata.get("pulsar_topic");
logger.info("Scan queue topic: {} url: {}", pulsarTopic, pulsarUrl);
DataStream<RecordView> stream = environment
.fromSource(new ReadPulsarSource(taskContext, pulsarUrl, pulsarTopic, 50), WatermarkStrategy.noWatermarks(), "Read pulsar")
.setParallelism(50)
.fromSource(new ReadPulsarSource(taskContext, pulsarUrl, pulsarTopic), WatermarkStrategy.noWatermarks(), "Read pulsar")
.setParallelism(totalParallelism)
.disableChaining();
if (ObjectUtil.isNull(source)) {
source = stream;

View File

@@ -27,13 +27,13 @@ import org.slf4j.LoggerFactory;
*/
public class ReadPulsarSource implements Source<RecordView, ReadPulsarSplit, Collection<ReadPulsarSplit>>, ResultTypeQueryable<RecordView>, Serializable {
private static final Logger logger = LoggerFactory.getLogger(ReadPulsarSource.class);
private static final Long TASK_GAP = TimeUnit.MINUTES.toMillis(30);
private static final Long TASK_GAP = TimeUnit.MINUTES.toMillis(60);
private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS")
.withLocale(Locale.CHINA)
.withZone(ZoneId.systemDefault());
private final Collection<ReadPulsarSplit> splits;
public ReadPulsarSource(TaskContext taskContext, String pulsarUrl, String pulsarTopic, Integer parallelism) throws PulsarClientException {
public ReadPulsarSource(TaskContext taskContext, String pulsarUrl, String pulsarTopic) throws PulsarClientException {
try (PulsarClient client = PulsarClient.builder()
.serviceUrl(pulsarUrl)
.build()) {
@@ -61,7 +61,7 @@ public class ReadPulsarSource implements Source<RecordView, ReadPulsarSplit, Col
));
startTimestamp += TASK_GAP;
}
logger.info("Gap: {}, Parallelism: {}, Splits: {}", TASK_GAP, parallelism, tasks.size());
logger.info("Gap: {}, Splits: {}", TASK_GAP, tasks.size());
for (ReadPulsarSplit split : tasks) {
logger.info("Read split: {} -> {}", covertTimestamp(split.getStartTimestamp()), covertTimestamp(split.getEndTimestamp()));
}

View File

@@ -20,6 +20,7 @@ import org.apache.flink.api.connector.source.SourceReader;
import org.apache.flink.api.connector.source.SourceReaderContext;
import org.apache.flink.core.io.InputStatus;
import org.apache.pulsar.client.api.*;
import org.apache.pulsar.client.impl.schema.StringSchema;
import org.apache.pulsar.client.internal.DefaultImplementation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -55,10 +56,10 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
}
}
private RecordView parsePulsarMessage(Message<byte[]> message) {
private RecordView parsePulsarMessage(Message<String> message) {
return new RecordView(
RecordView.Operation.QUEUE,
new String(message.getValue()),
message.getValue(),
FORMATTER.format(Instant.ofEpochMilli(message.getPublishTime())),
message.getMessageId().toString()
);
@@ -66,24 +67,15 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
@Override
public InputStatus pollNext(ReaderOutput<RecordView> output) throws Exception {
logger.info("t{} Poll Next", readerContext.getIndexOfSubtask());
if (ObjectUtil.isNotNull(currentSplit)) {
logger.info("t{} Read split: {}", readerContext.getIndexOfSubtask(), currentSplit.getStartTimestamp());
try (PulsarClient client = PulsarClient.builder()
.serviceUrl(currentSplit.getPulsarUrl())
.build()) {
try (Consumer<byte[]> consumer = client.newConsumer()
try (Reader<String> reader = client.newReader(new StringSchema())
.topic(currentSplit.getPulsarTopic())
.batchReceivePolicy(
BatchReceivePolicy.builder()
.timeout(1, TimeUnit.SECONDS)
.maxNumMessages(0)
.maxNumBytes(0)
.build()
)
.receiverQueueSize(50000)
.subscriptionInitialPosition(SubscriptionInitialPosition.Earliest)
.subscriptionMode(SubscriptionMode.NonDurable)
.subscriptionType(SubscriptionType.Exclusive)
.subscriptionName(StrUtil.format(
"Task_Reader_{}_{}_{}_{}",
currentSplit.getTaskId(),
@@ -92,23 +84,17 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
currentSplit.getEndTimestamp()
))
.startMessageIdInclusive()
.subscribe()) {
consumer.seek(currentSplit.getStartTimestamp());
Messages<byte[]> messages = consumer.batchReceive();
while (ObjectUtil.isNotNull(messages)) {
long currentTimestamp = 0;
for (Message<byte[]> message : messages) {
currentTimestamp = message.getPublishTime();
}
if (currentTimestamp > currentSplit.getEndTimestamp()) {
logger.info("t{} Break for {} -> {}, Queue rest: {}", readerContext.getIndexOfSubtask(), currentTimestamp, currentSplit.getEndTimestamp(), readQueue.size());
.startMessageId(MessageId.earliest)
.create()) {
reader.seek(currentSplit.getStartTimestamp());
Message<String> message = reader.readNext(10, TimeUnit.SECONDS);
while (ObjectUtil.isNotNull(message)) {
if (message.getPublishTime() > currentSplit.getEndTimestamp()) {
logger.info("t{} Break for {} -> {}, Queue rest: {}", readerContext.getIndexOfSubtask(), message.getPublishTime(), currentSplit.getEndTimestamp(), readQueue.size());
break;
}
for (Message<byte[]> message : messages) {
output.collect(parsePulsarMessage(message));
}
consumer.acknowledge(messages);
messages = consumer.batchReceive();
output.collect(parsePulsarMessage(message));
message = reader.readNext(10, TimeUnit.SECONDS);
}
}
}

View File

@@ -1,8 +1,10 @@
package com.test;
import club.kingon.sql.builder.SqlBuilder;
import club.kingon.sql.builder.entry.Alias;
import club.kingon.sql.builder.entry.Column;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.db.sql.SqlUtil;
import com.eshore.odcp.hudi.connector.SQLConstants;
@@ -21,11 +23,105 @@ public class SqlBuilderTests {
String STATUS_Y = "y";
String STATUS_N = "n";
System.out.println(SqlUtil.formatSql(
SqlBuilder
.select(TbAppHudiSyncState.MESSAGE_ID_A)
.from(TbAppHudiSyncState._alias_)
.whereEq(TbAppHudiSyncState.ID_A, null)
.precompileSql()
SqlBuilder.select(
SQLConstants.IapDatahub.DataSource.DS_NAME_A,
SQLConstants.IapDatahub.DataSource.SCHEMA_NAME_A,
SQLConstants.IapDatahub.DataSourceTable.TABLE_NAME_A,
SQLConstants.IapDatahub.DataSourceTable.TABLE_TYPE_A,
SQLConstants.IapDatahub.DataSourceTableField.FIELD_NAME_A,
SQLConstants.IapDatahub.DataSourceTableField.FIELD_SEQ_A,
SQLConstants.IapDatahub.DataSourceTableField.FIELD_TYPE_A,
SQLConstants.IapDatahub.DataSourceTableField.PRIMARY_KEY_A,
SQLConstants.IapDatahub.DataSourceTableField.PARTITION_KEY_A,
SQLConstants.IapDatahub.DataSourceTableField.LENGTH_A,
TbAppCollectTableInfo.TGT_DB_A,
TbAppCollectTableInfo.TGT_TABLE_A,
TbAppCollectTableInfo.TGT_TABLE_TYPE_A,
TbAppCollectTableInfo.TGT_HDFS_PATH_A,
TbAppHudiJobConfig.WRITE_TASKS_A,
TbAppHudiJobConfig.WRITE_OPERATION_A,
TbAppHudiJobConfig.WRITE_TASK_MAX_MEMORY_A,
TbAppHudiJobConfig.WRITE_BATCH_SIZE_A,
TbAppHudiJobConfig.WRITE_RATE_LIMIT_A,
TbAppCollectTableInfo.BUCKET_NUMBER_A,
TbAppHudiJobConfig.COMPACTION_STRATEGY_A,
TbAppHudiJobConfig.COMPACTION_TASKS_A,
TbAppHudiJobConfig.COMPACTION_DELTA_COMMITS_A,
TbAppHudiJobConfig.COMPACTION_DELTA_SECONDS_A,
TbAppHudiJobConfig.COMPACTION_ASYNC_ENABLED_A,
TbAppHudiJobConfig.COMPACTION_MAX_MEMORY_A,
TbAppHudiJobConfig.CONFIGS_A,
TbAppCollectTableInfo.FILTER_FIELD_A,
TbAppCollectTableInfo.FILTER_VALUES_A,
TbAppCollectTableInfo.FILTER_TYPE_A,
TbAppCollectTableInfo.SRC_TOPIC_A,
TbAppCollectTableInfo.SRC_PULSAR_ADDR_A,
Alias.of("tayjc_sync.job_manager_memory", "sync_job_manager_memory"),
Alias.of("tayjc_sync.task_manager_memory", "sync_task_manager_memory"),
Alias.of("tayjc_compaction.job_manager_memory", "compaction_job_manager_memory"),
Alias.of("tayjc_compaction.task_manager_memory", "compaction_task_manger_momory"),
TbAppCollectTableInfo.PARTITION_FIELD_A,
TbAppHudiSyncState.MESSAGE_ID_A,
TbAppGlobalConfig.METRIC_PUBLISH_URL_A,
TbAppGlobalConfig.METRIC_PROMETHEUS_URL_A,
TbAppGlobalConfig.METRIC_API_URL_A,
TbAppGlobalConfig.METRIC_PUBLISH_DELAY_A,
TbAppGlobalConfig.METRIC_PUBLISH_PERIOD_A,
TbAppGlobalConfig.METRIC_PUBLISH_TIMEOUT_A,
TbAppGlobalConfig.METRIC_PUBLISH_BATCH_A,
Alias.of(TbAppFlinkJobConfig.ID_A, "job_id"),
Alias.of(TbAppFlinkJobConfig.NAME_A, "job_name"),
TbAppGlobalConfig.CHECKPOINT_ROOT_PATH_A,
TbAppHudiJobConfig.SOURCE_TASKS_A,
TbAppCollectTableInfo.ALIAS_A,
SQLConstants.IapDatahub.DataSource.CONNECTION_A,
TbAppCollectTableInfo.PRIORITY_A,
SQLConstants.IapDatahub.DataSource.DS_TYPE_A,
TbAppHudiJobConfig.KEEP_FILE_VERSION_A,
TbAppHudiJobConfig.KEEP_COMMIT_VERSION_A,
TbAppCollectTableInfo.TAGS_A,
TbAppGlobalConfig.ZK_URL_A,
TbAppCollectTableInfo.VERSION_A,
SQLConstants.IapDatahub.DataSourceTableField.SCALE_A
)
.from(
SQLConstants.IapDatahub.DataSource._alias_,
SQLConstants.IapDatahub.DataSourceTable._alias_,
SQLConstants.IapDatahub.DataSourceTableField._alias_,
TbAppFlinkJobConfig._alias_,
TbAppHudiJobConfig._alias_,
Alias.of(TbAppYarnJobConfig._origin_, "tayjc_sync"),
Alias.of(TbAppYarnJobConfig._origin_, "tayjc_compaction"),
TbAppGlobalConfig._alias_,
TbAppCollectTableInfo._alias_,
TbAppHudiSyncState._alias_
)
.whereEq(SQLConstants.IapDatahub.DataSource.DS_ROLE_A, "src")
.andEq(SQLConstants.IapDatahub.DataSource.DS_STATE_A, STATUS_Y)
.andEq(SQLConstants.IapDatahub.DataSource.RECORD_STATE_A, STATUS_Y)
.andEq(SQLConstants.IapDatahub.DataSourceTable.DS_ID_A, Column.as(SQLConstants.IapDatahub.DataSource.DS_ID_A))
.andEq(SQLConstants.IapDatahub.DataSourceTable.RECORD_STATE_A, STATUS_Y)
.andEq(SQLConstants.IapDatahub.DataSourceTableField.TABLE_ID_A, Column.as(SQLConstants.IapDatahub.DataSourceTable.TABLE_ID_A))
.andEq(SQLConstants.IapDatahub.DataSourceTableField.RECORD_STATE_A, STATUS_Y)
.andIn(SQLConstants.IapDatahub.DataSource.DS_TYPE_A, "udal", "telepg")
.andEq(SQLConstants.IapDatahub.DataSource.DS_NAME_A, Column.as(TbAppCollectTableInfo.SRC_DB_A))
.andEq(SQLConstants.IapDatahub.DataSource.SCHEMA_NAME_A, Column.as(TbAppCollectTableInfo.SRC_SCHEMA_A))
.andEq(SQLConstants.IapDatahub.DataSourceTable.TABLE_NAME_A, Column.as(TbAppCollectTableInfo.SRC_TABLE_A))
.andEq(TbAppCollectTableInfo.FLINK_JOB_ID_A, Column.as(TbAppFlinkJobConfig.ID_A))
.andEq(TbAppCollectTableInfo.HUDI_JOB_ID_A, Column.as(TbAppHudiJobConfig.ID_A))
.andEq(TbAppCollectTableInfo.SYNC_YARN_JOB_ID_A, Column.as("tayjc_sync.id"))
.andEq(TbAppCollectTableInfo.COMPACTION_YARN_JOB_ID_A, Column.as("tayjc_compaction.id"))
.andEq(TbAppCollectTableInfo.CONFIG_ID_A, Column.as(TbAppGlobalConfig.ID_A))
.andEq(TbAppHudiSyncState.ID_A, Column.as(StrUtil.format("concat({}, '-', {})", TbAppFlinkJobConfig.ID_A, TbAppCollectTableInfo.ALIAS_A)))
.andEq(ObjectUtil.isNotNull(flinkJobId), TbAppFlinkJobConfig.ID_A, flinkJobId)
.andEq(StrUtil.isNotBlank(alias), TbAppCollectTableInfo.ALIAS_A, alias)
.andEq(TbAppCollectTableInfo.STATUS_A, STATUS_Y)
.andEq(TbAppFlinkJobConfig.STATUS_A, STATUS_Y)
.andEq(TbAppHudiJobConfig.STATUS_A, STATUS_Y)
.andEq("tayjc_sync.status", STATUS_Y)
.andEq("tayjc_compaction.status", STATUS_Y)
.orderBy(SQLConstants.IapDatahub.DataSourceTableField.FIELD_SEQ_A)
.build()
));
}
}

View File

@@ -1315,7 +1315,7 @@ function tableMetaDialog() {
dialog: {
title: '队列详情',
actions: [],
size: 'lg',
size: 'xl',
body: {
type: 'service',
api: {