refactor(executor-task): 优化pulsar扫描任务

调整pulsar source并行度设置,优化pulsar时间分段长度
This commit is contained in:
2024-01-22 09:43:57 +08:00
parent 99e636d55d
commit ff72583d5d
8 changed files with 155 additions and 60 deletions

View File

@@ -1,8 +1,10 @@
package com.test;
import club.kingon.sql.builder.SqlBuilder;
import club.kingon.sql.builder.entry.Alias;
import club.kingon.sql.builder.entry.Column;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.db.sql.SqlUtil;
import com.eshore.odcp.hudi.connector.SQLConstants;
@@ -21,11 +23,105 @@ public class SqlBuilderTests {
String STATUS_Y = "y";
String STATUS_N = "n";
System.out.println(SqlUtil.formatSql(
SqlBuilder
.select(TbAppHudiSyncState.MESSAGE_ID_A)
.from(TbAppHudiSyncState._alias_)
.whereEq(TbAppHudiSyncState.ID_A, null)
.precompileSql()
SqlBuilder.select(
SQLConstants.IapDatahub.DataSource.DS_NAME_A,
SQLConstants.IapDatahub.DataSource.SCHEMA_NAME_A,
SQLConstants.IapDatahub.DataSourceTable.TABLE_NAME_A,
SQLConstants.IapDatahub.DataSourceTable.TABLE_TYPE_A,
SQLConstants.IapDatahub.DataSourceTableField.FIELD_NAME_A,
SQLConstants.IapDatahub.DataSourceTableField.FIELD_SEQ_A,
SQLConstants.IapDatahub.DataSourceTableField.FIELD_TYPE_A,
SQLConstants.IapDatahub.DataSourceTableField.PRIMARY_KEY_A,
SQLConstants.IapDatahub.DataSourceTableField.PARTITION_KEY_A,
SQLConstants.IapDatahub.DataSourceTableField.LENGTH_A,
TbAppCollectTableInfo.TGT_DB_A,
TbAppCollectTableInfo.TGT_TABLE_A,
TbAppCollectTableInfo.TGT_TABLE_TYPE_A,
TbAppCollectTableInfo.TGT_HDFS_PATH_A,
TbAppHudiJobConfig.WRITE_TASKS_A,
TbAppHudiJobConfig.WRITE_OPERATION_A,
TbAppHudiJobConfig.WRITE_TASK_MAX_MEMORY_A,
TbAppHudiJobConfig.WRITE_BATCH_SIZE_A,
TbAppHudiJobConfig.WRITE_RATE_LIMIT_A,
TbAppCollectTableInfo.BUCKET_NUMBER_A,
TbAppHudiJobConfig.COMPACTION_STRATEGY_A,
TbAppHudiJobConfig.COMPACTION_TASKS_A,
TbAppHudiJobConfig.COMPACTION_DELTA_COMMITS_A,
TbAppHudiJobConfig.COMPACTION_DELTA_SECONDS_A,
TbAppHudiJobConfig.COMPACTION_ASYNC_ENABLED_A,
TbAppHudiJobConfig.COMPACTION_MAX_MEMORY_A,
TbAppHudiJobConfig.CONFIGS_A,
TbAppCollectTableInfo.FILTER_FIELD_A,
TbAppCollectTableInfo.FILTER_VALUES_A,
TbAppCollectTableInfo.FILTER_TYPE_A,
TbAppCollectTableInfo.SRC_TOPIC_A,
TbAppCollectTableInfo.SRC_PULSAR_ADDR_A,
Alias.of("tayjc_sync.job_manager_memory", "sync_job_manager_memory"),
Alias.of("tayjc_sync.task_manager_memory", "sync_task_manager_memory"),
Alias.of("tayjc_compaction.job_manager_memory", "compaction_job_manager_memory"),
Alias.of("tayjc_compaction.task_manager_memory", "compaction_task_manger_momory"),
TbAppCollectTableInfo.PARTITION_FIELD_A,
TbAppHudiSyncState.MESSAGE_ID_A,
TbAppGlobalConfig.METRIC_PUBLISH_URL_A,
TbAppGlobalConfig.METRIC_PROMETHEUS_URL_A,
TbAppGlobalConfig.METRIC_API_URL_A,
TbAppGlobalConfig.METRIC_PUBLISH_DELAY_A,
TbAppGlobalConfig.METRIC_PUBLISH_PERIOD_A,
TbAppGlobalConfig.METRIC_PUBLISH_TIMEOUT_A,
TbAppGlobalConfig.METRIC_PUBLISH_BATCH_A,
Alias.of(TbAppFlinkJobConfig.ID_A, "job_id"),
Alias.of(TbAppFlinkJobConfig.NAME_A, "job_name"),
TbAppGlobalConfig.CHECKPOINT_ROOT_PATH_A,
TbAppHudiJobConfig.SOURCE_TASKS_A,
TbAppCollectTableInfo.ALIAS_A,
SQLConstants.IapDatahub.DataSource.CONNECTION_A,
TbAppCollectTableInfo.PRIORITY_A,
SQLConstants.IapDatahub.DataSource.DS_TYPE_A,
TbAppHudiJobConfig.KEEP_FILE_VERSION_A,
TbAppHudiJobConfig.KEEP_COMMIT_VERSION_A,
TbAppCollectTableInfo.TAGS_A,
TbAppGlobalConfig.ZK_URL_A,
TbAppCollectTableInfo.VERSION_A,
SQLConstants.IapDatahub.DataSourceTableField.SCALE_A
)
.from(
SQLConstants.IapDatahub.DataSource._alias_,
SQLConstants.IapDatahub.DataSourceTable._alias_,
SQLConstants.IapDatahub.DataSourceTableField._alias_,
TbAppFlinkJobConfig._alias_,
TbAppHudiJobConfig._alias_,
Alias.of(TbAppYarnJobConfig._origin_, "tayjc_sync"),
Alias.of(TbAppYarnJobConfig._origin_, "tayjc_compaction"),
TbAppGlobalConfig._alias_,
TbAppCollectTableInfo._alias_,
TbAppHudiSyncState._alias_
)
.whereEq(SQLConstants.IapDatahub.DataSource.DS_ROLE_A, "src")
.andEq(SQLConstants.IapDatahub.DataSource.DS_STATE_A, STATUS_Y)
.andEq(SQLConstants.IapDatahub.DataSource.RECORD_STATE_A, STATUS_Y)
.andEq(SQLConstants.IapDatahub.DataSourceTable.DS_ID_A, Column.as(SQLConstants.IapDatahub.DataSource.DS_ID_A))
.andEq(SQLConstants.IapDatahub.DataSourceTable.RECORD_STATE_A, STATUS_Y)
.andEq(SQLConstants.IapDatahub.DataSourceTableField.TABLE_ID_A, Column.as(SQLConstants.IapDatahub.DataSourceTable.TABLE_ID_A))
.andEq(SQLConstants.IapDatahub.DataSourceTableField.RECORD_STATE_A, STATUS_Y)
.andIn(SQLConstants.IapDatahub.DataSource.DS_TYPE_A, "udal", "telepg")
.andEq(SQLConstants.IapDatahub.DataSource.DS_NAME_A, Column.as(TbAppCollectTableInfo.SRC_DB_A))
.andEq(SQLConstants.IapDatahub.DataSource.SCHEMA_NAME_A, Column.as(TbAppCollectTableInfo.SRC_SCHEMA_A))
.andEq(SQLConstants.IapDatahub.DataSourceTable.TABLE_NAME_A, Column.as(TbAppCollectTableInfo.SRC_TABLE_A))
.andEq(TbAppCollectTableInfo.FLINK_JOB_ID_A, Column.as(TbAppFlinkJobConfig.ID_A))
.andEq(TbAppCollectTableInfo.HUDI_JOB_ID_A, Column.as(TbAppHudiJobConfig.ID_A))
.andEq(TbAppCollectTableInfo.SYNC_YARN_JOB_ID_A, Column.as("tayjc_sync.id"))
.andEq(TbAppCollectTableInfo.COMPACTION_YARN_JOB_ID_A, Column.as("tayjc_compaction.id"))
.andEq(TbAppCollectTableInfo.CONFIG_ID_A, Column.as(TbAppGlobalConfig.ID_A))
.andEq(TbAppHudiSyncState.ID_A, Column.as(StrUtil.format("concat({}, '-', {})", TbAppFlinkJobConfig.ID_A, TbAppCollectTableInfo.ALIAS_A)))
.andEq(ObjectUtil.isNotNull(flinkJobId), TbAppFlinkJobConfig.ID_A, flinkJobId)
.andEq(StrUtil.isNotBlank(alias), TbAppCollectTableInfo.ALIAS_A, alias)
.andEq(TbAppCollectTableInfo.STATUS_A, STATUS_Y)
.andEq(TbAppFlinkJobConfig.STATUS_A, STATUS_Y)
.andEq(TbAppHudiJobConfig.STATUS_A, STATUS_Y)
.andEq("tayjc_sync.status", STATUS_Y)
.andEq("tayjc_compaction.status", STATUS_Y)
.orderBy(SQLConstants.IapDatahub.DataSourceTableField.FIELD_SEQ_A)
.build()
));
}
}