feat(all): migrate the common, sync and executor projects

2024-02-29 15:32:14 +08:00
parent 0683068a02
commit 5a2e9fdfb8
73 changed files with 10204 additions and 1 deletion

bin/build-sync.sh Executable file

@@ -0,0 +1,4 @@
#!/bin/bash
mvn -pl service-common clean deploy -D skipTests -P local -s ~/.m2/settings-development.xml
mvn -pl utils/sync clean package -D skipTests -s ~/.m2/settings-development.xml
ytp-transfer2 /Users/lanyuanxiaoyao/Project/IdeaProjects/hudi-service/utils/sync/target/sync-1.0.0-SNAPSHOT.jar

pom.xml

@@ -9,6 +9,7 @@
     <version>1.0.0-SNAPSHOT</version>
     <packaging>pom</packaging>
     <modules>
+        <module>service-common</module>
         <module>service-configuration</module>
         <module>service-gateway</module>
         <module>service-queue</module>
@@ -32,6 +33,8 @@
         <module>service-scheduler</module>
         <module>service-launcher</module>
         <module>service-command</module>
+        <module>utils/executor</module>
+        <module>utils/sync</module>
     </modules>
     <properties>
@@ -39,7 +42,7 @@
         <maven.compiler.target>8</maven.compiler.target>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <build-tag>b1e11</build-tag>
+        <build-tag>b2b12</build-tag>
         <spring-boot.version>2.6.8</spring-boot.version>
         <spring-cloud.version>2021.0.3</spring-cloud.version>

service-common/pom.xml Normal file

@@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.lanyuanxiaoyao</groupId>
<artifactId>hudi-service</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>service-common</artifactId>
<dependencies>
<!-- Do not add third-party dependencies to the common module, to avoid conflicts; implement simple utilities by hand, and keep flows that are complex or do not need to be common out of this module -->
<!-- The hutool libraries have no third-party dependencies and are recommended, but likewise use them only when necessary to keep this module clean -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
</dependency>
<!-- Provides limited SQL building, avoiding the need for a heavyweight ORM framework -->
<dependency>
<groupId>io.github.dragons96</groupId>
<artifactId>sql-builder</artifactId>
<version>0.0.5.3</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

service-common/src/main/java/com/lanyuanxiaoyao/service/common/Constants.java Normal file

@@ -0,0 +1,211 @@
package com.lanyuanxiaoyao.service.common;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import java.time.format.DateTimeFormatter;
import java.util.function.BiFunction;
/**
* Constants
*
* @author ZhangJiacheng
* @version 0.0.1
* @date 2021-12-03
*/
public interface Constants {
// String DATABASE_NAME = "hudi_collect_build";
// String DATABASE_NAME = "hudi_collect_build_2";
String DATABASE_NAME = "hudi_collect_build_b12";
String API_HEADER_NAME = "Api-Version";
String API_VERSION = "1";
/**
* Union (composite) primary key
*/
String UNION_KEY_NAME = "_key";
/**
* Latest operation time on the source side
*/
String LATEST_OPERATION_TIMESTAMP_KEY_NAME = "latest_op_ts";
/**
* Downstream ingestion time of the record
*/
String UPDATE_TIMESTAMP_KEY_NAME = "update_ts";
/**
* Hudi delete-marker field
*/
String HUDI_DELETE_KEY_NAME = "_hoodie_is_deleted";
String PULSAR_SUBSCRIPTION_NAME_PREFIX = "Hudi_Sync_Pulsar_Reader";
String VERSION_UPDATE_KEY = "versionUpdate";
String VERSION_KEY = "version";
String DELETE = "D";
String INSERT = "I";
String UPDATE = "U";
String DDL = "ddl";
String UNKNOWN = "unknown";
String CITY_ID = "CITY_ID";
String INCLUDE = "INCLUDE";
String EXCLUDE = "EXCLUDE";
String JOB_ID = "job-id";
String SERVICE_MODE = "service-mode";
String FLINK_JOB = "flink-job";
String TABLE_META = "table-meta";
String TABLE_META_LIST = "table-meta-list";
String MESSAGE_ID = "message-id";
String INSTANTS = "instants";
String BETA = "beta";
String CLUSTER = "cluster";
String COW = "COPY_ON_WRITE";
String MOR = "MERGE_ON_READ";
String FLINK_JOB_OPTION = "-" + FLINK_JOB;
String TABLE_META_OPTION = "-" + TABLE_META;
String TABLE_META_LIST_OPTION = "-" + TABLE_META_LIST;
String INSTANTS_OPTION = "-" + INSTANTS;
String BETA_OPTION = "-" + BETA;
String CLUSTER_OPTION = "-" + CLUSTER;
String SPRING_SECURITY_AUTHORITY = "Anonymous";
String SPRING_SECURITY_USERNAME = "AxhEbscwsJDbYMH2";
String SPRING_SECURITY_PASSWORD = "{noop}cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4";
String SPRING_SECURITY_PASSWORD_PLAIN = "cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4";
String VICTORIA_USERNAME = "EsCFVuNkiDWv7PKmcF";
String VICTORIA_PASSWORD = "Abf%x9ocS^iKr3tgrd";
String SCHEMA_NAME = "schema";
String TABLE_NAME = "table";
String DATA_TIME = "data-time";
String DATA_PARENT_PATH = "data-parent-path";
String METRICS_PREFIX = "metrics_hudi";
String METRICS_YARN_PREFIX = METRICS_PREFIX + "_yarn";
String METRICS_YARN_JOB = METRICS_YARN_PREFIX + "_job";
String METRICS_YARN_TABLE = METRICS_YARN_PREFIX + "_table";
String METRICS_SYNC_PREFIX = METRICS_PREFIX + "_sync";
String METRICS_SYNC_SOURCE_LATENCY = METRICS_SYNC_PREFIX + "_source_latency";
String METRICS_SYNC_LATENCY = METRICS_SYNC_PREFIX + "_latency";
String METRICS_SYNC_FLINK_JOB_ID = METRICS_SYNC_PREFIX + "_flink_job_id";
String METRICS_SYNC_SOURCE_MESSAGE_RECEIVE = METRICS_SYNC_PREFIX + "_source_message_receive";
String METRICS_SYNC_SOURCE_MESSAGE_SIZE_RECEIVE_BYTES = METRICS_SYNC_PREFIX + "_source_message_receive_bytes";
String METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE = METRICS_SYNC_PREFIX + "_source_operation_type_receive";
String METRICS_SYNC_SOURCE_CHANGE_FILTER = METRICS_SYNC_PREFIX + "_source_change_filter";
String METRICS_SYNC_SOURCE_CHANGE_PARTITION = METRICS_SYNC_PREFIX + "_source_change_partition";
String METRICS_SYNC_SOURCE_BACK_LOGS = METRICS_SYNC_PREFIX + "_source_back_logs";
String METRICS_LABEL_FLINK_JOB_ID = "flink_job_id";
String METRICS_LABEL_FLINK_JOB_NAME = "flink_job_name";
String METRICS_LABEL_FLINK_NATIVE_JOB_ID = "flink_native_job_id";
String METRICS_LABEL_FLINK_NATIVE_TASK_NAME = "flink_native_task_name";
String METRICS_LABEL_FLINK_PARALLEL_ID = "flink_parallel_id";
String METRICS_LABEL_RUN_TYPE = "run_type";
String METRICS_LABEL_EXECUTOR_VERSION = "executor_version";
String METRICS_LABEL_CLUSTER = "cluster";
String METRICS_RUN_TYPE_SYNC = "sync";
String METRICS_RUN_TYPE_COMPACTION = "compaction";
String METRICS_LABEL_SCHEMA = "schema";
String METRICS_LABEL_TABLE = "table";
String METRICS_LABEL_STATUS = "status";
String METRICS_LABEL_TOPIC = "topic";
String METRICS_LABEL_BATCH_ID = "batch_id";
String METRICS_LABEL_ALIAS = "alias";
String METRICS_LABEL_APPLICATION_ID = "application_id";
String METRICS_STATUS_RUNNING = "running";
String METRICS_STATUS_STOPPED = "stopped";
String METRICS_LABEL_TYPE = "type";
String LOKI_PUSH_URL = "loki_push_url";
DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
String OPERATION_DONE = "操作完成";
String OPERATION_CANCEL = "操作取消";
String FETCHING_DATA = "Fetching Data";
String COMPACTION_STATUS_SCHEDULE = "SCHEDULE";
String COMPACTION_STATUS_START = "START";
String COMPACTION_STATUS_FINISH = "FINISH";
String COMPACTION_STATUS_FAILURE = "FAILURE";
long SECOND = 1000;
long HALF_MINUTE = 30 * SECOND;
long MINUTE = 60 * SECOND;
long HALF_HOUR = 30 * MINUTE;
long HOUR = 60 * MINUTE;
long KB = 1024;
long MB = 1024 * KB;
long GB = 1024 * MB;
long TB = 1024 * GB;
String TAG_SPLIT = ";";
String TAG_OPERATOR = "=";
String EVENT = "event";
String FROM_COMMAND_UTIL = "command util";
String FROM_COMPACTOR = "compactor";
int COMMAND_RENDER_WIDTH = 500;
String LOG_FLINK_JOB_ID_LABEL = "LOG_FLINK_JOB_ID_LABEL";
String LOG_FLINK_JOB_ID = "flink_job_id";
String LOG_ALIAS_LABEL = "LOG_ALIAS_LABEL";
String LOG_ALIAS = "alias";
String LOG_JOB_ID_LABEL = "LOG_JOB_ID_LABEL";
String LOG_JOB_ID = "job_id";
String LOG_POINT_PREFIX = "LOP-";
String LOG_POINT_MESSAGE_ID_EMPTY = LOG_POINT_PREFIX + "000001";
String LOG_POINT_CHECKPOINT_INITIAL = LOG_POINT_PREFIX + "000002";
String LOG_POINT_CHECKPOINT_INITIAL_MESSAGE_ID = LOG_POINT_PREFIX + "000003";
String LOG_POINT_PULSAR_SOURCE_BOOTSTRAP_MESSAGE_ID = LOG_POINT_PREFIX + "000004";
String LOG_POINT_PULSAR_SOURCE_GET_MESSAGE_ID_ERROR = LOG_POINT_PREFIX + "000005";
String LOG_POINT_FIELD_TYPE_NOT_FOUND = LOG_POINT_PREFIX + "000006";
String TAGS_NO_COMPACT = "NO_COMPACT";
String TAGS_PULSAR_BACKUP = "PULSAR_BACKUP";
String TAGS_NO_PRE_COMBINE = "NO_PRE_COMBINE";
String TAGS_PRE_COMBINE = "PRE_COMBINE";
String TAGS_NO_IGNORE_FAILED = "NO_IGNORE_FAILED";
String TAGS_DISABLE_CHAINING = "DISABLE_CHAINING";
String TAGS_TRACE_LATEST_OP_TS = "TRACE_LATEST_OP_TS";
String TAGS_SOURCE_READER = "SOURCE_READER";
String TAGS_USE_TEST_JAR = "USE_TEST_JAR";
String TAGS_ODS = "ODS";
String TAGS_ODS_FOCUS = "ODS_FOCUS";
String COMPACTION_QUEUE_PRE = "compaction-queue-pre";
String COMPACTION_QUEUE_B1 = "compaction-queue-b1";
String COMPACTION_QUEUE_B5 = "compaction-queue-b5";
String COMPACTION_QUEUE_A4 = "compaction-queue-a4";
String COMPACTION_QUEUE_B12 = "compaction-queue-b12";
String CLUSTER_B1 = "b1";
String CLUSTER_B5 = "b5";
String CLUSTER_A4 = "a4";
String CLUSTER_B12 = "b12";
String SCHEDULE_JOB_FAIL_COUNT = "SCHEDULE_JOB_FAIL_COUNT";
String SCHEDULE_RECOMMEND = "schedule_recommend";
String SCHEDULE_FORCE = "schedule_force";
BiFunction<TableMeta, String, String> FIELD_COVERT = (tableMeta, field) -> {
if (TableMeta.SourceType.TELEPG.equals(tableMeta.getSourceType())) {
return field.toLowerCase();
} else {
return field;
}
};
}
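The TAG_SPLIT and TAG_OPERATOR constants suggest tags are stored as a ";"-separated list of "name=value" entries. Below is a minimal parsing sketch under that assumption; the demo class, tag string and values are hypothetical, and Constants is assumed to be importable from this module.

import java.util.HashMap;
import java.util.Map;

public class TagParseDemo {
    public static void main(String[] args) {
        String tags = "NO_COMPACT;PRE_COMBINE=latest_op_ts"; // hypothetical tag string
        Map<String, String> parsed = new HashMap<>();
        for (String tag : tags.split(Constants.TAG_SPLIT)) {
            String[] parts = tag.split(Constants.TAG_OPERATOR, 2); // split on the first '='
            parsed.put(parts[0], parts.length > 1 ? parts[1] : "");
        }
        System.out.println(parsed); // e.g. {NO_COMPACT=, PRE_COMBINE=latest_op_ts}
    }
}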

File diff suppressed because it is too large

service-common/src/main/java/com/lanyuanxiaoyao/service/common/entity/FlinkJob.java Normal file

@@ -0,0 +1,123 @@
package com.lanyuanxiaoyao.service.common.entity;
import java.io.Serializable;
/**
* Flink Job
*
* @author ZhangJiacheng
* @version 0.0.1
* @date 2021-12-08
*/
public class FlinkJob implements Serializable {
private Long id;
private String name;
private RunMode runMode;
private TableMeta.YarnMeta oneInOneSyncYarn;
public FlinkJob() {
}
public FlinkJob(Builder builder) {
this.id = builder.id;
this.name = builder.name;
this.runMode = builder.runMode;
this.oneInOneSyncYarn = builder.oneInOneSyncYarn;
}
public static Builder builder() {
return new Builder();
}
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public RunMode getRunMode() {
return runMode;
}
public void setRunMode(RunMode runMode) {
this.runMode = runMode;
}
public TableMeta.YarnMeta getOneInOneSyncYarn() {
return oneInOneSyncYarn;
}
public void setOneInOneSyncYarn(TableMeta.YarnMeta oneInOneSyncYarn) {
this.oneInOneSyncYarn = oneInOneSyncYarn;
}
@Override
public String toString() {
return "FlinkJob{" +
"id=" + id +
", name='" + name + '\'' +
", runMode=" + runMode +
", oneInOneSyncYarn=" + oneInOneSyncYarn +
'}';
}
public enum RunMode {
/**
* All tables run in a single job
*/
ALL_IN_ONE,
/**
* Each table runs in its own job
*/
ONE_IN_ONE,
/**
* For small ACCT tables, route the same table to the same subtask
*/
ALL_IN_ONE_BY_TABLE,
ALL_IN_ONE_BY_SCHEMA,
}
public static final class Builder {
private Long id;
private String name;
private RunMode runMode;
private TableMeta.YarnMeta oneInOneSyncYarn;
private Builder() {}
public Builder id(Long id) {
this.id = id;
return this;
}
public Builder name(String name) {
this.name = name;
return this;
}
public Builder runMode(RunMode runMode) {
this.runMode = runMode;
return this;
}
public Builder oneInOneSyncYarn(TableMeta.YarnMeta oneInOneSyncYarn) {
this.oneInOneSyncYarn = oneInOneSyncYarn;
return this;
}
public FlinkJob build() {
return new FlinkJob(this);
}
}
}
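For reference, a minimal usage sketch of the builder above (the id and name are hypothetical):

public class FlinkJobDemo {
    public static void main(String[] args) {
        FlinkJob job = FlinkJob.builder()
                .id(42L)                              // hypothetical id
                .name("demo-job")                     // hypothetical name
                .runMode(FlinkJob.RunMode.ONE_IN_ONE)
                .build();
        // prints: FlinkJob{id=42, name='demo-job', runMode=ONE_IN_ONE, oneInOneSyncYarn=null}
        System.out.println(job);
    }
}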

service-common/src/main/java/com/lanyuanxiaoyao/service/common/entity/Record.java Normal file

@@ -0,0 +1,197 @@
package com.lanyuanxiaoyao.service.common.entity;
import java.io.Serializable;
import java.util.Map;
/**
* Wrapper for a message read from Pulsar
*
* @author lanyuanxiaoyao
* @version 0.0.1
* @date 2021-11-25
*/
public class Record implements Serializable {
private Source source;
private Statement statement;
public Record() {
}
public Source getSource() {
return source;
}
public void setSource(Source source) {
this.source = source;
}
public Statement getStatement() {
return statement;
}
public void setStatement(Statement statement) {
this.statement = statement;
}
@Override
public String toString() {
return "Record{" +
"source=" + source +
", statement=" + statement +
'}';
}
public static class Source implements Serializable {
private String sourceId;
private String sourceType;
private String sourcePos;
private String currentTs;
public Source() {
}
public String getSourceId() {
return sourceId;
}
public void setSourceId(String sourceId) {
this.sourceId = sourceId;
}
public String getSourceType() {
return sourceType;
}
public void setSourceType(String sourceType) {
this.sourceType = sourceType;
}
public String getSourcePos() {
return sourcePos;
}
public void setSourcePos(String sourcePos) {
this.sourcePos = sourcePos;
}
public String getCurrentTs() {
return currentTs;
}
public void setCurrentTs(String currentTs) {
this.currentTs = currentTs;
}
@Override
public String toString() {
return "Source{" +
"sourceId='" + sourceId + '\'' +
", sourceType='" + sourceType + '\'' +
", sourcePos='" + sourcePos + '\'' +
", currentTs='" + currentTs + '\'' +
'}';
}
}
public static class Statement implements Serializable {
private String schema;
private String table;
private String opStatement;
private String opType;
private String op;
private String opTs;
private String version;
private Map<String, Object> before;
private Map<String, Object> after;
public Statement() {
}
public String getSchema() {
return schema;
}
public void setSchema(String schema) {
this.schema = schema;
}
public String getTable() {
return table;
}
public void setTable(String table) {
this.table = table;
}
public String getOpStatement() {
return opStatement;
}
public void setOpStatement(String opStatement) {
this.opStatement = opStatement;
}
public String getOpType() {
return opType;
}
public void setOpType(String opType) {
this.opType = opType;
}
public String getOp() {
return op;
}
public void setOp(String op) {
this.op = op;
}
public String getOpTs() {
return opTs;
}
public void setOpTs(String opTs) {
this.opTs = opTs;
}
public String getVersion() {
return version;
}
public void setVersion(String version) {
this.version = version;
}
public Map<String, Object> getBefore() {
return before;
}
public void setBefore(Map<String, Object> before) {
this.before = before;
}
public Map<String, Object> getAfter() {
return after;
}
public void setAfter(Map<String, Object> after) {
this.after = after;
}
@Override
public String toString() {
return "Statement{" +
"schema='" + schema + '\'' +
", table='" + table + '\'' +
", opStatement='" + opStatement + '\'' +
", opType='" + opType + '\'' +
", op='" + op + '\'' +
", opTs='" + opTs + '\'' +
", version='" + version + '\'' +
", before=" + before +
", after=" + after +
'}';
}
}
}
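A minimal sketch of how a change record might be assembled with the setters above; all values here are hypothetical.

import java.util.Collections;

public class RecordDemo {
    public static void main(String[] args) {
        Record.Source source = new Record.Source();
        source.setSourceId("demo-source");
        source.setCurrentTs("2024-02-29 15:32:14");

        Record.Statement statement = new Record.Statement();
        statement.setSchema("demo_schema");
        statement.setTable("demo_table");
        statement.setOpType("U");                      // Constants.UPDATE
        statement.setBefore(Collections.singletonMap("CITY_ID", 754));
        statement.setAfter(Collections.singletonMap("CITY_ID", 755));

        Record record = new Record();
        record.setSource(source);
        record.setStatement(statement);
        System.out.println(record);
    }
}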

service-common/src/main/java/com/lanyuanxiaoyao/service/common/entity/RunMeta.java Normal file

@@ -0,0 +1,121 @@
package com.lanyuanxiaoyao.service.common.entity;
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
import java.io.Serializable;
/**
* Runtime parameters
*
* @author ZhangJiacheng
* @date 2023-05-11
*/
@SuppressWarnings("FieldMayBeFinal")
public class RunMeta implements Serializable {
private String cluster;
private Long flinkJobId;
private String alias;
private String flinkJobName;
private String host;
private String applicationId;
private String containerId;
private String containerPath;
private String runType;
private String executorVersion;
private String jvmPid;
private String applicationProxy;
private String subscriptionName;
public RunMeta() {
this.flinkJobName = System.getenv("flink_job_name");
this.host = System.getenv("NM_HOST");
this.applicationId = System.getenv("_APP_ID");
this.containerId = System.getenv("CONTAINER_ID");
this.containerPath = System.getenv("PWD");
this.runType = System.getenv("run_type");
this.executorVersion = System.getenv("executor_version");
this.jvmPid = System.getenv("JVM_PID");
this.applicationProxy = System.getenv("APPLICATION_WEB_PROXY_BASE");
}
public RunMeta(String cluster, Long flinkJobId) {
this();
this.cluster = cluster;
this.flinkJobId = flinkJobId;
}
public RunMeta(String cluster, Long flinkJobId, String alias) {
this(cluster, flinkJobId);
this.alias = alias;
this.subscriptionName = NameHelper.pulsarSubscriptionName(flinkJobId, alias);
}
public String getCluster() {
return cluster;
}
public Long getFlinkJobId() {
return flinkJobId;
}
public String getAlias() {
return alias;
}
public String getFlinkJobName() {
return flinkJobName;
}
public String getHost() {
return host;
}
public String getApplicationId() {
return applicationId;
}
public String getContainerId() {
return containerId;
}
public String getContainerPath() {
return containerPath;
}
public String getRunType() {
return runType;
}
public String getExecutorVersion() {
return executorVersion;
}
public String getJvmPid() {
return jvmPid;
}
public String getApplicationProxy() {
return applicationProxy;
}
public String getSubscriptionName() {
return subscriptionName;
}
@Override
public String toString() {
return "RunMeta{" +
"cluster='" + cluster + '\'' +
", flinkJobId='" + flinkJobId + '\'' +
", flinkJobName='" + flinkJobName + '\'' +
", host='" + host + '\'' +
", applicationId='" + applicationId + '\'' +
", containerId='" + containerId + '\'' +
", containerPath='" + containerPath + '\'' +
", runType='" + runType + '\'' +
", executorVersion='" + executorVersion + '\'' +
", jvmPid='" + jvmPid + '\'' +
", applicationProxy='" + applicationProxy + '\'' +
", subscriptionName='" + subscriptionName + '\'' +
'}';
}
}
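A minimal usage sketch: the cluster, flink job id and alias below are hypothetical, while the remaining fields are populated from the YARN container environment inside the constructor.

public class RunMetaDemo {
    public static void main(String[] args) {
        RunMeta meta = new RunMeta("b12", 42L, "demo_alias");
        // prints: Hudi_Sync_Pulsar_Reader_42_demo_alias_20230425
        // (see NameHelper.pulsarSubscriptionName later in this commit)
        System.out.println(meta.getSubscriptionName());
    }
}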

service-common/src/main/java/com/lanyuanxiaoyao/service/common/entity/SyncState.java Normal file

@@ -0,0 +1,259 @@
package com.lanyuanxiaoyao.service.common.entity;
import java.io.Serializable;
/**
* Entity for the sync/compaction state table
*
* @author ZhangJiacheng
* @date 2023-04-24
*/
public class SyncState implements Serializable {
private Long flinkJobId;
private String alias;
private String messageId;
private Long sourceStartTime;
private Long sourceCheckpointTime;
private Long sourcePublishTime;
private Long sourceOperationTime;
private Long compactionStartTime;
private Long compactionFinishTime;
private String compactionApplicationId;
private String compactionStatus;
private Long compactionStatusTime;
private Long compactionLatestOperationTime;
public SyncState() {
}
public SyncState(Builder builder) {
this.flinkJobId = builder.flinkJobId;
this.alias = builder.alias;
this.messageId = builder.messageId;
this.sourceStartTime = builder.sourceStartTime;
this.sourceCheckpointTime = builder.sourceCheckpointTime;
this.sourcePublishTime = builder.sourcePublishTime;
this.sourceOperationTime = builder.sourceOperationTime;
this.compactionStartTime = builder.compactionStartTime;
this.compactionFinishTime = builder.compactionFinishTime;
this.compactionApplicationId = builder.compactionApplicationId;
this.compactionStatus = builder.compactionStatus;
this.compactionStatusTime = builder.compactionStatusTime;
this.compactionLatestOperationTime = builder.compactionLatestOperationTime;
}
public static Builder builder() {
return new Builder();
}
public Long getFlinkJobId() {
return flinkJobId;
}
public void setFlinkJobId(Long flinkJobId) {
this.flinkJobId = flinkJobId;
}
public String getAlias() {
return alias;
}
public void setAlias(String alias) {
this.alias = alias;
}
public String getMessageId() {
return messageId;
}
public void setMessageId(String messageId) {
this.messageId = messageId;
}
public Long getSourceStartTime() {
return sourceStartTime;
}
public void setSourceStartTime(Long sourceStartTime) {
this.sourceStartTime = sourceStartTime;
}
public Long getSourceCheckpointTime() {
return sourceCheckpointTime;
}
public void setSourceCheckpointTime(Long sourceCheckpointTime) {
this.sourceCheckpointTime = sourceCheckpointTime;
}
public Long getSourcePublishTime() {
return sourcePublishTime;
}
public void setSourcePublishTime(Long sourcePublishTime) {
this.sourcePublishTime = sourcePublishTime;
}
public Long getSourceOperationTime() {
return sourceOperationTime;
}
public void setSourceOperationTime(Long sourceOperationTime) {
this.sourceOperationTime = sourceOperationTime;
}
public Long getCompactionStartTime() {
return compactionStartTime;
}
public void setCompactionStartTime(Long compactionStartTime) {
this.compactionStartTime = compactionStartTime;
}
public Long getCompactionFinishTime() {
return compactionFinishTime;
}
public void setCompactionFinishTime(Long compactionFinishTime) {
this.compactionFinishTime = compactionFinishTime;
}
public String getCompactionApplicationId() {
return compactionApplicationId;
}
public void setCompactionApplicationId(String compactionApplicationId) {
this.compactionApplicationId = compactionApplicationId;
}
public String getCompactionStatus() {
return compactionStatus;
}
public void setCompactionStatus(String compactionStatus) {
this.compactionStatus = compactionStatus;
}
public Long getCompactionStatusTime() {
return compactionStatusTime;
}
public void setCompactionStatusTime(Long compactionStatusTime) {
this.compactionStatusTime = compactionStatusTime;
}
public Long getCompactionLatestOperationTime() {
return compactionLatestOperationTime;
}
public void setCompactionLatestOperationTime(Long compactionLatestOperationTime) {
this.compactionLatestOperationTime = compactionLatestOperationTime;
}
@Override
public String toString() {
return "SyncState{" +
"flinkJobId=" + flinkJobId +
", alias='" + alias + '\'' +
", messageId='" + messageId + '\'' +
", sourceStartTime=" + sourceStartTime +
", sourceCheckpointTime=" + sourceCheckpointTime +
", sourcePublishTime=" + sourcePublishTime +
", sourceOperationTime=" + sourceOperationTime +
", compactionStartTime=" + compactionStartTime +
", compactionFinishTime=" + compactionFinishTime +
", compactionApplicationId='" + compactionApplicationId + '\'' +
", compactionStatus='" + compactionStatus + '\'' +
", compactionStatusTime=" + compactionStatusTime +
", compactionLatestOperationTime=" + compactionLatestOperationTime +
'}';
}
public static final class Builder {
private Long flinkJobId;
private String alias;
private String messageId;
private Long sourceStartTime;
private Long sourceCheckpointTime;
private Long sourcePublishTime;
private Long sourceOperationTime;
private Long compactionStartTime;
private Long compactionFinishTime;
private String compactionApplicationId;
private String compactionStatus;
private Long compactionStatusTime;
private Long compactionLatestOperationTime;
private Builder() {
}
public Builder flinkJobId(Long flinkJobId) {
this.flinkJobId = flinkJobId;
return this;
}
public Builder alias(String alias) {
this.alias = alias;
return this;
}
public Builder messageId(String messageId) {
this.messageId = messageId;
return this;
}
public Builder sourceStartTime(Long sourceStartTime) {
this.sourceStartTime = sourceStartTime;
return this;
}
public Builder sourceCheckpointTime(Long sourceCheckpointTime) {
this.sourceCheckpointTime = sourceCheckpointTime;
return this;
}
public Builder sourcePublishTime(Long sourcePublishTime) {
this.sourcePublishTime = sourcePublishTime;
return this;
}
public Builder sourceOperationTime(Long sourceOperationTime) {
this.sourceOperationTime = sourceOperationTime;
return this;
}
public Builder compactionStartTime(Long compactionStartTime) {
this.compactionStartTime = compactionStartTime;
return this;
}
public Builder compactionFinishTime(Long compactionFinishTime) {
this.compactionFinishTime = compactionFinishTime;
return this;
}
public Builder compactionApplicationId(String compactionApplicationId) {
this.compactionApplicationId = compactionApplicationId;
return this;
}
public Builder compactionStatus(String compactionStatus) {
this.compactionStatus = compactionStatus;
return this;
}
public Builder compactionStatusTime(Long compactionStatusTime) {
this.compactionStatusTime = compactionStatusTime;
return this;
}
public Builder compactionLatestOperationTime(Long compactionLatestOperationTime) {
this.compactionLatestOperationTime = compactionLatestOperationTime;
return this;
}
public SyncState build() {
return new SyncState(this);
}
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/entity/compaction/ScheduleJob.java Normal file

@@ -0,0 +1,144 @@
package com.lanyuanxiaoyao.service.common.entity.compaction;
/**
* Compaction schedule job
*
* @author ZhangJiacheng
* @date 2022-09-26
*/
public class ScheduleJob {
private String id;
private Long flinkJobId;
private String alias;
private String batch;
private String status;
private String comment;
public ScheduleJob() {
}
public ScheduleJob(String id, Long flinkJobId, String alias, String batch, String status, String comment) {
this.id = id;
this.flinkJobId = flinkJobId;
this.alias = alias;
this.batch = batch;
this.status = status;
this.comment = comment;
}
public ScheduleJob(Builder builder) {
this.id = builder.id;
this.flinkJobId = builder.flinkJobId;
this.alias = builder.alias;
this.batch = builder.batch;
this.status = builder.status;
this.comment = builder.comment;
}
public static Builder builder() {
return new Builder();
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public Long getFlinkJobId() {
return flinkJobId;
}
public void setFlinkJobId(Long flinkJobId) {
this.flinkJobId = flinkJobId;
}
public String getAlias() {
return alias;
}
public void setAlias(String alias) {
this.alias = alias;
}
public String getBatch() {
return batch;
}
public void setBatch(String batch) {
this.batch = batch;
}
public String getStatus() {
return status;
}
public void setStatus(String status) {
this.status = status;
}
public String getComment() {
return comment;
}
public void setComment(String comment) {
this.comment = comment;
}
@Override
public String toString() {
return "ScheduleJob{" +
"id=" + id +
", flinkJobId=" + flinkJobId +
", alias='" + alias + '\'' +
'}';
}
public static final class Builder {
private String id;
private Long flinkJobId;
private String alias;
private String batch;
private String status;
private String comment;
private Builder() {
}
public Builder id(String id) {
this.id = id;
return this;
}
public Builder flinkJobId(Long flinkJobId) {
this.flinkJobId = flinkJobId;
return this;
}
public Builder alias(String alias) {
this.alias = alias;
return this;
}
public Builder batch(String batch) {
this.batch = batch;
return this;
}
public Builder status(String status) {
this.status = status;
return this;
}
public Builder comment(String comment) {
this.comment = comment;
return this;
}
public ScheduleJob build() {
return new ScheduleJob(this);
}
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/entity/compaction/ScheduleQueue.java Normal file

@@ -0,0 +1,15 @@
package com.lanyuanxiaoyao.service.common.entity.compaction;
import java.util.Deque;
/**
* Schedule queue
*
* @author ZhangJiacheng
* @date 2022-09-26
*/
public interface ScheduleQueue extends Deque<ScheduleJob> {
Iterable<ScheduleJob> poll(int limit);
Iterable<ScheduleJob> pollWithoutSame(int limit);
}
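The commit adds no implementation of this interface here. Below is a minimal in-memory sketch, assuming pollWithoutSame is meant to drain up to limit jobs while deduplicating by the flinkJobId plus alias pair within one batch; the class name and that interpretation are assumptions.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.LinkedBlockingDeque;

public class InMemoryScheduleQueue extends LinkedBlockingDeque<ScheduleJob> implements ScheduleQueue {

    @Override
    public Iterable<ScheduleJob> poll(int limit) {
        List<ScheduleJob> drained = new ArrayList<>(limit);
        ScheduleJob job;
        while (drained.size() < limit && (job = poll()) != null) {
            drained.add(job);
        }
        return drained;
    }

    @Override
    public Iterable<ScheduleJob> pollWithoutSame(int limit) {
        Set<String> seen = new HashSet<>();
        List<ScheduleJob> drained = new ArrayList<>(limit);
        List<ScheduleJob> duplicates = new ArrayList<>();
        ScheduleJob job;
        while (drained.size() < limit && (job = poll()) != null) {
            String key = job.getFlinkJobId() + "-" + job.getAlias();
            if (seen.add(key)) {
                drained.add(job);
            } else {
                duplicates.add(job);
            }
        }
        duplicates.forEach(this::addLast); // re-queue duplicates for a later batch
        return drained;
    }
}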

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/CheckpointRootPathNotFoundException.java Normal file

@@ -0,0 +1,11 @@
package com.lanyuanxiaoyao.service.common.exception;
/**
* @author ZhangJiacheng
* @date 2022-05-23
*/
public class CheckpointRootPathNotFoundException extends RuntimeException {
public CheckpointRootPathNotFoundException() {
super("Checkpoint root path not found");
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/ConfigException.java Normal file

@@ -0,0 +1,29 @@
package com.lanyuanxiaoyao.service.common.exception;
import java.util.function.Supplier;
/**
* Configuration exception
*
* @author ZhangJiacheng
* @date 2022-05-16
*/
public class ConfigException extends Exception {
public ConfigException(String message) {
super(message);
}
public static void check(String message, Supplier<Boolean> checkFunction) throws ConfigException {
if (checkFunction.get()) {
throw new ConfigException(message);
}
}
public static void checkQuiet(String message, Supplier<Boolean> checkFunction) {
try {
check(message, checkFunction);
} catch (ConfigException e) {
throw new RuntimeException(e);
}
}
}
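For reference, a minimal usage sketch of the two check helpers above; the demo class and input list are hypothetical.

import java.util.Collections;
import java.util.List;

public class ConfigExceptionDemo {
    public static void main(String[] args) throws ConfigException {
        List<String> primaryKeys = Collections.emptyList(); // hypothetical input
        // Throws ConfigException("Primary keys cannot be empty") because the supplier returns true
        ConfigException.check("Primary keys cannot be empty", primaryKeys::isEmpty);
        // checkQuiet runs the same check but rethrows as an unchecked RuntimeException
        ConfigException.checkQuiet("Primary keys cannot be empty", primaryKeys::isEmpty);
    }
}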

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/FlinkJobNotFoundException.java Normal file

@@ -0,0 +1,15 @@
package com.lanyuanxiaoyao.service.common.exception;
/**
* @author ZhangJiacheng
* @date 2022-05-23
*/
public class FlinkJobNotFoundException extends RuntimeException {
public FlinkJobNotFoundException() {
super("Flink job not found");
}
public FlinkJobNotFoundException(Long flinkJobId) {
super("Flink job " + flinkJobId + " not found");
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/MessageIdEmptyException.java Normal file

@@ -0,0 +1,14 @@
package com.lanyuanxiaoyao.service.common.exception;
import com.lanyuanxiaoyao.service.common.Constants;
/**
* @author ZhangJiacheng
* @date 2022-05-23
*/
public class MessageIdEmptyException extends RuntimeException {
public MessageIdEmptyException() {
super(Constants.LOG_POINT_MESSAGE_ID_EMPTY + " Message id is empty");
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/MissingArgumentException.java Normal file

@@ -0,0 +1,13 @@
package com.lanyuanxiaoyao.service.common.exception;
/**
* Missing-argument exception
*
* @author ZhangJiacheng
* @date 2022-06-20
*/
public class MissingArgumentException extends Exception {
public MissingArgumentException(String argumentName) {
super("Argument: '" + argumentName + "' is not found");
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/PartitionPathNotFoundException.java Normal file

@@ -0,0 +1,11 @@
package com.lanyuanxiaoyao.service.common.exception;
/**
* @author ZhangJiacheng
* @date 2022-05-23
*/
public class PartitionPathNotFoundException extends RuntimeException {
public PartitionPathNotFoundException() {
super("Partition path not found");
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/PulsarInfoNotFoundException.java Normal file

@@ -0,0 +1,11 @@
package com.lanyuanxiaoyao.service.common.exception;
/**
* @author ZhangJiacheng
* @date 2023-04-28
*/
public class PulsarInfoNotFoundException extends RuntimeException {
public PulsarInfoNotFoundException(String message) {
super(message);
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/SyncStateNotFoundException.java Normal file

@@ -0,0 +1,11 @@
package com.lanyuanxiaoyao.service.common.exception;
/**
* @author ZhangJiacheng
* @date 2022-05-23
*/
public class SyncStateNotFoundException extends RuntimeException {
public SyncStateNotFoundException() {
super("Sync state not found");
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/TableMetaNotFoundException.java Normal file

@@ -0,0 +1,11 @@
package com.lanyuanxiaoyao.service.common.exception;
/**
* @author ZhangJiacheng
* @date 2022-05-23
*/
public class TableMetaNotFoundException extends RuntimeException {
public TableMetaNotFoundException() {
super("Table meta not found");
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/exception/ZookeeperUrlNotFoundException.java Normal file

@@ -0,0 +1,11 @@
package com.lanyuanxiaoyao.service.common.exception;
/**
* @author ZhangJiacheng
* @date 2022-05-23
*/
public class ZookeeperUrlNotFoundException extends RuntimeException {
public ZookeeperUrlNotFoundException() {
super("Zookeeper url not found");
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/utils/FlinkJobHelper.java Normal file

@@ -0,0 +1,86 @@
package com.lanyuanxiaoyao.service.common.utils;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
/**
* Flink Job helper
*
* @author ZhangJiacheng
* @version 0.0.1
* @date 2021-12-08
*/
public class FlinkJobHelper {
public static String allFlinkJobSql(String database) {
// language=MySQL
return "select tafjc.id,\n" +
" tafjc.name,\n" +
" tafjc.run_mode,\n" +
" tayjc.job_manager_memory,\n" +
" tayjc.task_manager_memory\n" +
"from `" + database + "`.tb_app_flink_job_config tafjc\n" +
" left join\n" +
" `" + database + "`.tb_app_yarn_job_config tayjc on tafjc.one_in_one_yarn_job_id = tayjc.id and tayjc.status = 'y'\n" +
"where tafjc.status = 'y'";
}
public static String flinkJobSql(String database) {
// language=MySQL
return "select tafjc.id,\n" +
" tafjc.name,\n" +
" tafjc.run_mode,\n" +
" tayjc.job_manager_memory,\n" +
" tayjc.task_manager_memory\n" +
"from `" + database + "`.tb_app_flink_job_config tafjc\n" +
" left join\n" +
" `" + database + "`.tb_app_yarn_job_config tayjc on tafjc.one_in_one_yarn_job_id = tayjc.id and tayjc.status = 'y'\n" +
"where tafjc.id = ?\n" +
" and tafjc.status = 'y'";
}
public static List<FlinkJob> from(ResultSet rs) throws SQLException {
List<FlinkJob> results = new ArrayList<>();
while (rs.next()) {
String runModeText = rs.getString(3);
FlinkJob.RunMode mode;
try {
mode = FlinkJob.RunMode.valueOf(runModeText);
} catch (IllegalArgumentException e) {
mode = FlinkJob.RunMode.ALL_IN_ONE;
}
TableMeta.YarnMeta yarnMeta = TableMeta.YarnMeta.builder()
.jobManagerMemory(rs.getInt(4))
.taskManagerMemory(rs.getInt(5))
.build();
results.add(
FlinkJob.builder()
.id(rs.getLong(1))
.name(rs.getString(2))
.runMode(mode)
.oneInOneSyncYarn(yarnMeta)
.build()
);
}
return results;
}
public static Optional<FlinkJob> fromOne(ResultSet rs) throws SQLException {
List<FlinkJob> results = from(rs);
if (results.size() < 1) {
return Optional.empty();
} else if (results.size() > 1) {
throw new SQLException("Found more than 1 records");
} else {
return Optional.of(results.get(0));
}
}
public static boolean isOneInOneMode(FlinkJob job) {
return FlinkJob.RunMode.ONE_IN_ONE.equals(job.getRunMode());
}
}
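A minimal sketch of how these helpers are presumably wired to a JDBC query; the connection details and job id are hypothetical, and Constants plus FlinkJobHelper are assumed to be importable from this module.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

public class FlinkJobQueryDemo {
    public static void main(String[] args) throws SQLException {
        String jdbcUrl = "jdbc:mysql://db-host:3306/"; // hypothetical connection details
        try (Connection conn = DriverManager.getConnection(jdbcUrl, "user", "password");
             PreparedStatement ps = conn.prepareStatement(FlinkJobHelper.flinkJobSql(Constants.DATABASE_NAME))) {
            ps.setLong(1, 42L); // hypothetical flink job id, bound to the single '?'
            try (ResultSet rs = ps.executeQuery()) {
                FlinkJobHelper.fromOne(rs).ifPresent(job -> System.out.println(job.getName()));
            }
        }
    }
}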

service-common/src/main/java/com/lanyuanxiaoyao/service/common/utils/LogHelper.java Normal file

@@ -0,0 +1,117 @@
package com.lanyuanxiaoyao.service.common.utils;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.map.MapBuilder;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.MDC;
/**
* Logging helpers
*
* @author ZhangJiacheng
* @date 2023-12-25
*/
public class LogHelper {
private static String generateLog(LogPoint point, String template, Object[] args) {
if (ObjectUtil.isEmpty(template)) {
return point.toString();
}
if (ObjectUtil.isEmpty(args)) {
return point + " " + template;
}
Object[] items = new Object[args.length + 1];
items[0] = point;
System.arraycopy(args, 0, items, 1, args.length);
return StrUtil.format("{} " + template, items);
}
public static void info(Logger logger, LogPoint point) {
logger.info(generateLog(point, null, null));
}
public static void info(Logger logger, LogPoint point, String template) {
logger.info(generateLog(point, template, null));
}
public static void info(Logger logger, LogPoint point, String template, Object... args) {
logger.info(generateLog(point, template, args));
}
public static void debug(Logger logger, LogPoint point, String template, Object... args) {
logger.debug(generateLog(point, template, args));
}
public static void warn(Logger logger, LogPoint point, String template, Object... args) {
logger.warn(generateLog(point, template, args));
}
public static void error(Logger logger, LogPoint point, String template, Object... args) {
logger.error(generateLog(point, template, args));
}
public static void setMdc(Map<String, String> mdcList) {
mdcList.forEach(MDC::put);
}
public static void setMdc(String... items) {
if (items.length % 2 != 0) {
throw new IllegalArgumentException("Items must be key-value pairs");
}
MapBuilder<String, String> builder = MapUtil.builder();
// Step two at a time: each even index is a key, followed by its value
for (int i = 0; i + 1 < items.length; i += 2) {
builder.put(items[i], items[i + 1]);
}
setMdc(builder.build());
}
public static void removeMdc(List<String> mdcList) {
mdcList.forEach(MDC::remove);
}
public static void removeMdc(String... names) {
removeMdc(ListUtil.of(names));
}
public static void setMdcFlinkJobAndAlias(Long flinkJobId, String alias) {
setMdc(MapUtil.<String, String>builder()
.put(Constants.LOG_FLINK_JOB_ID_LABEL, flinkJobId.toString())
.put(Constants.LOG_ALIAS_LABEL, alias)
.build());
}
public static void removeMdcFlinkJobAndAlias(Long flinkJobId, String alias) {
removeMdc(ListUtil.of(Constants.LOG_FLINK_JOB_ID_LABEL, Constants.LOG_ALIAS_LABEL));
}
public enum LogPoint {
PULSAR_SOURCE_CHECKPOINT_INITIAL(100),
PULSAR_SOURCE_CHECKPOINT_INITIAL_MESSAGE_ID(101),
MESSAGE_ID_EMPTY(1),
CHECKPOINT_INITIAL(2),
CHECKPOINT_INITIAL_MESSAGE_ID(3),
PULSAR_SOURCE_BOOTSTRAP_MESSAGE_ID(4),
PULSAR_SOURCE_BOOTSTRAP_GET_MESSAGE_ERROR(5),
FIELD_TYPE_NOT_FOUND(6),
VERSION_UPDATE(7),
CHECKPOINT_START(8),
CHECKPOINT_COMPLETE(9);
private final Integer code;
LogPoint(Integer code) {
this.code = code;
}
@Override
public String toString() {
return String.format("LOP-%06d", code);
}
}
}
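A minimal usage sketch of the MDC and log-point helpers above; the logger name, label values and message are hypothetical.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LogHelperDemo {
    public static void main(String[] args) {
        Logger logger = LoggerFactory.getLogger(LogHelperDemo.class);
        LogHelper.setMdc(Constants.LOG_FLINK_JOB_ID_LABEL, "42",
                Constants.LOG_ALIAS_LABEL, "demo_alias");
        // Logs "LOP-000008 checkpoint 7 started" with the MDC labels attached
        LogHelper.info(logger, LogHelper.LogPoint.CHECKPOINT_START, "checkpoint {} started", 7);
        LogHelper.removeMdc(Constants.LOG_FLINK_JOB_ID_LABEL, Constants.LOG_ALIAS_LABEL);
    }
}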

service-common/src/main/java/com/lanyuanxiaoyao/service/common/utils/MapHelper.java Normal file

@@ -0,0 +1,37 @@
package com.lanyuanxiaoyao.service.common.utils;
import java.util.Map;
/**
* Map utilities
*
* @author ZhangJiacheng
* @date 2023-03-20
*/
public class MapHelper {
// If a key exists in both cases, this case-insensitive lookup returns the value under the upper-case key first
public static Object getWithoutCase(Map<String, ?> map, String key) {
String upperKey = key.toUpperCase(), lowerKey = key.toLowerCase();
return map.containsKey(upperKey) ? map.get(upperKey) : map.containsKey(lowerKey) ? map.get(lowerKey) : map.get(key);
}
public static String getStringWithoutCase(Map<String, ?> map, String key) {
return (String) getWithoutCase(map, key);
}
public static Integer getIntWithoutCase(Map<String, ?> map, String key) {
return (Integer) getWithoutCase(map, key);
}
public static Long getLongWithoutCase(Map<String, ?> map, String key) {
return (Long) getWithoutCase(map, key);
}
public static Double getDoubleWithoutCase(Map<String, ?> map, String key) {
return (Double) getWithoutCase(map, key);
}
public static Float getFloatWithoutCase(Map<String, ?> map, String key) {
return (Float) getWithoutCase(map, key);
}
}
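For reference, a minimal usage sketch; the map contents are hypothetical.

import java.util.HashMap;
import java.util.Map;

public class MapHelperDemo {
    public static void main(String[] args) {
        Map<String, Object> row = new HashMap<>();
        row.put("CITY_ID", 755); // hypothetical column value
        // Looks up CITY_ID first, then city_id, then the key as given
        Integer cityId = MapHelper.getIntWithoutCase(row, "city_id");
        System.out.println(cityId); // 755
    }
}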

service-common/src/main/java/com/lanyuanxiaoyao/service/common/utils/NameHelper.java Normal file

@@ -0,0 +1,103 @@
package com.lanyuanxiaoyao.service.common.utils;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.Constants;
/**
* Naming helpers
*
* @author ZhangJiacheng
* @date 2022-06-24
*/
public class NameHelper {
public static String pulsarSubscriptionName(Long flinkJobId, String alias) {
return Constants.PULSAR_SUBSCRIPTION_NAME_PREFIX + "_" + flinkJobId + "_" + alias + "_20230425";
}
// Sync job name
public static final String SYNC_JOB_NAME_REGEX = "^Sync_(\\d+?)_(.+)$";
public static boolean isSyncJob(String name) {
return StrUtil.isNotBlank(name) && name.matches(SYNC_JOB_NAME_REGEX);
}
public static String syncJobName(Long flinkJobId, String flinkJobName) {
return "Sync_" + flinkJobId + "_" + flinkJobName;
}
public static final String COMPACTION_JOB_NAME_REGEX = "^Compaction_(\\d+?)_(.+?)$";
public static boolean isCompactionJob(String name) {
return StrUtil.isNotBlank(name) && name.matches(COMPACTION_JOB_NAME_REGEX);
}
public static String compactionJobName(Long flinkJobId, String alias) {
return "Compaction_" + flinkJobId + "_" + alias;
}
// flink job name
public static String syncFlinkName(Long flinkJobId, String flinkJobName) {
return flinkJobName + " (ID: " + flinkJobId + ")";
}
public static String syncFlinkName(Long flinkJobId, String flinkJobName, String alias) {
return flinkJobName + " " + alias + " (ID: " + flinkJobId + ")";
}
public static String compactionFlinkName(Long flinkJobId, String schema, String alias) {
return schema + " " + alias + " (ID: " + flinkJobId + ")";
}
// sync state name
public static String syncStateName(Long flinkJobId, String alias) {
return flinkJobId + "-" + alias;
}
// zk lock name
public static final String ZK_ROOT_PATH = "/hudi";
public static final String ZK_LOCK_PATH = ZK_ROOT_PATH + "/lock";
public static final String ZK_LAUNCHER_LOCK_PATH = ZK_LOCK_PATH + "/launcher";
public static final String ZK_RUNNING_LOCK_PATH = ZK_LOCK_PATH + "/running";
public static final String ZK_SYNC_SUFFIX_PATH = "/sync";
public static final String ZK_SYNC_LAUNCHER_LOCK_PATH = ZK_LAUNCHER_LOCK_PATH + ZK_SYNC_SUFFIX_PATH;
public static final String ZK_SYNC_RUNNING_LOCK_PATH = ZK_RUNNING_LOCK_PATH + ZK_SYNC_SUFFIX_PATH;
public static String syncLockName(Long flinkJobId, String alias) {
if (StrUtil.isNotBlank(alias)) {
return "sync_lock_" + flinkJobId + "_" + alias;
}
return "sync_lock_" + flinkJobId;
}
public static String syncLauncherLockPath(Long flinkJobId) {
return ZK_SYNC_LAUNCHER_LOCK_PATH + "/" + syncLockName(flinkJobId, null);
}
public static String syncRunningLockPath(Long flinkJobId) {
return syncRunningLockPath(flinkJobId, null);
}
public static String syncRunningLockPath(Long flinkJobId, String alias) {
return ZK_SYNC_RUNNING_LOCK_PATH + "/" + syncLockName(flinkJobId, alias);
}
public static final String ZK_COMPACTION_SUFFIX_PATH = "/compaction";
public static final String ZK_COMPACTION_LAUNCHER_LOCK_PATH = ZK_LAUNCHER_LOCK_PATH + ZK_COMPACTION_SUFFIX_PATH;
public static final String ZK_COMPACTION_RUNNING_LOCK_PATH = ZK_RUNNING_LOCK_PATH + ZK_COMPACTION_SUFFIX_PATH;
public static String compactionLockName(Long flinkJobId, String alias) {
return "compaction_lock_" + flinkJobId + "_" + alias;
}
public static String compactionLauncherLockPath(Long flinkJobId, String alias) {
return ZK_COMPACTION_LAUNCHER_LOCK_PATH + "/" + compactionLockName(flinkJobId, alias);
}
public static String compactionRunningLockPath(Long flinkJobId, String alias) {
return ZK_COMPACTION_RUNNING_LOCK_PATH + "/" + compactionLockName(flinkJobId, alias);
}
}
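A minimal sketch of the names and lock paths these helpers produce, derived from the constants above; all ids and aliases are hypothetical.

public class NameHelperDemo {
    public static void main(String[] args) {
        System.out.println(NameHelper.syncJobName(42L, "demo_job"));         // Sync_42_demo_job
        System.out.println(NameHelper.isSyncJob("Sync_42_demo_job"));        // true
        System.out.println(NameHelper.compactionJobName(42L, "demo_alias")); // Compaction_42_demo_alias
        System.out.println(NameHelper.syncLauncherLockPath(42L));            // /hudi/lock/launcher/sync/sync_lock_42
        System.out.println(NameHelper.compactionRunningLockPath(42L, "demo_alias"));
        // /hudi/lock/running/compaction/compaction_lock_42_demo_alias
    }
}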

service-common/src/main/java/com/lanyuanxiaoyao/service/common/utils/RecordHelper.java Normal file

@@ -0,0 +1,127 @@
package com.lanyuanxiaoyao.service.common.utils;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.Record;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
* Record helper
*
* @author ZhangJiacheng
* @version 0.0.1
* @date 2021-12-03
*/
public class RecordHelper {
public static Boolean isNotVersionUpdateRecord(String record) {
return !isVersionUpdateRecord(record);
}
public static Boolean isVersionUpdateRecord(String record) {
return record.contains(Constants.VERSION_UPDATE_KEY);
}
public static Boolean isNotVersionUpdateRecord(Record record) {
return !isVersionUpdateRecord(record);
}
public static Boolean isVersionUpdateRecord(Record record) {
// Record{source=Source{sourceId='versionUpdate', sourceType='null', sourcePos='null', currentTs='2022-11-15 22:17:44'}, statement=Statement{schema='crm_ivpn_cust', table='customer', opStatement='null', opType='version', op='null', opTs='2022-11-15 00:17:43', version='20220925', before=null, after=null}}
return Constants.VERSION_UPDATE_KEY.equals(record.getSource().getSourceId())
&& Constants.VERSION_KEY.equals(record.getStatement().getOpType());
}
/**
* Get the current statement content, i.e. whichever of before/after is non-empty
*
* @param record the message
* @return the current field values
*/
public static Map<String, Object> getCurrentStatement(Record record) {
Map<String, Object> before = record.getStatement().getBefore();
Map<String, Object> after = record.getStatement().getAfter();
return (after == null ? before : after.isEmpty() ? before : after);
}
public static Optional<Map<String, Object>> getBeforeStatement(Record record) {
return Optional.ofNullable(record.getStatement().getBefore());
}
public static Optional<Map<String, Object>> getAfterStatement(Record record) {
return Optional.ofNullable(record.getStatement().getAfter());
}
private static Boolean isMapEmpty(Map<?, ?> map) {
return map == null || map.isEmpty();
}
public static Boolean isChangeField(TableMeta meta, Record record, Function<TableMeta, Optional<String>> fieldGetter) {
Map<String, Object> before = record.getStatement().getBefore();
Map<String, Object> after = record.getStatement().getAfter();
Optional<String> field = fieldGetter.apply(meta);
if (isMapEmpty(before) || isMapEmpty(after) || !field.isPresent()) {
return false;
}
Object beforeField = before.getOrDefault(field.get(), null);
Object afterField = after.getOrDefault(field.get(), null);
if (beforeField == null || afterField == null) {
return false;
}
return !Objects.equals(String.valueOf(beforeField), String.valueOf(afterField));
}
public static Map<String, Object> addExtraMetadata(Map<String, Object> current, TableMeta tableMeta, Record record) {
String operationType = record.getStatement().getOpType();
return addExtraMetadata(current, tableMeta, record, Constants.DELETE.equals(operationType));
}
public static Map<String, Object> addExtraMetadata(Map<String, Object> current, TableMeta tableMeta, Record record, Boolean isDelete) {
Map<String, Object> newMap = new HashMap<>(current);
newMap.put(Constants.UNION_KEY_NAME, RecordHelper.createUnionKey(tableMeta, current));
newMap.put(Constants.UPDATE_TIMESTAMP_KEY_NAME, SnowFlakeHelper.next());
newMap.put(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, record.getStatement().getOpTs());
newMap.put(Constants.HUDI_DELETE_KEY_NAME, isDelete);
return newMap;
}
public static String createUnionKey(TableMeta tableMeta, Record record) {
return createUnionKey(tableMeta, getCurrentStatement(record));
}
private static final String PRIMARY_KEY_NOT_FOUND = "Primary Key Not Found";
/**
* Generate a union key based on the primary keys and partition keys
*
* @param tableMeta table metadata
* @param fields field values
* @return the union key value
*/
public static String createUnionKey(TableMeta tableMeta, Map<String, Object> fields) {
if (tableMeta.getPrimaryKeys().isEmpty()) {
throw new RuntimeException(PRIMARY_KEY_NOT_FOUND);
}
if (Objects.isNull(fields)) {
throw new RuntimeException("Fields cannot be null");
}
List<String> primaryKeys = tableMeta.getPrimaryKeys()
.stream()
.map(key -> Optional.ofNullable(fields.get(Constants.FIELD_COVERT.apply(tableMeta, key.getName())))
.orElseThrow(() -> new RuntimeException(PRIMARY_KEY_NOT_FOUND + " " + fields))
.toString())
.collect(Collectors.toList());
String primaryKey = String.join("-", primaryKeys);
if (tableMeta.getPartitionKeys().isEmpty()) {
return primaryKey;
} else {
List<String> partitionKeys = tableMeta.getPartitionKeys()
.stream()
.map(key -> fields.get(Constants.FIELD_COVERT.apply(tableMeta, key.getName())).toString())
.collect(Collectors.toList());
String partitionKey = String.join("-", partitionKeys);
return primaryKey + "_" + partitionKey;
}
}
}
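A minimal sketch of the getCurrentStatement fallback behaviour described above; the row image is hypothetical.

import java.util.Collections;
import java.util.Map;

public class CurrentStatementDemo {
    public static void main(String[] args) {
        Record.Statement statement = new Record.Statement();
        statement.setBefore(Collections.singletonMap("CITY_ID", 755)); // hypothetical row image
        statement.setAfter(null);                                      // e.g. a delete carries no after-image
        Record record = new Record();
        record.setStatement(statement);
        // Falls back to the before-image when after is null or empty
        Map<String, Object> current = RecordHelper.getCurrentStatement(record);
        System.out.println(current); // {CITY_ID=755}
    }
}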

service-common/src/main/java/com/lanyuanxiaoyao/service/common/utils/SnowFlakeHelper.java Normal file

@@ -0,0 +1,72 @@
package com.lanyuanxiaoyao.service.common.utils;
/**
* Snowflake-style ID generator
*
* @author ZhangJiacheng
* @date 2020-06-05
*/
public class SnowFlakeHelper {
/**
* Epoch (start) timestamp
*/
private final static long START_TIMESTAMP = 1;
/**
* Number of bits used by the sequence
*/
private final static long SEQUENCE_BIT = 11;
/**
* Maximum sequence value
*/
private final static long MAX_SEQUENCE_BIT = ~(-1 << SEQUENCE_BIT);
/**
* Left shift applied to the timestamp
*/
private final static long TIMESTAMP_OFFSET = SEQUENCE_BIT;
/**
* Sequence number
*/
private static long sequence = 0;
/**
* Timestamp of the previous call
*/
private static long lastTimestamp = -1;
public static synchronized long next() {
long currentTimestamp = nowTimestamp();
if (currentTimestamp < lastTimestamp) {
throw new RuntimeException("Clock have moved backwards.");
}
if (currentTimestamp == lastTimestamp) {
// Same millisecond: increment the sequence
sequence = (sequence + 1) & MAX_SEQUENCE_BIT;
// Sequence exhausted within this millisecond; wait for the next one
if (sequence == 0) {
currentTimestamp = nextTimestamp();
}
} else {
// New millisecond: reset the sequence to 0
sequence = 0;
}
lastTimestamp = currentTimestamp;
return (currentTimestamp - START_TIMESTAMP) << TIMESTAMP_OFFSET | sequence;
}
private static long nextTimestamp() {
long milli = nowTimestamp();
while (milli <= lastTimestamp) {
milli = nowTimestamp();
}
return milli;
}
private static long nowTimestamp() {
return System.currentTimeMillis();
}
}
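From the constants above, an id is the millisecond timestamp (minus START_TIMESTAMP = 1) shifted left 11 bits, with the per-millisecond sequence in the low 11 bits; note there are no worker-id bits, so ids are only unique within one JVM. A minimal sketch decomposing an id:

public class SnowFlakeDemo {
    public static void main(String[] args) {
        long id = SnowFlakeHelper.next();
        long timestampMs = (id >> 11) + 1;        // undo the shift and the START_TIMESTAMP offset
        long sequence = id & ((1L << 11) - 1);    // low 11 bits: up to 2048 ids per millisecond
        System.out.println(timestampMs + " / " + sequence);
    }
}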

service-common/src/main/java/com/lanyuanxiaoyao/service/common/utils/SyncStateHelper.java Normal file

@@ -0,0 +1,82 @@
package com.lanyuanxiaoyao.service.common.utils;
import com.lanyuanxiaoyao.service.common.entity.SyncState;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
/**
* @author ZhangJiacheng
* @date 2023-04-24
*/
public class SyncStateHelper {
public static String allSyncStateSql(String database) {
// language=MySQL
return "select info.flink_job_id,\n" +
" info.alias,\n" +
" state.message_id,\n" +
" state.source_start_time,\n" +
" state.source_checkpoint_time,\n" +
" state.source_publish_time,\n" +
" state.source_op_time,\n" +
" state.compaction_start_time,\n" +
" state.compaction_finish_time,\n" +
" state.compaction_application_id,\n" +
" state.compaction_status,\n" +
" state.compaction_status_time,\n" +
" state.compaction_latest_op_ts\n" +
"from " + database + ".tb_app_hudi_sync_state state,\n" +
" " + database + ".tb_app_collect_table_info info\n" +
"where state.id = concat(info.flink_job_id, '-', info.alias)\n" +
" and info.status = 'y'";
}
public static String syncStateSql(String database) {
// language=MySQL
return "select info.flink_job_id,\n" +
" info.alias,\n" +
" state.message_id,\n" +
" state.source_start_time,\n" +
" state.source_checkpoint_time,\n" +
" state.source_publish_time,\n" +
" state.source_op_time,\n" +
" state.compaction_start_time,\n" +
" state.compaction_finish_time,\n" +
" state.compaction_application_id,\n" +
" state.compaction_status,\n" +
" state.compaction_status_time,\n" +
" state.compaction_latest_op_ts\n" +
"from " + database + ".tb_app_hudi_sync_state state,\n" +
" " + database + ".tb_app_collect_table_info info\n" +
"where state.id = concat(info.flink_job_id, '-', info.alias)\n" +
" and info.flink_job_id = ?\n" +
" and info.alias = ?\n" +
" and info.status = 'y'";
}
public static List<SyncState> from(ResultSet rs) throws SQLException {
List<SyncState> results = new ArrayList<>();
Function<Timestamp, Long> dateConvertor = timestamp -> timestamp == null ? 0 : timestamp.getTime();
while (rs.next()) {
results.add(SyncState.builder()
.flinkJobId(rs.getLong(1))
.alias(rs.getString(2))
.messageId(rs.getString(3))
.sourceStartTime(dateConvertor.apply(rs.getTimestamp(4)))
.sourceCheckpointTime(dateConvertor.apply(rs.getTimestamp(5)))
.sourcePublishTime(dateConvertor.apply(rs.getTimestamp(6)))
.sourceOperationTime(dateConvertor.apply(rs.getTimestamp(7)))
.compactionStartTime(dateConvertor.apply(rs.getTimestamp(8)))
.compactionFinishTime(dateConvertor.apply(rs.getTimestamp(9)))
.compactionApplicationId(rs.getString(10))
.compactionStatus(rs.getString(11))
.compactionStatusTime(dateConvertor.apply(rs.getTimestamp(12)))
.compactionLatestOperationTime(dateConvertor.apply(rs.getTimestamp(13)))
.build());
}
return results;
}
}

service-common/src/main/java/com/lanyuanxiaoyao/service/common/utils/TableMetaHelper.java Normal file

@@ -0,0 +1,576 @@
package com.lanyuanxiaoyao.service.common.utils;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.exception.ConfigException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;
import java.util.stream.Collectors;
/**
* Table Meta 工具类
*
* @author ZhangJiacheng
* @version 0.0.1
* @date 2021-12-01
*/
public class TableMetaHelper {
//private static final AES AES = new AES(Mode.CBC, Padding.NoPadding, "6fa22c779ec14b98".getBytes(), "6fa22c779ec14b98".getBytes());
public static String tableMetaSql(String database) {
return tableMetaSql(database, true, false);
}
/*
SqlBuilder.select(
DataSource.DS_NAME,
DataSource.SCHEMA_NAME,
DataSourceTable.TABLE_NAME,
DataSourceTable.TABLE_TYPE,
DataSourceTableField.FIELD_NAME,
DataSourceTableField.FIELD_SEQ,
DataSourceTableField.FIELD_TYPE,
DataSourceTableField.PRIMARY_KEY,
DataSourceTableField.PARTITION_KEY,
DataSourceTableField.LENGTH,
TbAppCollectTableInfo.TGT_DB,
TbAppCollectTableInfo.TGT_TABLE,
TbAppCollectTableInfo.TGT_TABLE_TYPE,
TbAppCollectTableInfo.TGT_HDFS_PATH,
TbAppHudiJobConfig.WRITE_TASKS,
TbAppHudiJobConfig.WRITE_OPERATION,
TbAppHudiJobConfig.WRITE_TASK_MAX_MEMORY,
TbAppHudiJobConfig.WRITE_BATCH_SIZE,
TbAppHudiJobConfig.WRITE_RATE_LIMIT,
TbAppCollectTableInfo.BUCKET_NUMBER,
TbAppHudiJobConfig.COMPACTION_STRATEGY,
TbAppHudiJobConfig.COMPACTION_TASKS,
TbAppHudiJobConfig.COMPACTION_DELTA_COMMITS,
TbAppHudiJobConfig.COMPACTION_DELTA_SECONDS,
TbAppHudiJobConfig.COMPACTION_ASYNC_ENABLED,
TbAppHudiJobConfig.COMPACTION_MAX_MEMORY,
TbAppHudiJobConfig.CONFIGS,
TbAppCollectTableInfo.FILTER_FIELD,
TbAppCollectTableInfo.FILTER_VALUES,
TbAppCollectTableInfo.FILTER_TYPE,
TbAppCollectTableInfo.SRC_TOPIC,
TbAppCollectTableInfo.SRC_PULSAR_ADDR,
Alias.of(TbAppYarnJobConfigSync.JOB_MANAGER_MEMORY, "sync_job_manager_memory"),
Alias.of(TbAppYarnJobConfigSync.TASK_MANAGER_MEMORY, "sync_task_manager_memory"),
Alias.of(TbAppYarnJobConfigCompaction.JOB_MANAGER_MEMORY, "compaction_job_manager_memory"),
Alias.of(TbAppYarnJobConfigCompaction.TASK_MANAGER_MEMORY, "compaction_task_manger_momory"),
TbAppCollectTableInfo.PARTITION_FIELD,
TbAppHudiSyncState.MESSAGE_ID,
TbAppGlobalConfig.METRIC_PUBLISH_URL,
TbAppGlobalConfig.METRIC_PROMETHEUS_URL,
TbAppGlobalConfig.METRIC_API_URL,
TbAppGlobalConfig.METRIC_PUBLISH_DELAY,
TbAppGlobalConfig.METRIC_PUBLISH_PERIOD,
TbAppGlobalConfig.METRIC_PUBLISH_TIMEOUT,
TbAppGlobalConfig.METRIC_PUBLISH_BATCH,
Alias.of(TbAppFlinkJobConfig.ID, "job_id"),
Alias.of(TbAppFlinkJobConfig.NAME, "job_name"),
TbAppGlobalConfig.CHECKPOINT_ROOT_PATH,
TbAppHudiJobConfig.SOURCE_TASKS,
TbAppCollectTableInfo.ALIAS,
DataSource.CONNECTION,
TbAppCollectTableInfo.PRIORITY,
DataSource.DS_TYPE,
TbAppHudiJobConfig.KEEP_FILE_VERSION,
TbAppHudiJobConfig.KEEP_COMMIT_VERSION,
TbAppCollectTableInfo.TAGS,
TbAppGlobalConfig.ZK_URL
)
.from(
DataSource._alias_,
DataSourceTable._alias_,
DataSourceTableField._alias_,
TbAppFlinkJobConfig._alias_,
TbAppHudiJobConfig._alias_,
TbAppYarnJobConfigSync._alias_,
TbAppYarnJobConfigCompaction._alias_,
TbAppGlobalConfig._alias_,
TbAppCollectTableInfo._alias_
)
.leftJoin(TbAppHudiSyncState._alias_)
.onEq(TbAppHudiSyncState.ID, Column.as(StrUtil.format("concat({}, '-', {})", TbAppCollectTableInfo.FLINK_JOB_ID, TbAppCollectTableInfo.ALIAS)))
.whereEq(DataSource.DS_ROLE, "src")
.andEq(DataSource.DS_STATE, "y")
.andEq(DataSource.RECORD_STATE, "y")
.andEq(DataSourceTable.DS_ID, Column.as(DataSource.DS_ID))
.andEq(DataSourceTable.RECORD_STATE, "y")
.andEq(DataSourceTableField.TABLE_ID, Column.as(DataSourceTable.TABLE_ID))
.andEq(DataSourceTableField.RECORD_STATE, "y")
.andIn(DataSource.DS_TYPE, "udal", "telepg")
.andEq(DataSource.DS_NAME, Column.as(TbAppCollectTableInfo.SRC_DB))
.andEq(DataSource.SCHEMA_NAME, Column.as(TbAppCollectTableInfo.SRC_SCHEMA))
.andEq(DataSourceTable.TABLE_NAME, Column.as(TbAppCollectTableInfo.SRC_TABLE))
.andEq(TbAppCollectTableInfo.FLINK_JOB_ID, Column.as(TbAppFlinkJobConfig.ID))
.andEq(TbAppCollectTableInfo.HUDI_JOB_ID, Column.as(TbAppHudiJobConfig.ID))
.andEq(TbAppCollectTableInfo.SYNC_YARN_JOB_ID, Column.as(TbAppYarnJobConfigSync.ID))
.andEq(TbAppCollectTableInfo.COMPACTION_YARN_JOB_ID, Column.as(TbAppYarnJobConfigCompaction.ID))
.andEq(TbAppCollectTableInfo.CONFIG_ID, Column.as(TbAppGlobalConfig.ID))
.andEq(TbAppFlinkJobConfig.ID, 1542097984132706304L)
.andEq(TbAppCollectTableInfo.ALIAS, "crm_cfguse_channel")
.andEq(TbAppCollectTableInfo.STATUS, "y")
.andEq(TbAppFlinkJobConfig.STATUS, "y")
.andEq(TbAppHudiJobConfig.STATUS, "y")
.andEq(TbAppYarnJobConfigSync.STATUS, "y")
.andEq(TbAppYarnJobConfigCompaction.STATUS, "y")
.orderBy(DataSourceTableField.FIELD_SEQ)
.build()
*/
public static String tableMetaSql(String database, Boolean filterByFlinkJobId, Boolean filterByAlias) {
// language=MySQL
return "select dst.ds_name,\n" +
" dst.schema_name,\n" +
" dst.table_name,\n" +
" dst.table_type,\n" +
" dstf.field_name,\n" +
" dstf.field_seq,\n" +
" dstf.field_type,\n" +
" dstf.primary_key,\n" +
" dstf.partition_key,\n" +
" dstf.length,\n" +
" tacti.tgt_db,\n" +
" tacti.tgt_table,\n" +
" tacti.tgt_table_type,\n" +
" tacti.tgt_hdfs_path,\n" +
" tajhc.write_tasks,\n" +
" tajhc.write_operation,\n" +
" tajhc.write_task_max_memory,\n" +
" tajhc.write_batch_size,\n" +
" tajhc.write_rate_limit,\n" +
" tacti.bucket_number,\n" +
" tajhc.compaction_strategy,\n" +
" tajhc.compaction_tasks,\n" +
" tajhc.compaction_delta_commits,\n" +
" tajhc.compaction_delta_seconds,\n" +
" tajhc.compaction_async_enabled,\n" +
" tajhc.compaction_max_memory,\n" +
" tajhc.configs,\n" +
" tacti.filter_field,\n" +
" tacti.filter_values,\n" +
" tacti.filter_type,\n" +
" tacti.src_topic,\n" +
" tacti.src_pulsar_addr,\n" +
" tayjc_sync.job_manager_memory as sync_job_manager_memory,\n" +
" tayjc_sync.task_manager_memory as sync_task_manager_memory,\n" +
" tayjc_compaction.job_manager_memory as compaction_job_manager_memory,\n" +
" tayjc_compaction.task_manager_memory as compaction_task_manger_momory,\n" +
" tacti.partition_field,\n" +
" tahss.message_id,\n" +
" tagc.metric_publish_url,\n" +
" tagc.metric_prometheus_url,\n" +
" tagc.metric_api_url,\n" +
" tagc.metric_publish_delay,\n" +
" tagc.metric_publish_period,\n" +
" tagc.metric_publish_timeout,\n" +
" tagc.metric_publish_batch,\n" +
" tafjc.id as job_id,\n" +
" tafjc.name as job_name,\n" +
" tagc.checkpoint_root_path,\n" +
" tajhc.source_tasks,\n" +
" tacti.alias,\n" +
" dst.connection,\n" +
" tacti.priority,\n" +
" dst.ds_type,\n" +
" tajhc.keep_file_version,\n" +
" tajhc.keep_commit_version,\n" +
" tacti.tags,\n" +
" tagc.zk_url,\n" +
" tacti.version,\n" +
" dstf.scale\n" +
"from `" + database + "`.tb_app_collect_table_info tacti\n" +
" left join `" + database + "`.tb_app_hudi_sync_state tahss\n" +
" on tahss.id = concat(tacti.flink_job_id, '-', tacti.alias),\n" +
" `" + database + "`.tb_app_flink_job_config tafjc,\n" +
" `" + database + "`.tb_app_hudi_job_config tajhc,\n" +
" `" + database + "`.tb_app_yarn_job_config tayjc_sync,\n" +
" `" + database + "`.tb_app_yarn_job_config tayjc_compaction,\n" +
" `" + database + "`.tb_app_global_config tagc,\n" +
" `" + database + "`.tb_app_hudi_compaction_schedule tahcs,\n" +
" `iap-datahub`.data_source_table_field dstf,\n" +
" (select ds.*, dst.table_id, dst.table_name, dst.table_type\n" +
" from `iap-datahub`.data_source_table dst,\n" +
" (select ds.ds_id, ds.ds_name, ds.ds_type, ds.schema_name, ds.connection\n" +
" from `iap-datahub`.data_source ds\n" +
" where ds.ds_role = 'src'\n" +
" and ds.ds_state = 'y'\n" +
" and ds.record_state = 'y') ds\n" +
" where dst.ds_id = ds.ds_id\n" +
" and dst.record_state = 'y') dst\n" +
"where dstf.table_id = dst.table_id\n" +
" and dstf.record_state = 'y'\n" +
" and dst.ds_type in ('udal', 'telepg')\n" +
" and dst.ds_name = tacti.src_db\n" +
" and dst.schema_name = tacti.src_schema\n" +
" and dst.table_name = tacti.src_table\n" +
" and tacti.flink_job_id = tafjc.id\n" +
" and tacti.hudi_job_id = tajhc.id\n" +
" and tacti.sync_yarn_job_id = tayjc_sync.id\n" +
" and tacti.compaction_yarn_job_id = tayjc_compaction.id\n" +
" and tacti.config_id = tagc.id\n" +
" and tacti.schedule_id = tahcs.id\n" +
(filterByFlinkJobId ? " and tafjc.id = ?\n" : "") +
(filterByAlias ? " and tacti.alias = ?\n" : "") +
" and tacti.status = 'y'\n" +
" and tafjc.status = 'y'\n" +
" and tajhc.status = 'y'\n" +
" and tayjc_sync.status = 'y'\n" +
" and tayjc_compaction.status = 'y'\n" +
"order by dstf.field_seq;";
}
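    /*
     * Usage sketch (illustrative only; assumes a JDBC Connection named `conn`).
     * The number and order of the `?` placeholders depends on the two boolean
     * flags, so bind parameters in the same order the flags enable them:
     *
     *   String sql = tableMetaSql(Constants.DATABASE_NAME, true, true);
     *   try (PreparedStatement ps = conn.prepareStatement(sql)) {
     *       ps.setLong(1, 1542097984132706304L);   // filterByFlinkJobId
     *       ps.setString(2, "crm_cfguse_channel"); // filterByAlias
     *       try (ResultSet rs = ps.executeQuery()) {
     *           List<TableMeta> metas = from(rs);
     *       }
     *   }
     */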
public static List<TableMeta> from(ResultSet rs) throws SQLException {
List<TableMeta> results = new ArrayList<>();
List<TableMeta.RowMeta> metaList = new ArrayList<>();
        while (rs.next()) {
metaList.add(
TableMeta.RowMeta.builder()
.dsName(rs.getString(1))
.schemaName(rs.getString(2))
.tableName(rs.getString(3))
.tableType(rs.getString(4))
.fieldName(rs.getString(5))
.fieldSeq(rs.getInt(6))
.fieldType(rs.getString(7))
.primaryKey(rs.getString(8))
.partitionKey(rs.getString(9))
.length(rs.getLong(10))
.tgtDb(rs.getString(11))
.tgtTable(rs.getString(12))
.tgtTableType(rs.getString(13))
.tgtHdfsPath(rs.getString(14))
.writeTasks(rs.getInt(15))
.writeOperation(rs.getString(16))
.writeTaskMaxMemory(rs.getInt(17))
.writeBatchSize(rs.getInt(18))
.writeRateLimit(rs.getInt(19))
.bucketIndexNumber(rs.getInt(20))
.compactionStrategy(rs.getString(21))
.compactionTasks(rs.getInt(22))
.compactionDeltaCommits(rs.getInt(23))
.compactionDeltaSeconds(rs.getInt(24))
.compactionAsyncEnabled(rs.getString(25))
.compactionMaxMemory(rs.getInt(26))
.configs(rs.getString(27))
.filterField(rs.getString(28))
.filterValues(rs.getString(29))
.filterType(rs.getString(30))
.topic(rs.getString(31))
.pulsarAddress(rs.getString(32))
.syncJobManagerMemory(rs.getInt(33))
.syncTaskManagerMemory(rs.getInt(34))
.compactionJobManagerMemory(rs.getInt(35))
.compactionTaskManagerMemory(rs.getInt(36))
.partitionField(rs.getString(37))
.messageId(rs.getString(38))
.metricPublishUrl(rs.getString(39))
.metricPrometheusUrl(rs.getString(40))
.metricApiUrl(rs.getString(41))
.metricPublishDelay(rs.getInt(42))
.metricPublishPeriod(rs.getInt(43))
.metricPublishTimeout(rs.getInt(44))
.metricPublishBatch(rs.getInt(45))
.jobId(rs.getLong(46))
.jobName(rs.getString(47))
.checkpointRootPath(rs.getString(48))
.sourceTasks(rs.getInt(49))
.alias(rs.getString(50))
.connection(rs.getString(51))
.priority(rs.getInt(52))
.sourceType(rs.getString(53))
.keepFileVersion(rs.getInt(54))
.keepCommitVersion(rs.getInt(55))
.tags(rs.getString(56))
.zookeeperUrl(rs.getString(57))
.version(rs.getInt(58))
.scala(rs.getInt(59))
.build()
);
}
metaList.stream()
.collect(Collectors.groupingBy(TableMeta.RowMeta::getAlias))
.values()
.stream()
.flatMap(schemaRowMetas -> schemaRowMetas
.stream()
.collect(Collectors.groupingBy(TableMeta.RowMeta::getTableName))
.values()
.stream()
.map(tableRowMetas -> {
try {
return fromRowMetas(tableRowMetas);
} catch (Exception e) {
throw new RuntimeException(e);
}
}))
.forEach(results::add);
return results;
}
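    // Note: ConfigException.check is assumed to throw a ConfigException carrying
    // the given message when the supplied predicate evaluates to true.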
private static void checkMoreThanOne(String fieldName, Collection<?> collection) throws ConfigException {
ConfigException.check(fieldName + " cannot be more than 1", () -> collection.size() > 1);
}
private static void checkEmpty(String fieldName, Collection<?> collection) throws ConfigException {
ConfigException.check(fieldName + " cannot be empty", collection::isEmpty);
}
private static void checkEmptyOrMoreThanOne(String fieldName, Collection<?> collection) throws ConfigException {
checkEmpty(fieldName, collection);
checkMoreThanOne(fieldName, collection);
}
public static TableMeta fromRowMetas(List<TableMeta.RowMeta> metaList) throws Exception {
List<String> aliasList = metaList.stream()
.map(TableMeta.RowMeta::getAlias)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("alias", aliasList);
String alias = aliasList.get(0);
List<String> sourceTypeList = metaList.stream()
.map(TableMeta.RowMeta::getSourceType)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("source_type", sourceTypeList);
        String sourceTypeText = sourceTypeList.get(0).toUpperCase(Locale.ROOT);
TableMeta.SourceType sourceType;
try {
sourceType = TableMeta.SourceType.valueOf(sourceTypeText);
} catch (IllegalArgumentException e) {
throw new Exception("Cannot parse source type " + sourceTypeText);
}
List<String> dsNames = metaList.stream()
.map(TableMeta.RowMeta::getDsName)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("ds_name", dsNames);
String dataSource = dsNames.get(0);
List<String> schemaNames = metaList.stream()
.map(TableMeta.RowMeta::getSchemaName)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("schema_name", schemaNames);
String schema = schemaNames.get(0);
List<String> tableNames = metaList.stream()
.map(TableMeta.RowMeta::getTableName)
.distinct()
.collect(Collectors.toList());
        // Only one table's metadata can be fetched per call
checkMoreThanOne("table_name", tableNames);
checkEmpty("table_name", tableNames);
String table = tableNames.get(0);
List<String> tableTypes = metaList.stream()
.map(TableMeta.RowMeta::getTableType)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("table_type", tableTypes);
String type = tableTypes.get(0);
List<String> filterFields = metaList.stream()
.map(TableMeta.RowMeta::getFilterField)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("filter_field", filterFields);
String filterField = filterFields.get(0);
List<String> filterValueList = metaList.stream()
.map(TableMeta.RowMeta::getFilterValues)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("filter_values", filterValueList);
String filterValuesText = filterValueList.get(0);
List<String> filterValues = (filterValuesText == null || filterValuesText.isEmpty())
? Collections.emptyList()
: Arrays.asList(filterValuesText.split(","));
List<String> filterTypes = metaList.stream()
.map(TableMeta.RowMeta::getFilterType)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("filter_field", filterFields);
TableMeta.FilterType filterType;
try {
filterType = TableMeta.FilterType.valueOf(filterTypes.get(0));
} catch (IllegalArgumentException e) {
filterType = TableMeta.FilterType.NONE;
}
List<String> topics = metaList.stream()
.map(TableMeta.RowMeta::getTopic)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("topic", topics);
String topic = topics.get(0);
List<String> pulsarAddresses = metaList.stream()
.map(TableMeta.RowMeta::getPulsarAddress)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("pulsar address", pulsarAddresses);
String pulsarAddress = pulsarAddresses.get(0);
List<Integer> priorities = metaList.stream()
.map(TableMeta.RowMeta::getPriority)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("priority", priorities);
Integer priority = priorities.get(0);
List<String> tagTexts = metaList.stream()
.map(TableMeta.RowMeta::getTags)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("tags", tagTexts);
        String tagText = tagTexts.get(0) == null ? "" : tagTexts.get(0);
        List<String> tags = tagText.isEmpty() ? Collections.emptyList() : Arrays.asList(tagText.split(","));
List<Integer> versions = metaList.stream()
.map(TableMeta.RowMeta::getVersion)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("version", versions);
Integer version = versions.get(0);
        // Fetch the Hudi config; all rows of the same table carry identical config, so the first row is sufficient
TableMeta.RowMeta example = metaList.get(0);
TableMeta.HudiMeta hudiMeta = TableMeta.HudiMeta.builder()
.targetDataSource(example.getTgtDb())
.targetTable(example.getTgtTable())
.targetTableType(example.getTgtTableType())
.targetHdfsPath(example.getTgtHdfsPath())
.sourceTasks(example.getSourceTasks())
.writeTasks(example.getWriteTasks())
.writeOperation(example.getWriteOperation())
.writeTaskMaxMemory(example.getWriteTaskMaxMemory())
.writeBatchSize(example.getWriteBatchSize())
.writeRateLimit(example.getWriteRateLimit())
.bucketIndexNumber(example.getBucketIndexNumber())
.compactionStrategy(example.getCompactionStrategy())
.compactionTasks(example.getCompactionTasks())
.compactionDeltaCommits(example.getCompactionDeltaCommits())
.compactionDeltaSeconds(example.getCompactionDeltaSeconds())
.compactionAsyncEnabled(example.getCompactionAsyncEnabled())
.compactionMaxMemory(example.getCompactionMaxMemory())
.configs(example.getConfigs())
.keepFileVersion(example.getKeepFileVersion())
.keepCommitVersion(example.getKeepCommitVersion())
.build();
TableMeta.YarnMeta syncYarnMeta = TableMeta.YarnMeta.builder()
.jobManagerMemory(example.getSyncJobManagerMemory())
.taskManagerMemory(example.getSyncTaskManagerMemory())
.build();
TableMeta.YarnMeta compactionYarnMeta = TableMeta.YarnMeta.builder()
.jobManagerMemory(example.getCompactionJobManagerMemory())
.taskManagerMemory(example.getCompactionTaskManagerMemory())
.build();
TableMeta.ConfigMeta configMeta = TableMeta.ConfigMeta.builder()
.messageId(example.getMessageId())
.metricPublishUrl(example.getMetricPublishUrl())
.metricPrometheusUrl(example.getMetricPrometheusUrl())
.metricApiUrl(example.getMetricApiUrl())
.metricPublishDelay(example.getMetricPublishDelay())
.metricPublishPeriod(example.getMetricPublishPeriod())
.metricPublishTimeout(example.getMetricPublishTimeout())
.metricPublishBatch(example.getMetricPublishBatch())
.checkpointRootPath(example.getCheckpointRootPath())
.zookeeperUrl(example.getZookeeperUrl())
.build();
TableMeta.JobMeta jobMeta = TableMeta.JobMeta.builder()
.id(example.getJobId())
.name(example.getJobName())
.build();
TableMeta.ConnectionMeta connectionMeta = null;
String connectionText = example.getConnection();
if (connectionText != null && !connectionText.isEmpty()) {
JSONObject connectionObj = JSONUtil.parseObj(connectionText);
connectionMeta = TableMeta.ConnectionMeta.builder()
.url(connectionObj.getStr("jdbc_url"))
.user(connectionObj.getStr("jdbc_user"))
.password(connectionObj.getStr("jdbc_password"))
.driver(connectionObj.getStr("jdbc_driver"))
.build();
}
List<String> partitionFields = metaList.stream()
.map(TableMeta.RowMeta::getPartitionField)
.distinct()
.collect(Collectors.toList());
checkEmptyOrMoreThanOne("partition_field", filterFields);
String partitionField = partitionFields.get(0);
List<TableMeta.FieldMeta> primaryKeys = new ArrayList<>(), partitionKeys = new ArrayList<>();
List<TableMeta.FieldMeta> fieldMetaList = new ArrayList<>(metaList.size());
for (TableMeta.RowMeta rowMeta : metaList) {
boolean isPrimaryKey = "y".equals(rowMeta.getPrimaryKey());
boolean isPartitionKey = "y".equals(rowMeta.getPartitionKey());
TableMeta.FieldMeta fieldMeta = TableMeta.FieldMeta.builder()
.name(rowMeta.getFieldName().toUpperCase(Locale.ROOT))
.sequence(rowMeta.getFieldSeq())
.type(rowMeta.getFieldType())
.isPrimaryKey(isPrimaryKey)
.partitionKey(isPartitionKey)
.length(rowMeta.getLength())
.scala(rowMeta.getScala())
.build();
if (isPrimaryKey) {
primaryKeys.add(fieldMeta);
}
if (isPartitionKey) {
partitionKeys.add(fieldMeta);
}
fieldMetaList.add(fieldMeta);
}
return TableMeta.builder()
.alias(alias)
.source(dataSource)
.schema(schema)
.table(table)
.type(type)
.primaryKeys(primaryKeys)
.partitionKeys(partitionKeys)
.hudi(hudiMeta)
.fields(fieldMetaList)
.filterField(filterField)
.filterValues(filterValues)
.filterType(filterType)
.topic(topic)
.pulsarAddress(pulsarAddress)
.syncYarn(syncYarnMeta)
.compactionYarn(compactionYarnMeta)
.partitionField(partitionField)
.config(configMeta)
.job(jobMeta)
.connection(connectionMeta)
.priority(priority)
.sourceType(sourceType)
.tags(tags)
.version(version)
.build();
}
public static Optional<String> getPartitionField(TableMeta meta) {
if (meta.getPartitionField() == null || "".equals(meta.getPartitionField())) {
return Optional.empty();
} else {
return meta.getFields()
.stream()
.map(TableMeta.FieldMeta::getName)
.filter(name -> meta.getPartitionField().equalsIgnoreCase(name))
.findFirst();
}
}
public static Optional<String> getFilterField(TableMeta meta) {
if (meta.getFilterField() == null || "".equals(meta.getFilterField())) {
return Optional.empty();
} else {
return meta.getFields()
.stream()
.map(TableMeta.FieldMeta::getName)
.filter(name -> meta.getFilterField().equalsIgnoreCase(name))
.findFirst();
}
}
public static boolean existsTag(TableMeta meta, String tag) {
return meta.getTags() != null && meta.getTags().contains(tag);
}
}

161
utils/executor/pom.xml Normal file
View File

@@ -0,0 +1,161 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>com.lanyuanxiaoyao</groupId>
<artifactId>hudi-service</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>executor</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-optimizer_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-metrics-core</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-metrics-prometheus_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-container_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-yarn_${scala.major.version}</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-hadoop-fs</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-uber_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-uber-blink_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-state-processor-api_${scala.major.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-shaded-hadoop-3-uber</artifactId>
<version>3.1.1.7.2.9.0-173-9.0</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-format</artifactId>
<version>2.4.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
<promoteTransitiveDependencies>true</promoteTransitiveDependencies>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                            <mainClass>com.lanyuanxiaoyao.service.executor.Runner</mainClass>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
</transformers>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
<exclude>log4j-surefire*.properties</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,43 @@
package com.lanyuanxiaoyao.service.executor;
import org.apache.flink.client.deployment.ClusterClientFactory;
import org.apache.flink.client.deployment.ClusterDescriptor;
import org.apache.flink.client.deployment.ClusterSpecification;
import org.apache.flink.client.deployment.DefaultClusterClientServiceLoader;
import org.apache.flink.client.deployment.application.ApplicationConfiguration;
import org.apache.flink.client.program.ClusterClient;
import org.apache.flink.client.program.ClusterClientProvider;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.security.SecurityConfiguration;
import org.apache.flink.runtime.security.SecurityUtils;
import org.apache.hadoop.yarn.api.records.ApplicationId;
/**
 * Launcher class that deploys a Flink application cluster on YARN
*
* @author ZhangJiacheng
* @date 2022-06-01
*/
public class Runner {
public static ApplicationId run(Configuration inputConfiguration, String className, String[] args) throws Exception {
Configuration configuration = new Configuration(inputConfiguration);
SecurityUtils.install(new SecurityConfiguration(configuration));
return SecurityUtils.getInstalledContext()
.runSecured(() -> {
DefaultClusterClientServiceLoader yarnServiceLoader = new DefaultClusterClientServiceLoader();
ApplicationConfiguration applicationConfiguration = new ApplicationConfiguration(args, className);
//ApplicationDeployer deployer = new ApplicationClusterDeployer(new DefaultClusterClientServiceLoader());
//deployer.run(configuration, applicationConfiguration);
final ClusterClientFactory<ApplicationId> clientFactory = yarnServiceLoader.getClusterClientFactory(configuration);
try (final ClusterDescriptor<ApplicationId> clusterDescriptor = clientFactory.createClusterDescriptor(configuration)) {
final ClusterSpecification clusterSpecification = clientFactory.getClusterSpecification(configuration);
ClusterClientProvider<ApplicationId> provider = clusterDescriptor.deployApplicationCluster(clusterSpecification, applicationConfiguration);
ClusterClient<ApplicationId> clusterClient = provider.getClusterClient();
if (clusterClient == null) {
return null;
}
return clusterClient.getClusterId();
}
});
}
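    /*
     * Deployment sketch (illustrative only; the config keys are standard Flink
     * options, while the jar path and main class below are placeholders):
     *
     *   Configuration conf = new Configuration();
     *   conf.setString("execution.target", "yarn-application");
     *   conf.setString("pipeline.jars", "hdfs:///path/to/sync.jar");
     *   ApplicationId appId = Runner.run(conf, "com.lanyuanxiaoyao.service.sync.Synchronizer", new String[0]);
     */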
}

View File

@@ -0,0 +1,48 @@
package com.lanyuanxiaoyao.service.executor.metrics;
import org.apache.flink.annotation.docs.Documentation;
import org.apache.flink.configuration.ConfigOption;
import static org.apache.flink.configuration.ConfigOptions.key;
/**
* Config options for the {@link VictoriaMetricsReporter}.
*/
@Documentation.SuffixOption
public class VictoriaMetricsOptions {
public static final ConfigOption<String> ENDPOINT =
key("endpoint")
.stringType()
.noDefaultValue()
.withDescription("Victoria metrics endpoint. eg: http://localhost:8428/api/v1/import/prometheus");
public static final ConfigOption<Integer> TIMEOUT =
key("timeout")
.intType()
.defaultValue(60000)
.withDescription("Http push timeout. Default 1 minute");
public static final ConfigOption<String> TAGS =
key("tags")
.stringType()
.defaultValue("")
.withDescription("Extra tags for every metric");
public static final ConfigOption<Boolean> ENABLE_AUTH =
key("enable.auth")
.booleanType()
.defaultValue(false)
.withDescription("Enable metric server http basic auth");
public static final ConfigOption<String> AUTH_USERNAME =
key("auth.username")
.stringType()
.defaultValue("")
.withDescription("Basic auth username");
public static final ConfigOption<String> AUTH_PASSWORD =
key("auth.password")
.stringType()
.defaultValue("")
.withDescription("Basic auth password");
}

View File

@@ -0,0 +1,64 @@
package com.lanyuanxiaoyao.service.executor.metrics;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpResponse;
import cn.hutool.http.HttpUtil;
import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.exporter.common.TextFormat;
import java.io.IOException;
import java.io.StringWriter;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.flink.metrics.prometheus.AbstractPrometheusReporter;
import org.apache.flink.metrics.reporter.Scheduled;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author ZhangJiacheng
* @date 2022-06-22
*/
public class VictoriaMetricsReporter extends AbstractPrometheusReporter implements Scheduled {
private static final Logger logger = LoggerFactory.getLogger(VictoriaMetricsReporter.class);
private final String endpoint;
    private final Integer timeout;
private final Map<String, String> tags;
private final Boolean enableBasicAuth;
private final String basicAuthUsername;
private final String basicAuthPassword;
    public VictoriaMetricsReporter(String endpoint, Integer timeout, Map<String, String> tags, Boolean enableBasicAuth, String basicAuthUsername, String basicAuthPassword) {
        this.endpoint = endpoint;
        this.timeout = timeout;
this.tags = tags;
this.enableBasicAuth = enableBasicAuth;
this.basicAuthUsername = basicAuthUsername;
this.basicAuthPassword = basicAuthPassword;
}
@Override
public void report() {
try (StringWriter writer = new StringWriter()) {
TextFormat.write004(writer, CollectorRegistry.defaultRegistry.metricFamilySamples());
String query = tags.entrySet()
.stream()
.map(entry -> StrUtil.format("extra_label={}={}", entry.getKey(), entry.getValue()))
.collect(Collectors.joining("&"));
HttpRequest request = HttpUtil.createPost(StrUtil.format("{}?{}", endpoint, query))
.body(writer.toString())
                    .timeout(timeout);
if (enableBasicAuth) {
request.basicAuth(basicAuthUsername, basicAuthPassword);
}
HttpResponse response = request.execute();
if (!response.isOk()) {
logger.warn("Fail to push metrics: {}, {}, endpoint: {}, tags: {}", response.getStatus(), response.body(), endpoint, tags);
}
} catch (IOException e) {
logger.error("Fail to write metrics, endpoint: {}, tags: {}, exception: {}", endpoint, tags, e);
}
}
}

View File

@@ -0,0 +1,44 @@
package com.lanyuanxiaoyao.service.executor.metrics;
import cn.hutool.core.util.StrUtil;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.apache.flink.metrics.MetricConfig;
import org.apache.flink.metrics.reporter.InterceptInstantiationViaReflection;
import org.apache.flink.metrics.reporter.MetricReporterFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.lanyuanxiaoyao.service.executor.metrics.VictoriaMetricsOptions.*;
/**
* @author ZhangJiacheng
* @date 2022-06-22
*/
@InterceptInstantiationViaReflection(
reporterClassName = "com.eshore.odcp.hudi.connector.utils.executor.metrics.VictoriaMetricsReporter")
public class VictoriaMetricsReporterFactory implements MetricReporterFactory {
private static final Logger logger = LoggerFactory.getLogger(VictoriaMetricsReporterFactory.class);
@Override
public VictoriaMetricsReporter createMetricReporter(Properties properties) {
MetricConfig metricConfig = (MetricConfig) properties;
String endpoint = metricConfig.getString(ENDPOINT.key(), ENDPOINT.defaultValue());
int timeout = metricConfig.getInteger(TIMEOUT.key(), TIMEOUT.defaultValue());
String tagsText = metricConfig.getString(TAGS.key(), TAGS.defaultValue());
Boolean enableAuth = metricConfig.getBoolean(ENABLE_AUTH.key(), ENABLE_AUTH.defaultValue());
String authUsername = metricConfig.getString(AUTH_USERNAME.key(), AUTH_USERNAME.defaultValue());
String authPassword = metricConfig.getString(AUTH_PASSWORD.key(), AUTH_PASSWORD.defaultValue());
Map<String, String> tags = new HashMap<>(10);
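        // Tags arrive as "key1=value1;key2=value2" (see VictoriaMetricsOptions.TAGS)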
if (StrUtil.isNotBlank(tagsText)) {
for (String item : tagsText.split(";")) {
String[] parsed = item.split("=");
tags.put(parsed[0], parsed[1]);
}
}
logger.info("Create victoria metric reporter for endpoint {} timeout: {}, tags: {}, enable_auth: {}, auth_username: {}, auth_password: {}", endpoint, timeout, tags, enableAuth, authUsername, authPassword);
return new VictoriaMetricsReporter(endpoint, timeout, tags, enableAuth, authUsername, authPassword);
}
}

View File

@@ -0,0 +1,54 @@
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds.
monitorInterval=30
# This affects logging for both user code and Flink
rootLogger.level = INFO
rootLogger.appenderRef.file.ref = MainAppender
# Uncomment this if you want to _only_ change Flink's logging
#logger.flink.name = org.apache.flink
#logger.flink.level = INFO
# The following lines keep the log level of common libraries/connectors on
# log level INFO. The root logger does not override this. You have to manually
# change the log levels here.
logger.akka.name = akka
logger.akka.level = INFO
logger.kafka.name= org.apache.kafka
logger.kafka.level = INFO
logger.hadoop.name = org.apache.hadoop
logger.hadoop.level = INFO
logger.zookeeper.name = org.apache.zookeeper
logger.zookeeper.level = INFO
logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3
logger.shaded_zookeeper.level = INFO
logger.hudi.name=org.apache.hudi
logger.hudi.level=INFO
# Log all infos in the given file
appender.main.name = MainAppender
appender.main.type = Console
appender.main.layout.type = PatternLayout
appender.main.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
# Suppress the irrelevant (wrong) warnings from the Netty channel handler
logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline
logger.netty.level = OFF

View File

@@ -0,0 +1 @@
log4j.rootLogger=INFO

View File

@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.flink.yarn.YarnClusterClientFactory

View File

@@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.flink.yarn.executors.YarnJobClusterExecutorFactory
org.apache.flink.yarn.executors.YarnSessionClusterExecutorFactory

View File

@@ -0,0 +1 @@
com.lanyuanxiaoyao.service.executor.metrics.VictoriaMetricsReporterFactory

View File

@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.flink.table.planner.delegation.BlinkPlannerFactory
org.apache.flink.table.planner.delegation.BlinkExecutorFactory
org.apache.flink.table.planner.delegation.DefaultParserFactory

158
utils/sync/pom.xml Normal file
View File

@@ -0,0 +1,158 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>com.lanyuanxiaoyao</groupId>
<artifactId>hudi-service</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>sync</artifactId>
<properties>
<parquet.version>1.10.1</parquet.version>
</properties>
<dependencies>
<dependency>
<groupId>com.lanyuanxiaoyao</groupId>
<artifactId>service-common</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-flink${flink.major.version}-bundle</artifactId>
<version>${hudi.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.major.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.major.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-runtime-blink_${scala.major.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.major.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.pulsar</groupId>
<artifactId>pulsar-client</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-shaded-guava</artifactId>
<version>30.1.1-jre-15.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>dev.failsafe</groupId>
<artifactId>failsafe</artifactId>
<version>3.2.4</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>pl.tkowalcz.tjahzi</groupId>
<artifactId>logback-appender</artifactId>
<version>0.9.23</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>log4j-over-slf4j</artifactId>
<version>1.7.15</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>1.7.15</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.7.2</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
<promoteTransitiveDependencies>true</promoteTransitiveDependencies>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
</transformers>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
<exclude>log4j-surefire*.properties</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,263 @@
package com.lanyuanxiaoyao.service.sync;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.RunMeta;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.functions.CompactionEventHandler;
import com.lanyuanxiaoyao.service.sync.utils.*;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.operators.ProcessOperator;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.client.HoodieFlinkWriteClient;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
import org.apache.hudi.common.table.log.block.HoodieDeleteBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.ClosableIterator;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.org.apache.avro.Schema;
import org.apache.hudi.org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.sink.compact.*;
import org.apache.hudi.sink.compact.strategy.CompactionPlanStrategies;
import org.apache.hudi.sink.compact.strategy.CompactionPlanStrategy;
import org.apache.hudi.table.HoodieFlinkTable;
import org.apache.hudi.util.CompactionUtil;
import org.apache.hudi.util.StreamerUtil;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.schema.MessageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Offline compaction for a single table
*
* @author ZhangJiacheng
* @date 2022-06-21
*/
public class Compactor {
private static final Logger logger = LoggerFactory.getLogger(Compactor.class);
private static final ObjectMapper mapper = JacksonUtils.getMapper();
public static void main(String[] args) throws Exception {
FlinkJob flinkJob = ArgumentsUtils.getFlinkJob(args);
TableMeta tableMeta = ArgumentsUtils.getTableMeta(args);
String selectedInstants = ArgumentsUtils.getInstants(args);
String cluster = ArgumentsUtils.getCluster(args);
logger.info("Bootstrap flink job: {}", mapper.writeValueAsString(flinkJob));
logger.info("Bootstrap table meta: {}", mapper.writeValueAsString(tableMeta));
logger.info("Bootstrap instants: {}", selectedInstants);
logger.info("Bootstrap cluster: {}", cluster);
String applicationId = System.getenv("_APP_ID");
RunMeta runMeta = new RunMeta(cluster, flinkJob.getId(), tableMeta.getAlias());
logger.info("Run meta: {}", runMeta);
ZkUtils.createCompactionLock(flinkJob, tableMeta, tableMeta.getConfig().getZookeeperUrl(), mapper.writeValueAsString(runMeta));
logger.info("Lock for {} {} success", flinkJob.getId(), tableMeta.getAlias());
StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
FlinkCompactionConfig config = new FlinkCompactionConfig();
if (ObjectUtil.isEmpty(selectedInstants)) {
config.compactionPlanSelectStrategy = CompactionPlanStrategy.ALL;
} else {
config.compactionPlanSelectStrategy = CompactionPlanStrategy.INSTANTS;
config.compactionPlanInstant = selectedInstants;
}
GlobalConfiguration globalConfiguration = new GlobalConfiguration(cluster, applicationId, tableMeta);
Configuration configuration = SyncUtils.getCompactionFlinkConfiguration(
globalConfiguration,
new Configuration(),
flinkJob,
tableMeta,
SyncUtils.avroSchemaWithExtraFields(tableMeta),
1
);
CompactionEventHandler eventHandler = new CompactionEventHandler(globalConfiguration, flinkJob, tableMeta);
HoodieFlinkWriteClient<?> writeClient = StreamerUtil.createWriteClient(configuration);
HoodieFlinkTable<?> table = writeClient.getHoodieTable();
table.getMetaClient().reloadActiveTimeline();
StatusUtils.compactionStart(globalConfiguration, flinkJob, tableMeta);
        // Check the current state of the timeline
logger.info("{} timeline detail ({})", tableMeta.getAlias(), LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")));
table.getActiveTimeline()
.getInstants()
.forEach(instant -> logger.info("{} {} {}", instant.getTimestamp(), instant.getAction(), instant.getState()));
HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
List<HoodieInstant> requested = CompactionPlanStrategies.getStrategy(config).select(pendingCompactionTimeline);
if (requested.isEmpty()) {
logger.info("No compaction plan scheduled");
eventHandler.closed("No compaction plan scheduled", null);
return;
}
List<String> compactionInstantTimes = requested.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
compactionInstantTimes.forEach(timestamp -> {
HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(timestamp);
if (pendingCompactionTimeline.containsInstant(inflightInstant)) {
logger.info("Rollback inflight compaction instant: [" + timestamp + "]");
table.rollbackInflightCompaction(inflightInstant);
table.getMetaClient().reloadActiveTimeline();
}
});
List<Pair<String, HoodieCompactionPlan>> compactionPlans = compactionInstantTimes.stream()
.map(timestamp -> {
try {
return Pair.of(timestamp, CompactionUtils.getCompactionPlan(table.getMetaClient(), timestamp));
} catch (IOException e) {
throw new HoodieException("Get compaction plan at instant " + timestamp + " error", e);
}
})
.filter(pair -> {
HoodieCompactionPlan plan = pair.getRight();
return plan != null && plan.getOperations() != null && plan.getOperations().size() > 0;
})
.collect(Collectors.toList());
if (compactionPlans.isEmpty()) {
logger.info("No compaction plan for instant " + String.join(",", compactionInstantTimes));
eventHandler.closed("No compaction plan for instant " + String.join(",", compactionInstantTimes), null);
return;
}
List<HoodieInstant> instants = compactionInstantTimes.stream().map(HoodieTimeline::getCompactionRequestedInstant).collect(Collectors.toList());
logger.info("Start to compaction for instant " + compactionInstantTimes);
for (HoodieInstant instant : instants) {
if (!pendingCompactionTimeline.containsInstant(instant)) {
CompactionUtil.cleanInstant(table.getMetaClient(), instant);
}
table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);
}
table.getMetaClient().reloadActiveTimeline();
try {
compactionPlans.forEach(pair -> preCommit(globalConfiguration, flinkJob, tableMeta, table, pair.getLeft(), pair.getRight()));
} catch (Throwable e) {
logger.warn("Cannot submit pre-commit log");
}
environment.addSource(new CompactionPlanSourceFunction(compactionPlans))
.name("compaction_source")
.uid("uid_compaction_source")
.rebalance()
.transform("compact_task",
TypeInformation.of(CompactionCommitEvent.class),
new ProcessOperator<>(new CompactFunction(configuration)))
.setParallelism(configuration.getInteger(FlinkOptions.COMPACTION_TASKS))
.addSink(new CompactionCommitSink(configuration, eventHandler))
.name("compaction_commit")
.uid("uid_compaction_commit")
.setParallelism(1);
environment.execute(NameHelper.compactionFlinkName(flinkJob.getId(), tableMeta.getSchema(), tableMeta.getAlias()));
}
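    /**
     * Scans the delta log files referenced by each compaction plan and publishes
     * pre-commit statistics (log file count and size, updated and deleted record
     * counts) before the compaction itself runs.
     */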
private static void preCommit(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, HoodieFlinkTable<?> table, String instant, HoodieCompactionPlan compactionPlan) {
HoodieTableMetaClient client = table.getMetaClient();
FileSystem fileSystem = client.getRawFs();
List<Path> deltaFilePaths = compactionPlan.getOperations()
.stream()
.flatMap(operation -> operation.getDeltaFilePaths().stream().map(path -> StrUtil.format("{}/{}", operation.getPartitionPath(), path)))
.map(path -> new Path(StrUtil.format("{}/{}", client.getBasePathV2(), path)))
.collect(Collectors.toList());
AtomicLong totalLogFilesCompacted = new AtomicLong(0);
AtomicLong totalLogFilesSize = new AtomicLong(0);
AtomicLong totalRecordsDeleted = new AtomicLong(0);
AtomicLong totalCompactedRecordsUpdated = new AtomicLong(0);
deltaFilePaths.parallelStream().forEach(path -> {
try {
FileStatus fileStatus = fileSystem.getFileStatus(path);
totalLogFilesCompacted.incrementAndGet();
totalLogFilesSize.addAndGet(fileStatus.getLen());
MessageType messageType = TableSchemaResolver.readSchemaFromLogFile(fileSystem, path);
if (ObjectUtil.isNull(messageType)) {
return;
}
Schema writerSchema = new AvroSchemaConverter().convert(messageType);
try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fileSystem, new HoodieLogFile(path), writerSchema)) {
while (reader.hasNext()) {
HoodieLogBlock block = reader.next();
switch (block.getBlockType()) {
case AVRO_DATA_BLOCK:
HoodieAvroDataBlock avroDataBlock = (HoodieAvroDataBlock) block;
try (ClosableIterator<IndexedRecord> avroDataBlockRecordIterator = avroDataBlock.getRecordIterator()) {
while (avroDataBlockRecordIterator.hasNext()) {
// logger.info("totalCompactedRecordsUpdated {}", totalCompactedRecordsUpdated.incrementAndGet());
totalCompactedRecordsUpdated.incrementAndGet();
avroDataBlockRecordIterator.next();
}
}
break;
case PARQUET_DATA_BLOCK:
HoodieParquetDataBlock parquetDataBlock = (HoodieParquetDataBlock) block;
try (ClosableIterator<IndexedRecord> parquetDataBlockRecordIterator = parquetDataBlock.getRecordIterator()) {
while (parquetDataBlockRecordIterator.hasNext()) {
// logger.info("totalCompactedRecordsUpdated {}", totalCompactedRecordsUpdated.incrementAndGet());
totalCompactedRecordsUpdated.incrementAndGet();
parquetDataBlockRecordIterator.next();
}
}
break;
case DELETE_BLOCK:
HoodieDeleteBlock deleteBlock = (HoodieDeleteBlock) block;
// logger.info("totalRecordsDeleted {}", totalRecordsDeleted.addAndGet(deleteBlock.getRecordsToDelete().length));
totalRecordsDeleted.addAndGet(deleteBlock.getRecordsToDelete().length);
break;
default:
break;
}
}
}
} catch (Exception e) {
logger.warn("Parse log file failure for " + path, e);
}
});
Map<String, Long> metadata = new ConcurrentHashMap<>(5);
metadata.put("totalLogFilesCompacted", totalLogFilesCompacted.get());
metadata.put("totalLogFilesSize", totalLogFilesSize.get());
metadata.put("totalRecordsDeleted", totalRecordsDeleted.get());
metadata.put("totalCompactedRecordsUpdated", totalCompactedRecordsUpdated.get());
metadata.put("totalLogRecordsCompacted", totalRecordsDeleted.get() + totalCompactedRecordsUpdated.get());
StatusUtils.compactionPreCommit(configuration, flinkJob, tableMeta, instant, metadata);
}
}

View File

@@ -0,0 +1,170 @@
package com.lanyuanxiaoyao.service.sync;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.Record;
import com.lanyuanxiaoyao.service.common.entity.RunMeta;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.exception.CheckpointRootPathNotFoundException;
import com.lanyuanxiaoyao.service.common.exception.ZookeeperUrlNotFoundException;
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
import com.lanyuanxiaoyao.service.common.utils.TableMetaHelper;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.functions.PulsarMessage2RecordFunction;
import com.lanyuanxiaoyao.service.sync.functions.PulsarMessageSourceReader;
import com.lanyuanxiaoyao.service.sync.functions.ValidateRecordFilter;
import com.lanyuanxiaoyao.service.sync.utils.*;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.lanyuanxiaoyao.service.common.Constants.*;
/**
 * Synchronization application
*
* @author lanyuanxiaoyao
* @version 0.0.1
* @date 2021-11-26
*/
public class Synchronizer {
private static final Logger logger = LoggerFactory.getLogger(Synchronizer.class);
private static final ObjectMapper mapper = JacksonUtils.getMapper();
private static String findConfigFromList(List<TableMeta> metas, Function<TableMeta, String> getter, Supplier<Exception> notFoundException) throws Exception {
return metas.stream()
.map(getter)
.distinct()
.findFirst()
.orElseThrow(notFoundException);
}
public static void main(String[] args) throws Exception {
FlinkJob flinkJob = ArgumentsUtils.getFlinkJob(args);
List<TableMeta> tableMetaList = ArgumentsUtils.getTableMetaList(args);
String cluster = ArgumentsUtils.getCluster(args);
logger.info("Bootstrap flink job: {}", mapper.writeValueAsString(flinkJob));
logger.info("Bootstrap table meta list: {}", mapper.writeValueAsString(tableMetaList));
logger.info("Bootstrap cluster: {}", cluster);
String applicationId = System.getenv("_APP_ID");
String zkUrl = findConfigFromList(tableMetaList, meta -> meta.getConfig().getZookeeperUrl(), ZookeeperUrlNotFoundException::new);
for (TableMeta tableMeta : tableMetaList) {
RunMeta runMeta = new RunMeta(cluster, flinkJob.getId(), tableMeta.getAlias());
logger.info("Run meta: {}", runMeta);
ZkUtils.createSynchronizerLock(flinkJob, tableMeta, zkUrl, mapper.writeValueAsString(runMeta));
}
RunMeta runMeta = new RunMeta(cluster, flinkJob.getId());
logger.info("Run meta: {}", runMeta);
ZkUtils.createSynchronizerLock(flinkJob, zkUrl, mapper.writeValueAsString(runMeta));
logger.info("Lock for {} success", flinkJob.getId());
StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
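        // Checkpoint policy: unaligned exactly-once checkpoints every 15 minutes,
        // 2-hour timeout, at most one concurrent checkpoint, up to 5 tolerated failures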
environment.enableCheckpointing(15 * MINUTE);
String checkpointRootPath = findConfigFromList(tableMetaList, meta -> meta.getConfig().getCheckpointRootPath(), CheckpointRootPathNotFoundException::new);
environment.getCheckpointConfig().setCheckpointStorage(new Path(checkpointRootPath + "/" + flinkJob.getId()));
environment.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
environment.getCheckpointConfig().setCheckpointTimeout(2 * HOUR);
environment.getCheckpointConfig().setMinPauseBetweenCheckpoints(15 * MINUTE);
environment.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
environment.getCheckpointConfig().setForceUnalignedCheckpoints(true);
environment.getCheckpointConfig().enableUnalignedCheckpoints();
environment.getCheckpointConfig().setTolerableCheckpointFailureNumber(5);
if (tableMetaList.stream().anyMatch(meta -> TableMetaHelper.existsTag(meta, TAGS_DISABLE_CHAINING))) {
logger.warn("Disable operator chaining");
environment.disableOperatorChaining();
}
environment.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, MINUTE));
environment.setStateBackend(new HashMapStateBackend());
switch (flinkJob.getRunMode()) {
case ALL_IN_ONE:
for (TableMeta tableMeta : tableMetaList) {
GlobalConfiguration globalConfiguration = new GlobalConfiguration(cluster, applicationId, tableMeta);
createFlinkJob(environment, globalConfiguration, flinkJob, tableMeta);
publishSyncStart(globalConfiguration, flinkJob, tableMeta);
}
environment.execute(NameHelper.syncFlinkName(flinkJob.getId(), flinkJob.getName()));
break;
case ONE_IN_ONE:
for (TableMeta tableMeta : tableMetaList) {
GlobalConfiguration globalConfiguration = new GlobalConfiguration(cluster, applicationId, tableMeta);
createFlinkJob(environment, globalConfiguration, flinkJob, tableMeta);
publishSyncStart(globalConfiguration, flinkJob, tableMeta);
environment.execute(NameHelper.syncFlinkName(flinkJob.getId(), flinkJob.getName(), tableMeta.getAlias()));
}
break;
case ALL_IN_ONE_BY_TABLE:
scheduleOneInOneRegistryByField(environment, cluster, applicationId, flinkJob, tableMetaList, TableMeta::getTable);
break;
case ALL_IN_ONE_BY_SCHEMA:
scheduleOneInOneRegistryByField(environment, cluster, applicationId, flinkJob, tableMetaList, TableMeta::getSchema);
break;
default:
throw new IllegalArgumentException("Unsupported run mode: " + flinkJob.getRunMode());
}
}
private static void scheduleOneInOneRegistryByField(StreamExecutionEnvironment environment, String cluster, String applicationId, FlinkJob flinkJob, List<TableMeta> tableMetaList, Function<TableMeta, String> field) throws Exception {
Map<String, List<TableMeta>> map = tableMetaList.stream()
.collect(Collectors.groupingBy(field));
for (Map.Entry<String, List<TableMeta>> entry : map.entrySet()) {
for (TableMeta tableMeta : entry.getValue()) {
GlobalConfiguration globalConfiguration = new GlobalConfiguration(cluster, applicationId, tableMeta);
createFlinkJob(environment, globalConfiguration, flinkJob, tableMeta);
publishSyncStart(globalConfiguration, flinkJob, tableMeta);
}
environment.execute(NameHelper.syncFlinkName(flinkJob.getId(), flinkJob.getName(), entry.getKey()));
}
}
private static void createFlinkJob(StreamExecutionEnvironment environment, GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) throws IOException {
logger.info("Table meta: {}", mapper.writeValueAsString(tableMeta));
logger.info("Config meta: {}", mapper.writeValueAsString(configuration));
SingleOutputStreamOperator<String> source = environment
.addSource(new PulsarMessageSourceReader(configuration, flinkJob, tableMeta))
.setParallelism(tableMeta.getHudi().getSourceTasks());
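        // Tables tagged TAGS_PULSAR_BACKUP additionally mirror the raw Pulsar
        // messages to HDFS through a rolling file sink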
if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_PULSAR_BACKUP)) {
Path path = new Path(StrUtil.format("hdfs://b2/apps/datalake/hive_test/source/{}/{}", String.join("_", flinkJob.getName().split("\\s")), tableMeta.getAlias()));
StreamingFileSink<String> fileSink = StreamingFileSink.<String>forRowFormat(path, new SimpleStringEncoder<>("UTF-8"))
.withRollingPolicy(DefaultRollingPolicy.builder()
.withInactivityInterval(HOUR)
.withMaxPartSize(GB)
.build())
.build();
source.addSink(fileSink).name("Backup pulsar data");
}
SingleOutputStreamOperator<Record> middle = source
.map(new PulsarMessage2RecordFunction(configuration, flinkJob, tableMeta))
.name("Json ( " + tableMeta.getSchema() + "-" + tableMeta.getAlias() + " )")
.filter(new ValidateRecordFilter(configuration, flinkJob, tableMeta))
.name("Reject json parse failure");
SyncUtils.sinkToHoodieByTable(configuration, flinkJob, tableMeta, environment, middle);
}
private static void publishSyncStart(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
StatusUtils.syncStart(configuration, flinkJob, tableMeta);
}
}

View File

@@ -0,0 +1,36 @@
package com.lanyuanxiaoyao.service.sync.configuration;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.PartitionPathEncodeUtils;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.keygen.SimpleAvroKeyGenerator;
import org.apache.hudi.org.apache.avro.generic.GenericRecord;
/**
* @author ZhangJiacheng
*/
public class DefaultPartitionNameKeyGenerator extends SimpleAvroKeyGenerator {
private final String defaultPartitionName;
public DefaultPartitionNameKeyGenerator(TypedProperties props) {
super(props);
defaultPartitionName = props.getString(FlinkOptions.PARTITION_DEFAULT_NAME.key(), FlinkOptions.PARTITION_DEFAULT_NAME.defaultValue());
}
@Override
public String getPartitionPath(GenericRecord record) {
String partitionPathField = getPartitionPathFields().get(0);
String partitionPath = HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathField, true, consistentLogicalTimestampEnabled);
if (partitionPath == null || partitionPath.isEmpty()) {
partitionPath = defaultPartitionName;
}
if (encodePartitionPath) {
partitionPath = PartitionPathEncodeUtils.escapePathName(partitionPath);
}
if (hiveStylePartitioning) {
partitionPath = partitionPathField + "=" + partitionPath;
}
return partitionPath;
}
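
    /*
     * Examples (illustrative; assumes partition field CITY_ID and a configured
     * default partition name of "default"):
     *   value "020" with hiveStylePartitioning=true -> "CITY_ID=020"
     *   value null  with hiveStylePartitioning=true -> "CITY_ID=default"
     */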
}

View File

@@ -0,0 +1,92 @@
package com.lanyuanxiaoyao.service.sync.configuration;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import java.io.Serializable;
/**
 * Static configuration shared by the sync job
*
* @author ZhangJiacheng
* @date 2022-06-13
*/
public class GlobalConfiguration implements Serializable {
private final String cluster;
private final String applicationId;
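    // NOTE: metric publishing is currently hard-coded off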
private final Boolean metricEnable = false;
private final String metricPublishUrl;
private final String metricPublishPrometheusUrl;
private final String metricsPublishCustomUrl;
private final Integer metricPublishDelay;
private final Integer metricPublishPeriod;
private final Integer metricPublishTimeout;
private final Integer metricPublishBatch;
public GlobalConfiguration(String cluster, String applicationId, TableMeta meta) {
this.cluster = cluster;
this.applicationId = applicationId;
this.metricPublishUrl = meta.getConfig().getMetricPublishUrl();
this.metricPublishPrometheusUrl = meta.getConfig().getMetricPrometheusUrl();
this.metricsPublishCustomUrl = meta.getConfig().getMetricApiUrl();
this.metricPublishDelay = meta.getConfig().getMetricPublishDelay();
this.metricPublishPeriod = meta.getConfig().getMetricPublishPeriod();
this.metricPublishTimeout = meta.getConfig().getMetricPublishTimeout();
this.metricPublishBatch = meta.getConfig().getMetricPublishBatch();
}
public String getCluster() {
return cluster;
}
public String getApplicationId() {
return applicationId;
}
public Boolean getMetricEnable() {
return metricEnable;
}
public String getMetricPublishUrl() {
return metricPublishUrl;
}
public String getMetricPublishPrometheusUrl() {
return metricPublishPrometheusUrl;
}
public String getMetricsPublishCustomUrl() {
return metricsPublishCustomUrl;
}
public Integer getMetricPublishDelay() {
return metricPublishDelay;
}
public Integer getMetricPublishPeriod() {
return metricPublishPeriod;
}
public Integer getMetricPublishTimeout() {
return metricPublishTimeout;
}
public Integer getMetricPublishBatch() {
return metricPublishBatch;
}
@Override
public String toString() {
return "GlobalConfiguration{" +
"cluster='" + cluster + '\'' +
", applicationId='" + applicationId + '\'' +
", metricEnable=" + metricEnable +
", metricPublishUrl='" + metricPublishUrl + '\'' +
", metricPublishPrometheusUrl='" + metricPublishPrometheusUrl + '\'' +
", metricsPublishCustomUrl='" + metricsPublishCustomUrl + '\'' +
", metricPublishDelay=" + metricPublishDelay +
", metricPublishPeriod=" + metricPublishPeriod +
", metricPublishTimeout=" + metricPublishTimeout +
", metricPublishBatch=" + metricPublishBatch +
'}';
}
}

View File

@@ -0,0 +1,12 @@
package com.lanyuanxiaoyao.service.sync.configuration;
import dev.failsafe.RetryPolicy;
import java.time.Duration;
public interface RetryPolicyProvider {
RetryPolicy<String> HTTP_RETRY = RetryPolicy.<String>builder()
.handle(Throwable.class)
.withDelay(Duration.ofSeconds(1))
.withMaxAttempts(10)
.build();
}
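The shared policy is consumed through Failsafe's fluent API, so every HTTP call in the job retries uniformly. A usage sketch (the endpoint is hypothetical):
import cn.hutool.http.HttpUtil;
import dev.failsafe.Failsafe;

class RetrySketch {
    static String fetch() {
        // Up to 10 attempts, 1 second apart, retrying on any Throwable
        return Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
                .get(() -> HttpUtil.get("http://config.internal/api/ping")); // hypothetical endpoint
    }
}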

View File

@@ -0,0 +1,52 @@
package com.lanyuanxiaoyao.service.sync.configuration;
import com.lanyuanxiaoyao.service.common.Constants;
import java.util.HashMap;
import java.util.Map;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.org.apache.avro.generic.GenericRecord;
import org.apache.hudi.org.apache.avro.util.Utf8;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author lanyuanxiaoyao
* @date 2023-04-18
*/
public class TraceOverwriteWithLatestAvroPayload extends OverwriteWithLatestAvroPayload {
private static final Logger logger = LoggerFactory.getLogger(TraceOverwriteWithLatestAvroPayload.class);
private final String latestOpts;
public TraceOverwriteWithLatestAvroPayload(GenericRecord record, Comparable orderingVal) {
super(record, orderingVal);
this.latestOpts = updateLatestOpts(Option.ofNullable(record));
}
public TraceOverwriteWithLatestAvroPayload(Option<GenericRecord> record) {
super(record);
this.latestOpts = updateLatestOpts(record);
}
private String updateLatestOpts(Option<GenericRecord> record) {
try {
return record
.map(r -> ((Utf8) r.get(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME)).toString())
.orElse(null);
} catch (Throwable throwable) {
logger.error("Get latest opts failure", throwable);
}
return null;
}
@Override
public Option<Map<String, String>> getMetadata() {
if (this.latestOpts == null) {
return Option.empty();
}
Map<String, String> metadata = super.getMetadata().orElse(new HashMap<>());
metadata.put(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, this.latestOpts);
return Option.of(metadata);
}
}
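The payload class must be registered with the Hudi writer for this to take effect; TraceWriteStatus (next file) then picks up the latest_op_ts entry that getMetadata() emits. A one-line wiring sketch, assuming conf is the Flink Configuration used to build the Hudi write pipeline:
// Assumption: conf is the writer's Flink Configuration
conf.setString(FlinkOptions.PAYLOAD_CLASS_NAME, TraceOverwriteWithLatestAvroPayload.class.getName());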

View File

@@ -0,0 +1,58 @@
package com.lanyuanxiaoyao.service.sync.configuration;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Map;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author lanyuanxiaoyao
* @date 2023-04-17
*/
public class TraceWriteStatus extends WriteStatus {
private static final Logger logger = LoggerFactory.getLogger(TraceWriteStatus.class);
private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
private long latestOpts = 0L;
public TraceWriteStatus() {
super();
}
public TraceWriteStatus(Boolean trackSuccessRecords, Double failureFraction) {
super(trackSuccessRecords, failureFraction);
}
public long getLatestOpts() {
return latestOpts;
}
@Override
public void markSuccess(HoodieRecord record, Option<Map<String, String>> optionalRecordMetadata) {
super.markSuccess(record, optionalRecordMetadata);
try {
optionalRecordMetadata.ifPresent(map -> {
if (map.containsKey(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME)) {
String inOpts = map.get(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME);
if (StrUtil.isNotBlank(inOpts)) {
long current = LocalDateTime.parse(inOpts, FORMATTER).toInstant(ZoneOffset.ofHours(8)).toEpochMilli();
latestOpts = Long.max(latestOpts, current);
}
}
});
} catch (Throwable throwable) {
logger.error("Parse latest opts failure", throwable);
}
}
@Override
public void markFailure(HoodieRecord record, Throwable t, Option<Map<String, String>> optionalRecordMetadata) {
super.markFailure(record, t, optionalRecordMetadata);
}
}
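Hudi instantiates its WriteStatus implementation from the write config, so this class has to be registered there. A sketch under the assumption that the key is Hudi's standard hoodie.writestatus.class and that the job's writer setup forwards it to HoodieWriteConfig:
// Assumption: the setting is forwarded to HoodieWriteConfig by the writer setup
conf.setString("hoodie.writestatus.class", TraceWriteStatus.class.getName());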

View File

@@ -0,0 +1,65 @@
package com.lanyuanxiaoyao.service.sync.functions;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.utils.TableMetaHelper;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.configuration.TraceWriteStatus;
import com.lanyuanxiaoyao.service.sync.utils.StatusUtils;
import java.io.Serializable;
import java.util.List;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.sink.compact.CompactEventHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Compaction event handler
*
* @author ZhangJiacheng
* @date 2022-06-15
*/
public class CompactionEventHandler implements CompactEventHandler, Serializable {
private static final Logger logger = LoggerFactory.getLogger(CompactionEventHandler.class);
private final GlobalConfiguration configuration;
private final FlinkJob flinkJob;
private final TableMeta tableMeta;
public CompactionEventHandler(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
this.configuration = configuration;
this.flinkJob = flinkJob;
this.tableMeta = tableMeta;
}
@Override
public void failure(String instant) {
}
@Override
public void success(String instant, List<WriteStatus> statuses, HoodieCommitMetadata metadata) {
StatusUtils.compactionCommit(configuration, flinkJob, tableMeta, instant, metadata);
logger.info("WriteStatus: {}", statuses);
if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_TRACE_LATEST_OP_TS)) {
Long max = statuses.stream()
.map(status -> {
if (status instanceof TraceWriteStatus) {
TraceWriteStatus s = (TraceWriteStatus) status;
return s.getLatestOpts();
}
return 0L;
})
.max(Long::compare)
.orElse(0L);
logger.info("Latest op ts: {}", max);
StatusUtils.compactionLatestOpTs(configuration, flinkJob, tableMeta, max);
}
}
@Override
public void closed(String message, Exception exception) {
StatusUtils.compactionFinish(configuration, flinkJob, tableMeta, message, exception);
}
}

View File

@@ -0,0 +1,90 @@
package com.lanyuanxiaoyao.service.sync.functions;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.map.MapUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.Record;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.metrics.CountMetric;
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import org.apache.flink.api.common.functions.RichFilterFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
/**
* Operation-type filter operator
*
* @author ZhangJiacheng
* @date 2022-06-12
*/
public class OperationTypeFilter extends RichFilterFunction<Record> implements CheckpointedFunction {
private final CountMetric insertRateMetric;
private final CountMetric updateRateMetric;
private final CountMetric deleteRateMetric;
private final CountMetric ddlRateMetric;
private final CountMetric unknownRateMetric;
private final List<CountMetric> metrics;
private final GlobalConfiguration globalConfiguration;
public OperationTypeFilter(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta) {
this.globalConfiguration = globalConfiguration;
Function<String, Map<String, String>> fillTags = operator -> MapUtil.<String, String>builder()
.put(Constants.METRICS_LABEL_TYPE, operator)
.build();
insertRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.INSERT));
updateRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.UPDATE));
deleteRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.DELETE));
ddlRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.DDL));
unknownRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.UNKNOWN));
metrics = ListUtil.toList(insertRateMetric, updateRateMetric, deleteRateMetric, ddlRateMetric, unknownRateMetric);
}
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
// Initialize metrics
MetricsUtils.createMakePointTimer(globalConfiguration, metrics);
}
@Override
public boolean filter(Record record) {
String opType = record.getStatement().getOpType();
switch (opType) {
case Constants.INSERT:
insertRateMetric.increment();
break;
case Constants.UPDATE:
updateRateMetric.increment();
break;
case Constants.DELETE:
deleteRateMetric.increment();
break;
case Constants.DDL:
ddlRateMetric.increment();
break;
default:
unknownRateMetric.increment();
}
return !Constants.DDL.equals(opType);
}
@Override
public void initializeState(FunctionInitializationContext context) {
}
@Override
public void snapshotState(FunctionSnapshotContext context) {
MetricsUtils.publishAllMetrics(metrics);
}
}

View File

@@ -0,0 +1,80 @@
package com.lanyuanxiaoyao.service.sync.functions;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.Record;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.utils.RecordHelper;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.utils.JacksonUtils;
import com.lanyuanxiaoyao.service.sync.utils.StatusUtils;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Pattern;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Pulsar message to object
*
* @author ZhangJiacheng
* @date 2022-06-11
*/
public class PulsarMessage2RecordFunction extends RichMapFunction<String, Record> implements CheckpointedFunction {
private static final Logger logger = LoggerFactory.getLogger(PulsarMessage2RecordFunction.class);
private static final AtomicReference<String> lastOperationTime = new AtomicReference<>("");
private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
private static final Pattern OPTS_PATTERN = Pattern.compile("^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}$");
private final GlobalConfiguration globalConfiguration;
private final FlinkJob flinkJob;
private final TableMeta tableMeta;
private final ObjectMapper mapper = JacksonUtils.getMapper();
public PulsarMessage2RecordFunction(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta) {
this.globalConfiguration = globalConfiguration;
this.flinkJob = flinkJob;
this.tableMeta = tableMeta;
}
@Override
public Record map(String message) throws JsonProcessingException {
Record record = null;
try {
record = mapper.readValue(message, Record.class);
if (RecordHelper.isNotVersionUpdateRecord(record)) {
lastOperationTime.set(record.getStatement().getOpTs());
}
} catch (Exception exception) {
logger.error("Message json parse failure", exception);
}
return record;
}
@Override
public void snapshotState(FunctionSnapshotContext context) {
String opTs = lastOperationTime.get();
Long timestamp = null;
try {
if (StrUtil.isNotBlank(opTs) && OPTS_PATTERN.matcher(opTs).matches()) {
timestamp = LocalDateTime.parse(opTs, FORMATTER).toInstant(ZoneOffset.ofHours(8)).toEpochMilli();
} else {
// Malformed op ts: record it and fall through with a null timestamp
logger.error("Op ts '{}' does not match pattern {}", opTs, OPTS_PATTERN.pattern());
}
} catch (Exception e) {
logger.error("Parse operation time error", e);
}
StatusUtils.syncOperation(globalConfiguration, flinkJob, tableMeta, timestamp);
}
@Override
public void initializeState(FunctionInitializationContext context) {
}
}

View File

@@ -0,0 +1,235 @@
package com.lanyuanxiaoyao.service.sync.functions;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.utils.LogHelper;
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
import com.lanyuanxiaoyao.service.common.utils.RecordHelper;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.metrics.MessageSizeSizeMetric;
import com.lanyuanxiaoyao.service.sync.metrics.RateMetric;
import com.lanyuanxiaoyao.service.sync.utils.LoadBalance;
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
import com.lanyuanxiaoyao.service.sync.utils.StatusUtils;
import dev.failsafe.Failsafe;
import dev.failsafe.RetryPolicy;
import java.time.Duration;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.flink.api.common.state.CheckpointListener;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.pulsar.client.api.*;
import org.apache.pulsar.client.impl.schema.StringSchema;
import org.apache.pulsar.client.internal.DefaultImplementation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.lanyuanxiaoyao.service.common.utils.LogHelper.LogPoint.*;
/**
* Pulsar Reader Source
*
* @author ZhangJiacheng
* @date 2022-06-11
*/
public class PulsarMessageSourceReader extends RichParallelSourceFunction<String> implements CheckpointedFunction, CheckpointListener {
private static final Logger logger = LoggerFactory.getLogger(PulsarMessageSourceReader.class);
private static final RetryPolicy<String> MESSAGE_ID_RETRY = RetryPolicy.<String>builder()
.handle(Exception.class)
.withDelay(Duration.ofSeconds(1))
.withMaxAttempts(10)
.build();
private final String topic;
private final GlobalConfiguration globalConfiguration;
private final FlinkJob flinkJob;
private final TableMeta tableMeta;
private final AtomicReference<MessageId> lastMessageId = new AtomicReference<>();
private final AtomicLong lastPublishTime = new AtomicLong(0);
private final RateMetric messageReceiveMetric;
private final MessageSizeSizeMetric messageSizeReceiveMetric;
private final Map<Long, MessageId> messageIdMap = new ConcurrentHashMap<>();
private boolean running = true;
private PulsarClient client = null;
private Reader<String> reader = null;
public PulsarMessageSourceReader(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta) {
logger.info("Use PulsarMessageSourceReader");
this.globalConfiguration = globalConfiguration;
this.flinkJob = flinkJob;
this.topic = tableMeta.getTopic();
this.tableMeta = tableMeta;
String messageId = tableMeta.getConfig().getMessageId();
logger.info("{} {}", Constants.LOG_POINT_PULSAR_SOURCE_BOOTSTRAP_MESSAGE_ID, messageId);
if (StrUtil.isNotBlank(messageId)) {
lastMessageId.set(parseMessageId(messageId));
} else {
logger.warn("Message id is empty");
lastMessageId.set(MessageId.earliest);
}
messageReceiveMetric = new RateMetric(
globalConfiguration,
Constants.METRICS_SYNC_SOURCE_MESSAGE_RECEIVE,
flinkJob,
tableMeta
);
messageSizeReceiveMetric = new MessageSizeSizeMetric(
globalConfiguration,
Constants.METRICS_SYNC_SOURCE_MESSAGE_SIZE_RECEIVE_BYTES,
flinkJob, tableMeta
);
}
private static MessageId parseMessageId(String messageIdText) {
return DefaultImplementation.newMessageId(Long.parseLong(messageIdText.split(":")[0]), Long.parseLong(messageIdText.split(":")[1]), -1);
}
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
LogHelper.info(logger, CHECKPOINT_INITIAL);
String queryUrl = StrUtil.format(
"{}/api/message_id?flink_job_id={}&alias={}",
LoadBalance.getCustomPublishUrl(globalConfiguration),
flinkJob.getId(),
tableMeta.getAlias()
);
logger.info("Query url: {}", queryUrl);
String messageId = Failsafe.with(MESSAGE_ID_RETRY)
.onFailure(event -> {
if (ObjectUtil.isNotNull(event.getException())) {
logger.error(StrUtil.format("{} Get message id error", Constants.LOG_POINT_PULSAR_SOURCE_GET_MESSAGE_ID_ERROR), event.getException());
}
})
.get(() ->
HttpUtil.createGet(queryUrl)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.execute()
.body()
);
LogHelper.info(logger, CHECKPOINT_INITIAL_MESSAGE_ID, "Get message id: {}", messageId);
if (StrUtil.isNotBlank(messageId)) {
lastMessageId.set(parseMessageId(messageId));
} else {
logger.warn(StrUtil.format("{} Message id is empty, now message id is {}", Constants.LOG_POINT_MESSAGE_ID_EMPTY, lastMessageId.get()));
}
}
@Override
public void run(SourceContext<String> context) throws Exception {
String currentValue = null;
while (running) {
Message<String> message;
try {
message = reader.readNext();
if (ObjectUtil.isNotNull(message)) {
String value = message.getValue();
currentValue = value;
if (ObjectUtil.isEmpty(value)) {
logger.warn("{} {}", message.getValue(), message.getMessageId());
}
synchronized (context.getCheckpointLock()) {
context.collect(value);
}
if (RecordHelper.isNotVersionUpdateRecord(value)) {
lastPublishTime.set(message.getPublishTime());
}
lastMessageId.set(message.getMessageId());
messageReceiveMetric.increment();
try {
messageSizeReceiveMetric.increment(message.getValue().getBytes().length);
} catch (Throwable t) {
logger.warn("Parse message size failure", t);
}
}
} catch (Throwable t) {
logger.error("Read message failure, current value: " + currentValue, t);
}
}
}
@Override
public void open(Configuration configuration) throws Exception {
super.open(configuration);
// Initialize metrics
MetricsUtils.createMakePointTimer(globalConfiguration, messageReceiveMetric);
MetricsUtils.createMakePointTimer(globalConfiguration, messageSizeReceiveMetric);
try {
client = PulsarClient.builder()
.serviceUrl(tableMeta.getPulsarAddress())
.build();
reader = client.newReader(new StringSchema())
.topic(topic)
.receiverQueueSize(10000)
.subscriptionName(NameHelper.pulsarSubscriptionName(flinkJob.getId(), tableMeta.getAlias()))
.startMessageId(lastMessageId.get())
.startMessageIdInclusive()
.create();
} catch (Exception exception) {
logger.error(StrUtil.format("Connect pulsar error ({} {})", tableMeta.getPulsarAddress(), topic), exception);
throw exception;
}
logger.info("Message id set to {}", lastMessageId.get());
}
@Override
public void cancel() {
running = false;
}
@Override
public void close() throws Exception {
super.close();
if (reader != null) {
try {
reader.close();
} catch (PulsarClientException e) {
logger.error("Pulsar reader close error", e);
}
}
if (client != null) {
try {
client.close();
} catch (PulsarClientException e) {
logger.error("Pulsar client close error", e);
}
}
}
@Override
public void snapshotState(FunctionSnapshotContext context) {
MessageId messageId = lastMessageId.get();
messageIdMap.put(context.getCheckpointId(), messageId);
LogHelper.info(logger, CHECKPOINT_START, "Checkpoint start message id: {}, checkpoint id: {}", messageId, context.getCheckpointId());
messageReceiveMetric.publish();
messageSizeReceiveMetric.publish();
}
@Override
public void notifyCheckpointComplete(long checkpointId) {
MessageId messageId = messageIdMap.getOrDefault(checkpointId, MessageId.earliest);
LogHelper.info(logger, CHECKPOINT_COMPLETE, "Checkpoint complete message id: {}, checkpoint id: {}", messageId, checkpointId);
StatusUtils.syncCheckpoint(globalConfiguration, flinkJob, tableMeta, messageId.toString(), lastPublishTime.get());
messageIdMap.remove(checkpointId);
}
@Override
public void notifyCheckpointAborted(long checkpointId) throws Exception {
CheckpointListener.super.notifyCheckpointAborted(checkpointId);
}
}
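Worth noting for reviewers: snapshotState records the last read id per checkpoint, notifyCheckpointComplete reports that id to the status service, and initializeState fetches it back on restart; parseMessageId then expects the persisted "ledgerId:entryId" form. A small sketch with hypothetical ids (the partition index is hard-coded to -1, so a non-partitioned topic is assumed):
// Inside this class: "3245:17" -> ledger 3245, entry 17.
// Because the reader uses startMessageIdInclusive(), the checkpointed message is
// re-read once after restart, so the downstream Hudi upsert must stay idempotent.
MessageId resumeFrom = parseMessageId("3245:17");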

View File

@@ -0,0 +1,171 @@
package com.lanyuanxiaoyao.service.sync.functions;
import cn.hutool.core.collection.ListUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.Record;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.utils.LogHelper;
import com.lanyuanxiaoyao.service.common.utils.MapHelper;
import com.lanyuanxiaoyao.service.common.utils.RecordHelper;
import com.lanyuanxiaoyao.service.common.utils.TableMetaHelper;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.functions.type.TypeConverter;
import com.lanyuanxiaoyao.service.sync.metrics.CountMetric;
import com.lanyuanxiaoyao.service.sync.utils.JacksonUtils;
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
import com.lanyuanxiaoyao.service.sync.utils.StatusUtils;
import com.lanyuanxiaoyao.service.sync.utils.SyncUtils;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.hudi.org.apache.avro.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.lanyuanxiaoyao.service.common.utils.LogHelper.LogPoint.VERSION_UPDATE;
/**
* Converts a Record into RowData rows
*
* @author ZhangJiacheng
* @date 2022-06-13
*/
public class Record2RowDataFunction extends RichMapFunction<Record, List<RowData>> implements CheckpointedFunction {
private static final Logger logger = LoggerFactory.getLogger(Record2RowDataFunction.class);
private final GlobalConfiguration globalConfiguration;
private final TableMeta tableMeta;
private final CountMetric changeFilterMetric;
private final CountMetric changePartitionMetric;
private final ObjectMapper mapper = JacksonUtils.getMapper();
private final FlinkJob flinkJob;
private Schema schema;
public Record2RowDataFunction(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta) {
this.globalConfiguration = globalConfiguration;
this.flinkJob = flinkJob;
this.tableMeta = tableMeta;
changeFilterMetric = new CountMetric(
globalConfiguration,
Constants.METRICS_SYNC_SOURCE_CHANGE_FILTER,
flinkJob, tableMeta
);
changePartitionMetric = new CountMetric(
globalConfiguration,
Constants.METRICS_SYNC_SOURCE_CHANGE_PARTITION,
flinkJob, tableMeta
);
}
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
schema = SyncUtils.avroSchemaWithExtraFields(tableMeta);
}
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
// Initialize metrics
MetricsUtils.createMakePointTimer(globalConfiguration, changeFilterMetric);
MetricsUtils.createMakePointTimer(globalConfiguration, changePartitionMetric);
}
@Override
public void snapshotState(FunctionSnapshotContext context) throws Exception {
MetricsUtils.publishAllMetrics(changeFilterMetric);
MetricsUtils.publishAllMetrics(changePartitionMetric);
}
private GenericRowData convert2RowData(Schema schema, Map<String, Object> current) {
List<Schema.Field> fields = schema.getFields();
GenericRowData data = new GenericRowData(fields.size());
for (int index = 0; index < fields.size(); index++) {
Schema.Field field = fields.get(index);
// For telepg sources, field names must be normalized to lowercase: the upstream naming is inconsistent, so the downstream has to compensate
Object value = current.getOrDefault(Constants.FIELD_COVERT.apply(tableMeta, field.name()), null);
if (field.schema().getType().equals(Schema.Type.STRING)
|| (field.schema().isUnion() && field.schema().getTypes().contains(Schema.create(Schema.Type.STRING)))
|| value instanceof String) {
data.setField(index, StringData.fromString((String) value));
} else {
data.setField(index, value);
}
}
return data;
}
private Boolean isFilterOut(TableMeta tableMeta, Map<String, Object> current) {
if (!tableMeta.getFilterType().equals(TableMeta.FilterType.NONE)) {
if (current.containsKey(tableMeta.getFilterField())) {
String fieldValue = MapHelper.getStringWithoutCase(current, tableMeta.getFilterField());
if (tableMeta.getFilterType().equals(TableMeta.FilterType.EXCLUDE)) {
return tableMeta.getFilterValues().contains(fieldValue);
} else if (tableMeta.getFilterType().equals(TableMeta.FilterType.INCLUDE)) {
return !tableMeta.getFilterValues().contains(fieldValue);
}
}
}
return false;
}
@Override
public List<RowData> map(Record record) throws Exception {
List<Map<String, Object>> result = ListUtil.list(false);
if (RecordHelper.isVersionUpdateRecord(record)) {
Record.Statement statement = record.getStatement();
LogHelper.info(logger, VERSION_UPDATE, "{} {} version: {}", mapper.writeValueAsString(statement.getSchema()), statement.getVersion(), statement.getVersion());
LogHelper.info(logger, VERSION_UPDATE, "Raw: {}", mapper.writeValueAsString(record));
StatusUtils.versionUpdate(globalConfiguration, flinkJob, tableMeta, record.getStatement().getVersion(), statement.getOpTs());
return ListUtil.empty();
}
Map<String, Object> current = RecordHelper.getCurrentStatement(record);
if (Objects.isNull(current)) {
logger.error("Record: {}", mapper.writeValueAsString(record));
throw new RuntimeException("Current cannot be null");
}
// If an update changed the value of the filter field, the old record also has to be deleted first
boolean isChangeFilter = RecordHelper.isChangeField(tableMeta, record, TableMetaHelper::getFilterField);
if (isChangeFilter) {
logger.info("Change filter: {}", mapper.writeValueAsString(record));
changeFilterMetric.increment();
}
// If this is an update and city_id (the partition field) changed, delete the old record first
boolean isChangePartition = RecordHelper.isChangeField(tableMeta, record, TableMetaHelper::getPartitionField);
if (isChangePartition) {
logger.info("Change partition field: {}", mapper.writeValueAsString(record));
changePartitionMetric.increment();
}
if (isChangeFilter || isChangePartition) {
Map<String, Object> before = record.getStatement().getBefore();
result.add(0, RecordHelper.addExtraMetadata(before, tableMeta, record, true));
}
// Add the Hudi-specific metadata fields
result.add(RecordHelper.addExtraMetadata(current, tableMeta, record));
return result.stream()
// Apply the filter-field rules
.filter(r -> !isFilterOut(tableMeta, r))
.map(r -> TypeConverter.getInstance(tableMeta)
.convertToGenericRowData(tableMeta, schema, r))
.collect(Collectors.toList());
}
}

View File

@@ -0,0 +1,40 @@
package com.lanyuanxiaoyao.service.sync.functions;
import cn.hutool.core.util.ObjectUtil;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.Record;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import org.apache.flink.api.common.functions.RichFilterFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Validates that a Record is well-formed
*
* @author ZhangJiacheng
* @date 2022-11-15
*/
public class ValidateRecordFilter extends RichFilterFunction<Record> {
private static final Logger logger = LoggerFactory.getLogger(ValidateRecordFilter.class);
public ValidateRecordFilter(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
}
@Override
public boolean filter(Record record) {
if (ObjectUtil.isNull(record)) {
logger.warn("Record is null");
return false;
}
if (ObjectUtil.isNull(record.getSource())) {
logger.warn("Record Source is null");
return false;
}
if (ObjectUtil.isNull(record.getStatement())) {
logger.warn("Record Statement is null");
return false;
}
return true;
}
}

View File

@@ -0,0 +1,43 @@
package com.lanyuanxiaoyao.service.sync.functions.type;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import java.io.Serializable;
import java.util.Map;
import org.apache.flink.table.data.GenericRowData;
import org.apache.hudi.org.apache.avro.Schema;
/**
* Handles type conversion between source column types, Avro schemas and Flink rows
*
* @author ZhangJiacheng
* @date 2023-07-20
*/
public interface TypeConverter extends Serializable {
LoadingCache<Integer, TypeConverter> CACHE = Caffeine.newBuilder()
.build(version -> {
switch (version) {
case 1:
return new TypeConverterV2();
case 0:
default:
return new TypeConverterV1();
}
});
static TypeConverter getInstance(TableMeta meta) {
return getInstance(meta.getVersion());
}
static TypeConverter getInstance(Integer version) {
if (version == null) {
version = 0;
}
return CACHE.get(version);
}
Schema convertToSchema(TableMeta meta);
GenericRowData convertToGenericRowData(TableMeta meta, Schema schema, Map<String, Object> data);
}
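Callers are expected to go through the Caffeine cache so one converter instance is shared per schema version. A usage sketch (the TableMeta is whatever the job was launched with):
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import java.util.Map;
import org.apache.flink.table.data.GenericRowData;
import org.apache.hudi.org.apache.avro.Schema;

class TypeConverterUsageSketch {
    static GenericRowData toRow(TableMeta meta, Map<String, Object> change) {
        TypeConverter converter = TypeConverter.getInstance(meta); // version 1 -> V2, null/0 -> V1
        Schema schema = converter.convertToSchema(meta);
        return converter.convertToGenericRowData(meta, schema, change);
    }
}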

View File

@@ -0,0 +1,53 @@
package com.lanyuanxiaoyao.service.sync.functions.type;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import java.util.List;
import java.util.Map;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;
import org.apache.hudi.org.apache.avro.Schema;
import org.apache.hudi.org.apache.avro.SchemaBuilder;
/**
* Type conversion, version 1:
* every field type is mapped to String
*
* @author ZhangJiacheng
* @date 2023-07-20
*/
public class TypeConverterV1 implements TypeConverter {
@Override
public Schema convertToSchema(TableMeta meta) {
SchemaBuilder.FieldAssembler<Schema> fieldBuilder = SchemaBuilder.builder()
.record(meta.getTable())
.fields();
fieldBuilder.nullableBoolean(Constants.HUDI_DELETE_KEY_NAME, false);
fieldBuilder.nullableString(Constants.UNION_KEY_NAME, "");
meta.getFields().forEach(fieldMeta -> fieldBuilder.optionalString(fieldMeta.getName()));
fieldBuilder.nullableLong(Constants.UPDATE_TIMESTAMP_KEY_NAME, -1);
fieldBuilder.nullableString(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, "");
return fieldBuilder.endRecord();
}
@Override
public GenericRowData convertToGenericRowData(TableMeta meta, Schema schema, Map<String, Object> data) {
List<Schema.Field> fields = schema.getFields();
GenericRowData row = new GenericRowData(fields.size());
for (int index = 0; index < fields.size(); index++) {
Schema.Field field = fields.get(index);
// For telepg sources, field names must be normalized to lowercase: the upstream naming is inconsistent, so the downstream has to compensate
Object value = data.getOrDefault(Constants.FIELD_COVERT.apply(meta, field.name()), null);
if (field.schema().getType().equals(Schema.Type.STRING)
|| (field.schema().isUnion() && field.schema().getTypes().contains(Schema.create(Schema.Type.STRING)))
|| value instanceof String) {
row.setField(index, StringData.fromString((String) value));
} else {
row.setField(index, value);
}
}
return row;
}
}

View File

@@ -0,0 +1,141 @@
package com.lanyuanxiaoyao.service.sync.functions.type;
import cn.hutool.core.util.ObjectUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import java.math.BigDecimal;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.regex.Pattern;
import org.apache.flink.table.data.DecimalData;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;
import org.apache.hudi.org.apache.avro.JsonProperties;
import org.apache.hudi.org.apache.avro.LogicalTypes;
import org.apache.hudi.org.apache.avro.Schema;
import static org.apache.hudi.org.apache.avro.Schema.*;
/**
* Type conversion, version 2: maps source column types onto typed Avro schemas
*
* @author ZhangJiacheng
* @date 2023-07-20
*/
public class TypeConverterV2 implements TypeConverter {
public static final Schema NULL_SCHEMA = create(Type.NULL);
public static final Schema BOOLEAN_SCHEMA = create(Type.BOOLEAN);
public static final Schema INT_SCHEMA = create(Type.INT);
public static final Schema LONG_SCHEMA = create(Type.LONG);
public static final Schema FLOAT_SCHEMA = create(Type.FLOAT);
public static final Schema DOUBLE_SCHEMA = create(Type.DOUBLE);
public static final Schema STRING_SCHEMA = create(Type.STRING);
public static final Function<Integer, Schema> FIXED_SCHEMA = length -> createFixed("decimal_" + length, null, null, length);
public static final BiFunction<Integer, Integer, Schema> DECIMAL_SCHEMA = (length, scala) -> LogicalTypes.decimal(length, scala).addToSchema(FIXED_SCHEMA.apply(length));
public static final BiFunction<Integer, Integer, Schema> NULLABLE_DECIMAL_SCHEMA = (length, scala) -> createUnion(NULL_SCHEMA, DECIMAL_SCHEMA.apply(length, scala));
public static final Schema NULLABLE_BOOLEAN_SCHEMA = createUnion(NULL_SCHEMA, BOOLEAN_SCHEMA);
public static final Schema NULLABLE_INT_SCHEMA = createUnion(NULL_SCHEMA, INT_SCHEMA);
public static final Schema NULLABLE_LONG_SCHEMA = createUnion(NULL_SCHEMA, LONG_SCHEMA);
public static final Schema NULLABLE_FLOAT_SCHEMA = createUnion(NULL_SCHEMA, FLOAT_SCHEMA);
public static final Schema NULLABLE_DOUBLE_SCHEMA = createUnion(NULL_SCHEMA, DOUBLE_SCHEMA);
public static final Schema NULLABLE_STRING_SCHEMA = createUnion(NULL_SCHEMA, STRING_SCHEMA);
public static final Function<Integer, Schema> NULLABLE_FIXED_SCHEMA = length -> createUnion(NULL_SCHEMA, FIXED_SCHEMA.apply(length));
private static final Pattern BOOLEAN_REGEX = Pattern.compile("^boolean|bool$");
private static final Pattern INT_REGEX = Pattern.compile("^(tinyint|smallint|int|smallserial|integer)(\\(\\d+\\))?$");
private static final Pattern LONG_REGEX = Pattern.compile("^(bigint unsigned)|((bigint|serial|long)(\\(\\d+\\))?)$");
private static final Pattern DATE_REGEX = Pattern.compile("^date|timestamp|timestamp without time zone|datetime|time$");
private static final Pattern FLOAT_REGEX = Pattern.compile("^float(\\(\\d+\\))?$");
private static final Pattern DOUBLE_REGEX = Pattern.compile("^double(\\(\\d+\\))?$");
private static final Pattern FIXED_REGEX = Pattern.compile("^(number|money|bigserial)(\\(\\d+\\))?$");
private static final Pattern DECIMAL_REGEX = Pattern.compile("^(double precision)|(decimal(\\(\\s*\\d+\\s*(,\\s*\\d+\\s*)?\\))?)$");
private static final Pattern NUMERIC_REGEX = Pattern.compile("^numeric(\\(\\s*\\d+\\s*(,\\s*\\d+\\s*)?\\))?$");
private static final Pattern STRING_REGEX = Pattern.compile("^(character varying|(long|medium)text)|((varchar|char|text|clob|binary|bit)(\\(\\d+\\))?)$");
private static final Pattern YYYYMMDD = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}");
private static final DateTimeFormatter YYYYMMDD_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
@Override
public Schema convertToSchema(TableMeta meta) {
List<Field> fields = new ArrayList<>(meta.getFields().size() + 4);
fields.add(new Field(Constants.HUDI_DELETE_KEY_NAME, BOOLEAN_SCHEMA, null, false));
fields.add(new Field(Constants.UNION_KEY_NAME, STRING_SCHEMA, null, ""));
for (TableMeta.FieldMeta field : meta.getFields()) {
fields.add(new Field(field.getName(), convertType(field.getType(), field.getLength(), field.getScala()), null, JsonProperties.NULL_VALUE));
}
fields.add(new Field(Constants.UPDATE_TIMESTAMP_KEY_NAME, LONG_SCHEMA, null, -1));
fields.add(new Field(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, STRING_SCHEMA, null, ""));
return Schema.createRecord(meta.getTable(), null, null, false, fields);
}
private Schema convertType(String type, Long length, Integer scale) {
type = type.trim().toLowerCase();
if (BOOLEAN_REGEX.matcher(type).matches()) {
return NULLABLE_BOOLEAN_SCHEMA;
} else if (STRING_REGEX.matcher(type).matches() || DATE_REGEX.matcher(type).matches()) {
return NULLABLE_STRING_SCHEMA;
} else if (INT_REGEX.matcher(type).matches()) {
return NULLABLE_INT_SCHEMA;
} else if (LONG_REGEX.matcher(type).matches()) {
return NULLABLE_LONG_SCHEMA;
} else if (FLOAT_REGEX.matcher(type).matches()) {
return NULLABLE_FLOAT_SCHEMA;
} else if (DOUBLE_REGEX.matcher(type).matches()) {
return NULLABLE_DOUBLE_SCHEMA;
} else if (FIXED_REGEX.matcher(type).matches()) {
return NULLABLE_DECIMAL_SCHEMA.apply(length.intValue(), 0);
} else if (DECIMAL_REGEX.matcher(type).matches() || NUMERIC_REGEX.matcher(type).matches()) {
if (ObjectUtil.isNull(scale)) {
return NULLABLE_DECIMAL_SCHEMA.apply(length.intValue(), 6);
} else {
return NULLABLE_DECIMAL_SCHEMA.apply(length.intValue(), scale);
}
} else {
throw new RuntimeException(Constants.LOG_POINT_FIELD_TYPE_NOT_FOUND + " Cannot find correct type for source type: " + type + " length: " + length);
}
}
@Override
public GenericRowData convertToGenericRowData(TableMeta meta, Schema schema, Map<String, Object> data) {
List<Field> fields = schema.getFields();
GenericRowData row = new GenericRowData(fields.size());
for (int index = 0; index < fields.size(); index++) {
Field field = fields.get(index);
Object value = data.getOrDefault(Constants.FIELD_COVERT.apply(meta, field.name()), null);
row.setField(index, convertValue(field.schema(), value));
}
return row;
}
private Object convertValue(Schema schema, Object value) {
if (ObjectUtil.isNull(value)) {
return value;
} else if (NULLABLE_BOOLEAN_SCHEMA.equals(schema) || BOOLEAN_SCHEMA.equals(schema)) {
return value instanceof String ? Boolean.valueOf((String) value) : value;
} else if (NULLABLE_INT_SCHEMA.equals(schema) || INT_SCHEMA.equals(schema)) {
return value instanceof String ? Integer.valueOf((String) value) : value;
} else if (NULLABLE_LONG_SCHEMA.equals(schema) || LONG_SCHEMA.equals(schema)) {
return value instanceof String ? Long.valueOf((String) value) : value;
} else if (NULLABLE_FLOAT_SCHEMA.equals(schema) || FLOAT_SCHEMA.equals(schema)) {
return value instanceof String ? Float.valueOf((String) value) : value;
} else if (NULLABLE_DOUBLE_SCHEMA.equals(schema) || DOUBLE_SCHEMA.equals(schema)) {
return value instanceof String ? Double.valueOf((String) value) : value;
} else if (NULLABLE_STRING_SCHEMA.equals(schema) || STRING_SCHEMA.equals(schema)) {
return StringData.fromString((String) value);
} else {
for (Schema type : schema.getTypes()) {
if (type.getLogicalType() instanceof LogicalTypes.Decimal) {
LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) type.getLogicalType();
int precision = decimalType.getPrecision();
int scale = decimalType.getScale();
return DecimalData.fromBigDecimal(new BigDecimal((String) value), precision, scale);
}
}
return value;
}
}
}
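A few concrete mappings implied by the regex table above, plus the decimal path, as a sketch:
// "varchar(64)"  -> union(null, string)
// "bigint"       -> union(null, long)
// "datetime"     -> union(null, string)         // temporal types stay strings in this version
// "numeric(10)"  -> union(null, decimal(10, 6)) // scale defaults to 6 when unspecified
// A decimal(10,2) column arriving as the string "1234.50" is converted via:
DecimalData d = DecimalData.fromBigDecimal(new BigDecimal("1234.50"), 10, 2);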

View File

@@ -0,0 +1,123 @@
package com.lanyuanxiaoyao.service.sync.metrics;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.http.HttpResponse;
import cn.hutool.http.HttpUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.utils.JacksonUtils;
import dev.failsafe.Failsafe;
import dev.failsafe.RetryPolicy;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.atomic.LongAdder;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Base class for metrics publishing
*
* @author ZhangJiacheng
* @date 2022-06-13
*/
public abstract class AbstractMetric implements Metric {
private static final Logger logger = LoggerFactory.getLogger(AbstractMetric.class);
private static final ObjectMapper MAPPER = JacksonUtils.getMapper();
private static final RetryPolicy<HttpResponse> PUBLISH_RETRY = RetryPolicy.<HttpResponse>builder()
.handle(Exception.class)
.withDelay(Duration.ofSeconds(1))
.withMaxAttempts(5)
.build();
private final GlobalConfiguration globalConfiguration;
private final List<String> lineCache = ListUtil.toList();
private final LongAdder autoPublishCount = new LongAdder();
private List<HttpMetricsRequest> requests = new ArrayList<>();
public AbstractMetric(GlobalConfiguration globalConfiguration) {
this.globalConfiguration = globalConfiguration;
}
public void setRequests(HttpMetricsRequest... requests) {
setRequests(ListUtil.toList(requests));
}
public void setRequests(List<HttpMetricsRequest> requests) {
this.requests = requests;
}
public void addRequest(HttpMetricsRequest request) {
this.requests.add(request);
}
public void addRequests(HttpMetricsRequest... requests) {
addRequests(ListUtil.toList(requests));
}
public void addRequests(List<HttpMetricsRequest> requests) {
this.requests.addAll(requests);
}
@Override
public void addTag(String key, String value) {
requests.forEach(request -> request.addTag(key, value));
}
@Override
public void makePoint(boolean autoPublish, int batch) {
if (!globalConfiguration.getMetricEnable()) {
return;
}
if (autoPublish) {
if (autoPublishCount.sum() >= batch) {
publish();
}
autoPublishCount.increment();
}
makePoint();
}
public synchronized void publish() {
if (!globalConfiguration.getMetricEnable()) {
return;
}
try {
requests.stream()
.filter(request -> !request.isEmpty())
.map(request -> {
try {
String data = MAPPER.writeValueAsString(request);
request.clear();
return data;
} catch (JsonProcessingException e) {
logger.warn("Parse metrics failure: " + request, e);
}
return null;
})
.filter(Objects::nonNull)
.forEach(lineCache::add);
if (lineCache.isEmpty()) {
return;
}
String lines = String.join("\n", lineCache);
logger.debug("Push metrics: \n{}", lines);
HttpResponse response = Failsafe.with(PUBLISH_RETRY)
.get(() -> HttpUtil.createPost(globalConfiguration.getMetricPublishUrl())
.body(lines)
.basicAuth(Constants.VICTORIA_USERNAME, Constants.VICTORIA_PASSWORD)
.timeout(globalConfiguration.getMetricPublishTimeout())
.execute());
if (response.isOk()) {
logger.debug("Metrics push success");
}
} catch (Throwable throwable) {
logger.warn("Push metrics failure, url: " + globalConfiguration.getMetricPublishUrl(), throwable);
} finally {
lineCache.clear();
autoPublishCount.reset();
}
}
}
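publish() posts newline-delimited JSON, one serialized HttpMetricsRequest (defined below) per line; given the VICTORIA_* basic-auth constants, the target is presumably VictoriaMetrics' JSON-line import endpoint. A sketch of one line, with hypothetical metric name and tags:
import cn.hutool.core.map.MapUtil;

HttpMetricsRequest req = new HttpMetricsRequest("demo_count", MapUtil.of("table", "demo"));
req.addMetric(12.0);
// serializes to roughly:
// {"metric":{"__name__":"demo_count","table":"demo"},"values":[12.0],"timestamps":[1700000000000]}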

View File

@@ -0,0 +1,67 @@
package com.lanyuanxiaoyao.service.sync.metrics;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.map.MapUtil;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.LongAdder;
/**
* Count metric
*
* @author ZhangJiacheng
* @date 2022-06-13
*/
public class CountMetric extends AbstractMetric {
private final LongAdder count = new LongAdder();
private final HttpMetricsRequest countMetrics;
public CountMetric(GlobalConfiguration globalConfiguration, String name) {
this(globalConfiguration, name, MapUtil.empty());
}
public CountMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta) {
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta)).build());
}
public CountMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, String extraTagKey, String extraTagValue) {
this(globalConfiguration, name, job, meta, MapUtil.of(extraTagKey, extraTagValue));
}
public CountMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, Map<String, String> tags) {
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta))
.putAll(tags)
.build());
}
public CountMetric(GlobalConfiguration globalConfiguration, String name, Map<String, String> tags) {
super(globalConfiguration);
countMetrics = new HttpMetricsRequest(
name + "_count",
MapUtil.<String, String>builder().putAll(tags).build()
);
setRequests(countMetrics);
}
public void increment() {
count.increment();
}
@Override
public void makePoint() {
double count = this.count.doubleValue();
if (count != 0) {
countMetrics.addMetric(count);
}
}
@Override
public List<HttpMetricsRequest> getMetrics() {
return ListUtil.toList(countMetrics);
}
}
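A typical operator lifecycle for this metric, as a sketch (the metric name is hypothetical):
CountMetric inserts = new CountMetric(configuration, "sync_source_insert", flinkJob, tableMeta);
inserts.increment();  // hot path: lock-free LongAdder increment per matching record
inserts.makePoint();  // timer: snapshot the running count into a data point
inserts.publish();    // checkpoint: flush buffered points to the metrics endpoint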

View File

@@ -0,0 +1,88 @@
package com.lanyuanxiaoyao.service.sync.metrics;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.map.MapUtil;
import java.beans.Transient;
import java.io.Serializable;
import java.time.Instant;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
* Metrics payload entity
*
* @author ZhangJiacheng
* @date 2022-06-13
*/
public class HttpMetricsRequest implements Serializable {
private final Map<String, String> metric;
private final List<Double> values;
private final List<Long> timestamps;
public HttpMetricsRequest(String name, Map<String, String> metrics) {
this.metric = MapUtil.<String, String>builder()
.put("__name__", name)
.build();
this.metric.putAll(metrics);
this.values = Collections.synchronizedList(ListUtil.list(true));
this.timestamps = Collections.synchronizedList(ListUtil.list(true));
}
public void addTag(String key, String value) {
this.metric.put(key, value);
}
public void addMetric(Double value) {
addMetric(value, Instant.now().toEpochMilli());
}
public void addMetric(Double value, Long timestamp) {
synchronized (this) {
values.add(value);
timestamps.add(timestamp);
}
}
public void clear() {
synchronized (this) {
this.values.clear();
this.timestamps.clear();
}
}
@Transient
public boolean isEmpty() {
return this.values.isEmpty() && this.timestamps.isEmpty();
}
@Transient
public boolean isNonEmpty() {
return !isEmpty();
}
public Map<String, String> getMetric() {
return metric;
}
public List<Double> getValues() {
return values;
}
public List<Long> getTimestamps() {
return timestamps;
}
@Override
public String toString() {
return "MetricsItem{" +
"metrics=" + metric +
", values=" + values +
", timestamps=" + timestamps +
'}';
}
}

View File

@@ -0,0 +1,77 @@
package com.lanyuanxiaoyao.service.sync.metrics;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.map.MapUtil;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.DoubleAdder;
import java.util.concurrent.atomic.LongAdder;
/**
* Message size metric
*
* @author ZhangJiacheng
* @date 2022-06-13
*/
public class MessageSizeSizeMetric extends AbstractMetric {
private final LongAdder count = new LongAdder();
private final DoubleAdder size = new DoubleAdder();
private final HttpMetricsRequest sizeMetrics;
private final HttpMetricsRequest perMessageSizeMetrics;
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name) {
this(globalConfiguration, name, MapUtil.empty());
}
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta) {
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta)).build());
}
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, String extraTagKey, String extraTagValue) {
this(globalConfiguration, name, job, meta, MapUtil.of(extraTagKey, extraTagValue));
}
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, Map<String, String> tags) {
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta))
.putAll(tags)
.build());
}
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name, Map<String, String> tags) {
super(globalConfiguration);
sizeMetrics = new HttpMetricsRequest(
name + "_total",
MapUtil.<String, String>builder().putAll(tags).build()
);
perMessageSizeMetrics = new HttpMetricsRequest(
name + "_per_message",
MapUtil.<String, String>builder().putAll(tags).build()
);
setRequests(sizeMetrics, perMessageSizeMetrics);
}
public void increment(long size) {
this.count.increment();
this.size.add(size);
}
@Override
public void makePoint() {
double count = this.count.doubleValue();
double size = this.size.doubleValue();
if (size != 0 && count != 0) {
sizeMetrics.addMetric(size);
perMessageSizeMetrics.addMetric(size / count);
}
}
@Override
public List<HttpMetricsRequest> getMetrics() {
return ListUtil.toList(sizeMetrics, perMessageSizeMetrics);
}
}

View File

@@ -0,0 +1,20 @@
package com.lanyuanxiaoyao.service.sync.metrics;
import java.io.Serializable;
import java.util.List;
/**
* Metric interface definition
*
* @author ZhangJiacheng
* @date 2022-06-13
*/
public interface Metric extends Serializable {
void addTag(String key, String value);
void makePoint(boolean autoPublish, int batch);
void makePoint();
List<HttpMetricsRequest> getMetrics();
}

View File

@@ -0,0 +1,87 @@
package com.lanyuanxiaoyao.service.sync.metrics;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.map.MapUtil;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.LongAdder;
/**
* Rate metric
*
* @author ZhangJiacheng
* @date 2022-06-13
*/
public class RateMetric extends AbstractMetric {
private final LongAdder count = new LongAdder();
private final HttpMetricsRequest countMetrics;
private final HttpMetricsRequest millisecondMetrics;
private final HttpMetricsRequest perMillisecondMetrics;
private final Instant startInstant;
public RateMetric(GlobalConfiguration globalConfiguration, String name) {
this(globalConfiguration, name, MapUtil.empty());
}
public RateMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta) {
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta)).build());
}
public RateMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, String extraTagKey, String extraTagValue) {
this(globalConfiguration, name, job, meta, MapUtil.of(extraTagKey, extraTagValue));
}
public RateMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, Map<String, String> tags) {
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta))
.putAll(tags)
.build());
}
public RateMetric(GlobalConfiguration globalConfiguration, String name, Map<String, String> tags) {
super(globalConfiguration);
startInstant = Instant.now();
countMetrics = new HttpMetricsRequest(
name + "_count",
MapUtil.<String, String>builder().putAll(tags).build()
);
millisecondMetrics = new HttpMetricsRequest(
name + "_millisecond",
MapUtil.<String, String>builder().putAll(tags).build()
);
perMillisecondMetrics = new HttpMetricsRequest(
name + "_per_millisecond",
MapUtil.<String, String>builder().putAll(tags).build()
);
setRequests(countMetrics, millisecondMetrics, perMillisecondMetrics);
}
public void increment() {
count.increment();
}
@Override
public void makePoint() {
double count = this.count.doubleValue();
if (count != 0) {
long millis = Duration.between(startInstant, Instant.now()).toMillis();
countMetrics.addMetric(count);
millisecondMetrics.addMetric((double) millis);
perMillisecondMetrics.addMetric(count / millis);
}
}
@Override
public List<HttpMetricsRequest> getMetrics() {
return ListUtil.toList(countMetrics, millisecondMetrics, perMillisecondMetrics);
}
}

View File

@@ -0,0 +1,98 @@
package com.lanyuanxiaoyao.service.sync.utils;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.exception.MissingArgumentException;
import java.util.List;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.type.TypeReference;
/**
* Helpers for parsing job launch arguments
*
* @author ZhangJiacheng
* @date 2022-03-10
*/
public class ArgumentsUtils {
public static long getJobId(String[] args) throws MissingArgumentException {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.JOB_ID)) {
throw new MissingArgumentException(Constants.JOB_ID);
}
return argsTool.getLong(Constants.JOB_ID);
}
public static String getTable(String[] args) throws MissingArgumentException {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.TABLE_NAME)) {
throw new MissingArgumentException(Constants.TABLE_NAME);
}
return argsTool.get(Constants.TABLE_NAME);
}
public static Boolean getServiceMode(String[] args) {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.SERVICE_MODE)) {
return false;
}
return argsTool.getBoolean(Constants.SERVICE_MODE);
}
public static String getMessageId(String[] args) throws MissingArgumentException {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.MESSAGE_ID)) {
throw new MissingArgumentException(Constants.MESSAGE_ID);
}
return argsTool.get(Constants.MESSAGE_ID);
}
public static TableMeta getTableMeta(String[] args) throws Exception {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.TABLE_META)) {
throw new MissingArgumentException(Constants.TABLE_META);
}
return JacksonUtils.getMapper().readValue(argsTool.get(Constants.TABLE_META), TableMeta.class);
}
public static List<TableMeta> getTableMetaList(String[] args) throws Exception {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.TABLE_META_LIST)) {
throw new MissingArgumentException(Constants.TABLE_META_LIST);
}
return JacksonUtils.getMapper().readValue(argsTool.get(Constants.TABLE_META_LIST), new TypeReference<List<TableMeta>>() {});
}
public static FlinkJob getFlinkJob(String[] args) throws MissingArgumentException, JsonProcessingException {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.FLINK_JOB)) {
throw new MissingArgumentException(Constants.FLINK_JOB);
}
return JacksonUtils.getMapper().readValue(argsTool.get(Constants.FLINK_JOB), FlinkJob.class);
}
public static String getInstants(String[] args) {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.INSTANTS)) {
return "";
}
return argsTool.get(Constants.INSTANTS);
}
public static Boolean getBetaMode(String[] args) {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.BETA)) {
return false;
}
return argsTool.getBoolean(Constants.BETA);
}
public static String getCluster(String[] args) {
ParameterTool argsTool = ParameterTool.fromArgs(args);
if (!argsTool.has(Constants.CLUSTER)) {
return "";
}
return argsTool.get(Constants.CLUSTER);
}
}
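
A minimal entrypoint sketch, showing how these accessors combine (the class name and the printout are illustrative and not part of this commit; required arguments fail fast with MissingArgumentException, optional flags fall back to defaults):

package com.lanyuanxiaoyao.service.sync;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.sync.utils.ArgumentsUtils;
public class SyncEntrypointSketch {
public static void main(String[] args) throws Exception {
// Required arguments: absence aborts before any Flink setup
long jobId = ArgumentsUtils.getJobId(args);
TableMeta tableMeta = ArgumentsUtils.getTableMeta(args);
// Optional flags: default to false when absent
boolean serviceMode = ArgumentsUtils.getServiceMode(args);
boolean betaMode = ArgumentsUtils.getBetaMode(args);
System.out.printf("job=%d table=%s serviceMode=%b beta=%b%n", jobId, tableMeta.getTable(), serviceMode, betaMode);
}
}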

View File

@@ -0,0 +1,22 @@
package com.lanyuanxiaoyao.service.sync.utils;
import com.lanyuanxiaoyao.service.common.Constants;
/**
* Environment variable related settings
*
* @author ZhangJiacheng
* @date 2022-06-21
*/
public class EnvUtils {
public static void setEnv() {
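// Intentionally a no-op: the default run type sets no extra system properties (an assumption, inferred from the sibling setters)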
}
public static void setCompactionEnv() {
System.setProperty(Constants.METRICS_LABEL_RUN_TYPE, Constants.METRICS_RUN_TYPE_COMPACTION);
}
public static void setSyncEnv() {
System.setProperty(Constants.METRICS_LABEL_RUN_TYPE, Constants.METRICS_RUN_TYPE_SYNC);
}
}

View File

@@ -0,0 +1,31 @@
package com.lanyuanxiaoyao.service.sync.utils;
import cn.hutool.core.util.ObjectUtil;
import java.io.Serializable;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.DeserializationFeature;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.MapperFeature;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* JSON parsing utilities
*
* @author ZhangJiacheng
* @date 2022-06-12
*/
public class JacksonUtils implements Serializable {
private static final Logger logger = LoggerFactory.getLogger(JacksonUtils.class);
private static ObjectMapper INSTANCE = null;
// Synchronized so concurrent callers share a single lazily built mapper; ObjectMapper is thread-safe once configured
public static synchronized ObjectMapper getMapper() {
if (ObjectUtil.isNull(INSTANCE)) {
INSTANCE = new ObjectMapper();
INSTANCE.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true);
INSTANCE.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
INSTANCE.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false);
}
return INSTANCE;
}
}
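
The mapper is deliberately lenient: property names match case-insensitively, and unknown or ignored fields are skipped instead of failing. A quick illustration (SampleBean is a hypothetical target type):

SampleBean bean = JacksonUtils.getMapper().readValue("{\"TABLE\":\"t1\",\"extra\":1}", SampleBean.class);
// parses despite the upper-case key and the extra field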

View File

@@ -0,0 +1,29 @@
package com.lanyuanxiaoyao.service.sync.utils;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.RandomUtil;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Load-balanced (random) selection of the metrics publish URL
*
* @author ZhangJiacheng
* @date 2022-07-05
*/
public class LoadBalance {
private static final Logger logger = LoggerFactory.getLogger(LoadBalance.class);
private static String[] urls = null;
private static int length = 0;
// Synchronized so the lazy split of the comma-separated URL list happens exactly once
public static synchronized String getCustomPublishUrl(GlobalConfiguration globalConfiguration) {
if (ObjectUtil.isNull(urls)) {
urls = globalConfiguration.getMetricsPublishCustomUrl().split(",");
length = urls.length;
}
String url = urls[RandomUtil.randomInt(length)];
logger.info("Random url: {}", url);
return url;
}
}
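
Usage note (hedged): the pool is the comma-separated value of metricsPublishCustomUrl, e.g. "http://host-a:8080,http://host-b:8080" (hosts illustrative); each call picks one entry uniformly at random:

String url = LoadBalance.getCustomPublishUrl(globalConfiguration); // e.g. "http://host-b:8080"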

View File

@@ -0,0 +1,64 @@
package com.lanyuanxiaoyao.service.sync.utils;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.map.MapUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.metrics.AbstractMetric;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Metrics utilities
*
* @author ZhangJiacheng
* @date 2022-06-12
*/
public class MetricsUtils implements Serializable {
private static final Logger logger = LoggerFactory.getLogger(MetricsUtils.class);
public static Map<String, String> commonTags(FlinkJob job, TableMeta meta) {
return MapUtil.<String, String>builder()
.put(Constants.METRICS_LABEL_FLINK_JOB_ID, job.getId().toString())
.put(Constants.METRICS_LABEL_FLINK_JOB_NAME, job.getName())
.put(Constants.METRICS_LABEL_SCHEMA, meta.getSchema())
.put(Constants.METRICS_LABEL_TABLE, meta.getTable())
.put(Constants.METRICS_LABEL_ALIAS, meta.getAlias())
.build();
}
@SafeVarargs
public static <T extends AbstractMetric> void createMakePointTimer(GlobalConfiguration globalConfiguration, T... metrics) {
createMakePointTimer(globalConfiguration, ListUtil.toList(metrics));
}
public static <T extends AbstractMetric> void createMakePointTimer(GlobalConfiguration globalConfiguration, List<T> metrics) {
logger.info("Create timer: {}", metrics);
new Timer().schedule(new TimerTask() {
@Override
public void run() {
for (AbstractMetric metric : metrics) {
metric.makePoint(true, globalConfiguration.getMetricPublishBatch());
}
}
}, globalConfiguration.getMetricPublishDelay(), globalConfiguration.getMetricPublishPeriod());
}
@SafeVarargs
public static <T extends AbstractMetric> void publishAllMetrics(T... metrics) {
publishAllMetrics(ListUtil.toList(metrics));
}
public static <T extends AbstractMetric> void publishAllMetrics(List<T> metrics) {
for (AbstractMetric metric : metrics) {
metric.publish();
}
}
}
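
A hedged wiring sketch: commonTags builds the label set, createMakePointTimer flushes points on the configured schedule, and publishAllMetrics pushes a final snapshot at shutdown. CountTimeMetric stands in for one of the concrete AbstractMetric subclasses in this commit; its constructor signature is an assumption:

Map<String, String> tags = MetricsUtils.commonTags(flinkJob, tableMeta);
CountTimeMetric counter = new CountTimeMetric("sync_records", tags); // hypothetical subclass and constructor
MetricsUtils.createMakePointTimer(globalConfiguration, counter); // periodic makePoint using the configured delay/period/batch
// ... the job calls counter.increment() per record ...
MetricsUtils.publishAllMetrics(counter); // final flush before exit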

View File

@@ -0,0 +1,271 @@
package com.lanyuanxiaoyao.service.sync.utils;
import cn.hutool.core.util.EnumUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.configuration.RetryPolicyProvider;
import dev.failsafe.Failsafe;
import java.time.Instant;
import java.util.Map;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Status reporting
*
* @author ZhangJiacheng
* @date 2022-07-05
*/
public class StatusUtils {
private static final Logger logger = LoggerFactory.getLogger(StatusUtils.class);
private static final ObjectMapper MAPPER = JacksonUtils.getMapper();
private static final int HTTP_TIMEOUT = (int) Constants.MINUTE;
public static void syncStart(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
logger.info("Enter method: syncStart[configuration, flinkJob, tableMeta]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta);
try {
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
.run(() ->
HttpUtil.createGet(
StrUtil.format(
"{}/api/sync_start?flink_job_id={}&alias={}&database={}&schema={}&table={}&cluster={}&application_id={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias(),
tableMeta.getSource(),
tableMeta.getSchema(),
tableMeta.getTable(),
configuration.getCluster(),
configuration.getApplicationId()
)
)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.timeout(HTTP_TIMEOUT)
.execute()
);
} catch (Exception e) {
logger.warn("sync start metrics submit failure");
}
}
public static void syncCheckpoint(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String messageId, Long publishTime) {
logger.info("Enter method: syncCheckpoint[configuration, flinkJob, tableMeta, messageId, publishTime]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "messageId:" + messageId + "," + "publishTime:" + publishTime);
try {
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
.run(() -> HttpUtil.createGet(
StrUtil.format(
"{}/api/sync_checkpoint_state?flink_job_id={}&alias={}&message_id={}&publish_time={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias(),
messageId,
publishTime
)
)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.timeout(HTTP_TIMEOUT)
.execute()
);
} catch (Exception e) {
logger.warn("sync checkpoint metrics submit failure");
}
}
public static void syncOperation(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
syncOperation(configuration, flinkJob, tableMeta, null);
}
public static void syncOperation(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, Long operationTime) {
logger.info("Enter method: syncOperation[configuration, flinkJob, tableMeta, operationTime]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "operationTime:" + operationTime);
try {
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
.run(() -> {
if (ObjectUtil.isNull(operationTime)) {
HttpUtil.createGet(
StrUtil.format(
"{}/api/sync_operation_state?flink_job_id={}&alias={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias()
)
)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.timeout(HTTP_TIMEOUT)
.execute();
} else {
HttpUtil.createGet(
StrUtil.format(
"{}/api/sync_operation_state?flink_job_id={}&alias={}&operation_time={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias(),
operationTime
)
)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.timeout(HTTP_TIMEOUT)
.execute();
}
});
} catch (Exception e) {
logger.warn("sync operation metrics submit failure");
}
}
public static void compactionStart(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
logger.info("Enter method: compactionStart[configuration, flinkJob, tableMeta]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta);
try {
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
.run(() ->
HttpUtil.createGet(
StrUtil.format(
"{}/api/compaction_start?flink_job_id={}&alias={}&type={}&cluster={}&application_id={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias(),
EnumUtil.toString(tableMeta.getSourceType()),
configuration.getCluster(),
configuration.getApplicationId()
)
)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.timeout(HTTP_TIMEOUT)
.execute()
);
} catch (Exception e) {
logger.warn("compaction start metrics submit failure");
}
}
public static void compactionPreCommit(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String instant, Map<String, Long> metadata) {
logger.info("Enter method: compactionPreCommit[configuration, flinkJob, tableMeta, instant, metadata]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "instant:" + instant + "," + "metadata:" + metadata);
try {
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
.run(() -> HttpUtil.createPost(
StrUtil.format(
"{}/api/compaction_pre_commit?flink_job_id={}&alias={}&instant={}&cluster={}&application_id={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias(),
instant,
configuration.getCluster(),
configuration.getApplicationId()
)
)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.body(MAPPER.writeValueAsString(metadata))
.timeout(HTTP_TIMEOUT)
.execute()
);
} catch (Exception e) {
logger.warn("compaction pre commit metrics submit failure");
}
}
public static void compactionCommit(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String instant, HoodieCommitMetadata metadata) {
logger.info("Enter method: compactionCommit[configuration, flinkJob, tableMeta, instant, metadata]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "instant:" + instant + "," + "metadata:" + metadata);
try {
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
.run(() -> HttpUtil.createPost(
StrUtil.format(
"{}/api/compaction_commit?flink_job_id={}&alias={}&instant={}&cluster={}&application_id={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias(),
instant,
configuration.getCluster(),
configuration.getApplicationId()
)
)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.body(MAPPER.writeValueAsString(metadata))
.timeout(HTTP_TIMEOUT)
.execute()
);
} catch (Exception e) {
logger.warn("compaction commit metrics submit failure");
}
}
public static void compactionFinish(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String message, Exception exception) {
logger.info("Enter method: compactionFinish[configuration, flinkJob, tableMeta, message, exception]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "message:" + message + "," + "exception:" + exception);
try {
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
.run(() -> {
boolean success = (exception == null);
HttpUtil.createPost(StrUtil.format(
"{}/api/compaction_finish?flink_job_id={}&alias={}&time={}&state={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias(),
Instant.now().toEpochMilli(),
success
))
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.body(success ? message == null ? "" : message : exception.toString(), "text/plain")
.timeout(HTTP_TIMEOUT)
.execute()
.close();
});
} catch (Exception e) {
logger.warn("compaction finish metrics submit failure");
}
}
public static void versionUpdate(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String version, String opts) {
logger.info("Enter method: versionUpdate[configuration, flinkJob, tableMeta, version, opts]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "version:" + version + "," + "opts:" + opts);
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
.run(() ->
HttpUtil.createGet(
StrUtil.format(
"{}/api/version_update?flink_job_id={}&alias={}&version={}&opts={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias(),
version,
opts
)
)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.timeout(HTTP_TIMEOUT)
.execute()
);
}
public static void compactionLatestOpTs(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, Long latestOpTs) {
logger.info("Enter method: compactionLatestOpTs[configuration, flinkJob, tableMeta, latestOpTs]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "latestOpTs:" + latestOpTs);
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
.run(() ->
HttpUtil.createGet(
StrUtil.format(
"{}/api/compaction_latest_operation_time?flink_job_id={}&alias={}&latest_op_ts={}",
LoadBalance.getCustomPublishUrl(configuration),
flinkJob.getId(),
tableMeta.getAlias(),
latestOpTs
)
)
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
.timeout(HTTP_TIMEOUT)
.execute()
);
}
}
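
For orientation, the intended call order across a sync job's lifetime, as a hedged sketch (the call sites are illustrative; the real wiring lives in the job's functions):

StatusUtils.syncStart(configuration, flinkJob, tableMeta); // once, when the job comes up
StatusUtils.syncCheckpoint(configuration, flinkJob, tableMeta, messageId, publishTime); // on each checkpoint of the source
StatusUtils.syncOperation(configuration, flinkJob, tableMeta, operationTime); // per applied change; the overload without operationTime omits the timestamp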

View File

@@ -0,0 +1,257 @@
package com.lanyuanxiaoyao.service.sync.utils;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.util.EnumUtil;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.Record;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.utils.TableMetaHelper;
import com.lanyuanxiaoyao.service.sync.configuration.DefaultPartitionNameKeyGenerator;
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
import com.lanyuanxiaoyao.service.sync.configuration.TraceOverwriteWithLatestAvroPayload;
import com.lanyuanxiaoyao.service.sync.configuration.TraceWriteStatus;
import com.lanyuanxiaoyao.service.sync.functions.OperationTypeFilter;
import com.lanyuanxiaoyao.service.sync.functions.Record2RowDataFunction;
import com.lanyuanxiaoyao.service.sync.functions.type.TypeConverter;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieStorageConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
import org.apache.hudi.config.metrics.HoodieMetricsVictoriaConfig;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.configuration.OptionsResolver;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.metrics.MetricsReporterType;
import org.apache.hudi.org.apache.avro.Schema;
import org.apache.hudi.sink.utils.Pipelines;
import org.apache.hudi.table.action.compact.strategy.UnBoundedCompactionStrategy;
import org.apache.hudi.util.AvroSchemaConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.lanyuanxiaoyao.service.common.Constants.HOUR;
/**
* Flink-related utilities
*
* @author lanyuanxiaoyao
* @version 0.0.2
* @date 2022-04-20
*/
@SuppressWarnings("UnusedAssignment")
public class SyncUtils {
private static final Logger logger = LoggerFactory.getLogger(SyncUtils.class);
private static final long K = 1024;
private static final long M = 1024 * K;
private static final long G = 1024 * M;
public static Schema avroSchemaWithExtraFields(TableMeta meta) {
return TypeConverter.getInstance(meta).convertToSchema(meta);
}
public static Configuration getSyncFlinkConfiguration(GlobalConfiguration globalConfiguration, Configuration inputConfiguration, FlinkJob flinkJob, TableMeta tableMeta, Schema schema, Integer defaultParallelism) {
return getMetricsFlinkConfiguration(Constants.METRICS_RUN_TYPE_SYNC, globalConfiguration, inputConfiguration, flinkJob, tableMeta, schema, defaultParallelism);
}
public static Configuration getCompactionFlinkConfiguration(GlobalConfiguration globalConfiguration, Configuration inputConfiguration, FlinkJob flinkJob, TableMeta tableMeta, Schema schema, Integer defaultParallelism) {
return getMetricsFlinkConfiguration(Constants.METRICS_RUN_TYPE_COMPACTION, globalConfiguration, inputConfiguration, flinkJob, tableMeta, schema, defaultParallelism);
}
// Shared VictoriaMetrics reporter setup; sync and compaction jobs differ only in the run-type tag
private static Configuration getMetricsFlinkConfiguration(String runType, GlobalConfiguration globalConfiguration, Configuration inputConfiguration, FlinkJob flinkJob, TableMeta tableMeta, Schema schema, Integer defaultParallelism) {
Configuration configuration = inputConfiguration != null ? inputConfiguration : new Configuration();
configuration.setBoolean(HoodieMetricsConfig.TURN_METRICS_ON.key(), false);
configuration.setString(HoodieMetricsConfig.METRICS_REPORTER_TYPE_VALUE.key(), MetricsReporterType.VICTORIA.name());
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_ENDPOINT.key(), globalConfiguration.getMetricPublishPrometheusUrl());
configuration.setInteger(HoodieMetricsVictoriaConfig.VICTORIA_TIMEOUT.key(), 60000);
configuration.setBoolean(HoodieMetricsVictoriaConfig.VICTORIA_BASIC_AUTH_ENABLE.key(), true);
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_BASIC_AUTH_USERNAME.key(), Constants.VICTORIA_USERNAME);
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_BASIC_AUTH_PASSWORD.key(), Constants.VICTORIA_PASSWORD);
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_TAGS.key(), ListUtil.toList(
Pair.of(Constants.METRICS_LABEL_RUN_TYPE, runType),
Pair.of(Constants.METRICS_LABEL_FLINK_JOB_ID, flinkJob.getId()),
Pair.of(Constants.METRICS_LABEL_FLINK_JOB_NAME, flinkJob.getName().replaceAll("\\s", "_")),
Pair.of(Constants.METRICS_LABEL_SCHEMA, tableMeta.getSchema()),
Pair.of(Constants.METRICS_LABEL_TABLE, tableMeta.getTable()),
Pair.of(Constants.METRICS_LABEL_ALIAS, tableMeta.getAlias())
).stream().map(pair -> StrUtil.format("{}={}", pair.getLeft(), pair.getRight())).collect(Collectors.joining(";")));
return getFlinkConfiguration(configuration, tableMeta, schema, defaultParallelism);
}
public static Configuration getFlinkConfiguration(Configuration inputConfiguration, TableMeta tableMeta, Schema schema, Integer defaultParallelism) {
Configuration configuration = inputConfiguration != null ? inputConfiguration : new Configuration();
String tableType = tableMeta.getHudi().getTargetTableType();
logger.info("Hudi table type: {}", tableMeta.getHudi().getTargetTableType());
// Basic table settings
configuration.setString(FlinkOptions.TABLE_NAME, tableMeta.getHudi().getTargetTable());
configuration.setString(FlinkOptions.TABLE_TYPE, tableType);
configuration.setString(FlinkOptions.PATH, tableMeta.getHudi().getTargetHdfsPath());
configuration.setString(FlinkOptions.RECORD_KEY_FIELD, Constants.UNION_KEY_NAME);
configuration.setBoolean(FlinkOptions.PRE_COMBINE, false);
if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_PRE_COMBINE)) {
configuration.setBoolean(FlinkOptions.PRE_COMBINE, true);
}
configuration.setString(FlinkOptions.PRECOMBINE_FIELD, Constants.UPDATE_TIMESTAMP_KEY_NAME);
configuration.setString(FlinkOptions.SOURCE_AVRO_SCHEMA, schema.toString());
if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_NO_IGNORE_FAILED)) {
configuration.setBoolean(FlinkOptions.IGNORE_FAILED, false);
}
configuration.setString(FlinkOptions.PARTITION_DEFAULT_NAME, "default");
configuration.setString(FlinkOptions.KEYGEN_CLASS_NAME, DefaultPartitionNameKeyGenerator.class.getName());
Optional<String> partitionPath = TableMetaHelper.getPartitionField(tableMeta);
logger.info("Partition field: {}", partitionPath.orElse(""));
if (partitionPath.isPresent()) {
configuration.setString(FlinkOptions.PARTITION_PATH_FIELD, partitionPath.get());
}
if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_TRACE_LATEST_OP_TS)) {
logger.info("Enable trace latest op ts");
configuration.setString(FlinkOptions.PAYLOAD_CLASS_NAME, TraceOverwriteWithLatestAvroPayload.class.getName());
configuration.setString(HoodieWriteConfig.WRITE_STATUS_CLASS_NAME.key(), TraceWriteStatus.class.getName());
}
configuration.setBoolean(FlinkOptions.METADATA_ENABLED, false);
configuration.setInteger(HoodieStorageConfig.LOGFILE_DATA_BLOCK_MAX_SIZE.key(), Integer.MAX_VALUE);
configuration.setString(FileSystemViewStorageConfig.SECONDARY_VIEW_TYPE.key(), FileSystemViewStorageType.SPILLABLE_DISK.name());
// Write
configuration.setInteger(FlinkOptions.WRITE_TASKS, tableMeta.getHudi().getWriteTasks() == 0 ? defaultParallelism : tableMeta.getHudi().getWriteTasks());
configuration.setInteger(FlinkOptions.WRITE_MERGE_MAX_MEMORY, 0);
configuration.setDouble(FlinkOptions.WRITE_TASK_MAX_SIZE, tableMeta.getHudi().getWriteTaskMaxMemory() == 0 ? FlinkOptions.WRITE_TASK_MAX_SIZE.defaultValue() : tableMeta.getHudi().getWriteTaskMaxMemory());
configuration.setDouble(FlinkOptions.WRITE_BATCH_SIZE, tableMeta.getHudi().getWriteBatchSize() == 0 ? FlinkOptions.WRITE_BATCH_SIZE.defaultValue() : tableMeta.getHudi().getWriteBatchSize());
configuration.setLong(FlinkOptions.WRITE_RATE_LIMIT, tableMeta.getHudi().getWriteRateLimit());
configuration.setLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT, HOUR);
// Index
configuration.setString(FlinkOptions.INDEX_TYPE, HoodieIndex.IndexType.BUCKET.name());
configuration.setInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS, tableMeta.getHudi().getBucketIndexNumber() == 0 ? 50 : tableMeta.getHudi().getBucketIndexNumber());
configuration.setString(FlinkOptions.INDEX_KEY_FIELD, Constants.UNION_KEY_NAME);
configuration.setBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED, false);
configuration.setBoolean(FlinkOptions.INDEX_GLOBAL_ENABLED, false);
configuration.setDouble(FlinkOptions.INDEX_STATE_TTL, -1);
// Increasing this caused OOM
// configuration.setDouble(HoodieMemoryConfig.MAX_DFS_STREAM_BUFFER_SIZE.key(), 64 * M);
// Increasing this caused OOM
// configuration.setDouble(HoodieStorageConfig.LOGFILE_DATA_BLOCK_MAX_SIZE.key(), 128 * M);
// Compaction
configuration.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false);
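// Hudi's OptionsResolver.needsAsyncCompaction only honors this flag for MOR tables, so MOR compaction is left to the separate compaction job; the COW branch below appears to have no runtime effect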
if (EnumUtil.equals(HoodieTableType.COPY_ON_WRITE, tableType)) {
configuration.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, true);
}
configuration.setBoolean(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, true);
configuration.setInteger(FlinkOptions.COMPACTION_TASKS, tableMeta.getHudi().getCompactionTasks());
configuration.setString(FlinkOptions.COMPACTION_TRIGGER_STRATEGY, StrUtil.isBlank(tableMeta.getHudi().getCompactionStrategy()) ? FlinkOptions.NUM_OR_TIME : tableMeta.getHudi().getCompactionStrategy());
configuration.setInteger(FlinkOptions.COMPACTION_MAX_MEMORY, 1024);
configuration.setInteger(FlinkOptions.COMPACTION_DELTA_SECONDS, tableMeta.getHudi().getCompactionDeltaSeconds() == 0 ? 15 * 60 : tableMeta.getHudi().getCompactionDeltaSeconds());
configuration.setInteger(FlinkOptions.COMPACTION_DELTA_COMMITS, tableMeta.getHudi().getCompactionDeltaCommits() == 0 ? 5 : tableMeta.getHudi().getCompactionDeltaCommits());
configuration.setString(HoodieCompactionConfig.COMPACTION_STRATEGY.key(), UnBoundedCompactionStrategy.class.getName());
// configuration.setString(HoodieCompactionConfig.COMPACTION_STRATEGY.key(), CombineAllCompactionStrategy.class.getName());
// configuration.setBoolean(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, true);
// configuration.setInteger(FlinkOptions.COMPACTION_TASKS, tableMeta.getHudi().getCompactionTasks() == 0 ? defaultParallelism : tableMeta.getHudi().getCompactionTasks());
// configuration.setInteger(FlinkOptions.COMPACTION_MAX_MEMORY, tableMeta.getHudi().getCompactionMaxMemory());
// configuration.setString(FlinkOptions.COMPACTION_TRIGGER_STRATEGY, tableMeta.getHudi().getCompactionStrategy());
// configuration.setInteger(FlinkOptions.COMPACTION_DELTA_COMMITS, tableMeta.getHudi().getCompactionDeltaCommits());
// configuration.setInteger(FlinkOptions.COMPACTION_DELTA_SECONDS, tableMeta.getHudi().getCompactionDeltaSeconds());
// Number of commits retained on the timeline
configuration.setInteger(FlinkOptions.CLEAN_RETAIN_COMMITS, tableMeta.getHudi().getKeepCommitVersion());
// Minimum commits kept before archiving; must be larger than the previous setting
configuration.setInteger(FlinkOptions.ARCHIVE_MIN_COMMITS, tableMeta.getHudi().getKeepCommitVersion() + 50);
// Maximum commits kept before archiving; must be larger than the previous setting
configuration.setInteger(FlinkOptions.ARCHIVE_MAX_COMMITS, tableMeta.getHudi().getKeepCommitVersion() + 100);
// Number of versions retained for log and data files
configuration.setString(FlinkOptions.CLEAN_POLICY, HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS.name());
configuration.setInteger(FlinkOptions.CLEAN_RETAIN_FILE_VERSIONS, tableMeta.getHudi().getKeepFileVersion());
// Disable the embedded timeline HTTP server
// configuration.setBoolean(HoodieWriteConfig.EMBEDDED_TIMELINE_SERVER_ENABLE.key(), false);
return configuration;
}
public static void sinkToHoodieByTable(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta, StreamExecutionEnvironment environment, DataStream<Record> inputDataStream) {
Schema schema = avroSchemaWithExtraFields(tableMeta);
DataStream<RowData> dataStream = inputDataStream
.filter(new OperationTypeFilter(globalConfiguration, flinkJob, tableMeta))
.name("Count operation type")
.map(new Record2RowDataFunction(globalConfiguration, flinkJob, tableMeta), TypeInformation.of(new TypeHint<List<RowData>>() {
}))
.name("Covert Row ( " + tableMeta.getSchema() + "-" + tableMeta.getTable() + " )")
.flatMap((list, collector) -> list.forEach(collector::collect), TypeInformation.of(RowData.class))
.filter(Objects::nonNull)
.name("Filter not null");
RowType rowType = (RowType) AvroSchemaConverter.convertToDataType(schema).getLogicalType();
StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(
environment,
EnvironmentSettings.newInstance()
.inStreamingMode()
.useBlinkPlanner()
.build()
);
Configuration configuration = tableEnvironment.getConfig().getConfiguration();
int parallelism = configuration.getInteger("parallelism", 1);
configuration = getSyncFlinkConfiguration(globalConfiguration, configuration, flinkJob, tableMeta, schema, parallelism);
DataStream<HoodieRecord> hoodieRecordDataStream = Pipelines.bootstrap(configuration, rowType, parallelism, dataStream);
DataStream<Object> pipeline = Pipelines.hoodieStreamWrite(configuration, parallelism, hoodieRecordDataStream);
if (OptionsResolver.needsAsyncCompaction(configuration)) {
Pipelines.compact(configuration, pipeline);
} else {
Pipelines.clean(configuration, pipeline);
}
}
}
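
End to end, a job wires a Record stream into the Hudi sink roughly like this (hedged sketch; the collection source stands in for the Pulsar source the real jobs use):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Record> records = env.fromCollection(sampleRecords); // illustrative source
SyncUtils.sinkToHoodieByTable(globalConfiguration, flinkJob, tableMeta, env, records);
env.execute(flinkJob.getName());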

View File

@@ -0,0 +1,98 @@
package com.lanyuanxiaoyao.service.sync.utils;
import cn.hutool.core.util.ObjectUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
import org.apache.flink.shaded.curator4.org.apache.curator.framework.CuratorFramework;
import org.apache.flink.shaded.curator4.org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.flink.shaded.curator4.org.apache.curator.framework.imps.CuratorFrameworkState;
import org.apache.flink.shaded.curator4.org.apache.curator.retry.ExponentialBackoffRetry;
import org.apache.flink.shaded.curator4.org.apache.curator.utils.CloseableUtils;
import org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.CreateMode;
import org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException;
import org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.client.ZooKeeperSaslClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* ZooKeeper operations
*
* @author ZhangJiacheng
* @date 2023-05-10
*/
public class ZkUtils {
private static final Logger logger = LoggerFactory.getLogger(ZkUtils.class);
private static CuratorFramework client;
private static void createClient(String url) {
System.setProperty(ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY, "false");
if (ObjectUtil.isNull(client)) {
client = CuratorFrameworkFactory.builder()
.connectString(url)
.retryPolicy(new ExponentialBackoffRetry((int) (5 * Constants.SECOND), 5))
.sessionTimeoutMs((int) (10 * Constants.SECOND))
.connectionTimeoutMs((int) (10 * Constants.SECOND))
.build();
}
if (!CuratorFrameworkState.STARTED.equals(client.getState())) {
client.start();
}
}
public static void closeClient() {
if (ObjectUtil.isNotNull(client)) {
CloseableUtils.closeQuietly(client);
}
}
public static void createSynchronizerLock(FlinkJob job, String zookeeperUrl, String runMeta) {
createLock(zookeeperUrl, runMeta, NameHelper.syncRunningLockPath(job.getId()));
}
public static void createSynchronizerLock(FlinkJob job, TableMeta meta, String zookeeperUrl, String runMeta) {
createLock(zookeeperUrl, runMeta, NameHelper.syncRunningLockPath(job.getId(), meta.getAlias()));
}
public static void releaseSynchronizerLock(FlinkJob job, TableMeta meta) {
releaseLock(NameHelper.syncRunningLockPath(job.getId(), meta.getAlias()));
}
public static void createCompactionLock(FlinkJob job, TableMeta meta, String zookeeperUrl, String runMeta) {
createLock(zookeeperUrl, runMeta, NameHelper.compactionRunningLockPath(job.getId(), meta.getAlias()));
}
public static void releaseCompactionLock(FlinkJob job, TableMeta meta) {
releaseLock(NameHelper.compactionRunningLockPath(job.getId(), meta.getAlias()));
}
private static void createLock(String zookeeperUrl, String runMeta, String lockPath) {
try {
createClient(zookeeperUrl);
client.create()
.creatingParentsIfNeeded()
.withMode(CreateMode.EPHEMERAL)
.forPath(lockPath, runMeta.getBytes());
} catch (KeeperException.NodeExistsException e) {
logger.error("Lock exists for " + lockPath, e);
throw new RuntimeException(e);
} catch (Exception e) {
logger.error("Unknown error", e);
throw new RuntimeException(e);
}
}
private static void releaseLock(String lockPath) {
try {
if (ObjectUtil.isNotNull(client)) {
if (ObjectUtil.isNotNull(client.checkExists().forPath(lockPath))) {
client.delete().forPath(lockPath);
}
}
} catch (Exception e) {
logger.error("Unknown error", e);
throw new RuntimeException(e);
}
}
}
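
The locks are ephemeral znodes, so they vanish with the ZooKeeper session; the expected pattern around a run is acquire, work, release, close (hedged sketch; runSyncPipeline is hypothetical):

try {
ZkUtils.createSynchronizerLock(flinkJob, tableMeta, zookeeperUrl, runMeta); // throws if another run already holds the lock
runSyncPipeline(); // hypothetical job body
} finally {
ZkUtils.releaseSynchronizerLock(flinkJob, tableMeta);
ZkUtils.closeClient();
}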

View File

@@ -0,0 +1,87 @@
<configuration>
<appender name="Loki" class="pl.tkowalcz.tjahzi.logback.LokiAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<url>${loki_push_url:- }</url>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %p [${HOSTNAME}] [%t] %logger #@# %m%n%ex{full}</pattern>
</encoder>
<label>
<name>app</name>
<value>hudi-${run_type:- }</value>
</label>
<label>
<name>host</name>
<value>${HOSTNAME}</value>
</label>
<label>
<name>run_type</name>
<value>${run_type:- }</value>
</label>
<label>
<name>flink_job_id</name>
<value>${flink_job_id:- }</value>
</label>
<label>
<name>flink_job_name</name>
<value>${flink_job_name:- }</value>
</label>
<label>
<name>schema</name>
<value>${schema:- }</value>
</label>
<label>
<name>table</name>
<value>${table:- }</value>
</label>
<label>
<name>batch_id</name>
<value>${batch_id:- }</value>
</label>
<label>
<name>alias</name>
<value>${alias:- }</value>
</label>
<label>
<name>app_id</name>
<value>${_APP_ID:- }</value>
</label>
<label>
<name>container_id</name>
<value>${CONTAINER_ID:- }</value>
</label>
<logLevelLabel>level</logLevelLabel>
</appender>
<appender name="File" class="ch.qos.logback.core.FileAppender">
<file>run.log</file>
<append>false</append>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %p [${HOSTNAME}] [%t] %logger #@# %m%n%ex{full}</pattern>
</encoder>
</appender>
<appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %p [${HOSTNAME}] [%t] %logger #@# %m%n%ex{full}</pattern>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="Loki"/>
<appender-ref ref="File"/>
<appender-ref ref="Console"/>
</root>
<logger name="org.apache.hadoop.conf.Configuration" level="ERROR"/>
<logger name="org.apache.hadoop.util.NativeCodeLoader" level="ERROR"/>
<logger name="org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory" level="ERROR"/>
<logger name="akka" level="ERROR"/>
<logger name="org.apache.flink.runtime" level="ERROR"/>
<logger name="org.apache.flink.runtime.taskexecutor.TaskExecutor" level="WARN"/>
<logger name="org.apache.flink.core.plugin.PluginConfig" level="ERROR"/>
<logger name="org.apache.hudi" level="INFO"/>
<logger name="com.eshore.odcp.hudi.connector.sync" level="INFO"/>
</configuration>