feat(all): 迁移common、sync、executor项目
This commit is contained in:
4
bin/build-sync.sh
Executable file
4
bin/build-sync.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
|
||||
mvn -pl service-common clean deploy -D skipTests -P local -s ~/.m2/settings-development.xml
|
||||
mvn -pl utils/sync clean package -D skipTests -s ~/.m2/settings-development.xml
|
||||
ytp-transfer2 /Users/lanyuanxiaoyao/Project/IdeaProjects/hudi-service/utils/sync/target/sync-1.0.0-SNAPSHOT.jar
|
||||
5
pom.xml
5
pom.xml
@@ -9,6 +9,7 @@
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<packaging>pom</packaging>
|
||||
<modules>
|
||||
<module>service-common</module>
|
||||
<module>service-configuration</module>
|
||||
<module>service-gateway</module>
|
||||
<module>service-queue</module>
|
||||
@@ -32,6 +33,8 @@
|
||||
<module>service-scheduler</module>
|
||||
<module>service-launcher</module>
|
||||
<module>service-command</module>
|
||||
<module>utils/executor</module>
|
||||
<module>utils/sync</module>
|
||||
</modules>
|
||||
|
||||
<properties>
|
||||
@@ -39,7 +42,7 @@
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
|
||||
<build-tag>b1e11</build-tag>
|
||||
<build-tag>b2b12</build-tag>
|
||||
|
||||
<spring-boot.version>2.6.8</spring-boot.version>
|
||||
<spring-cloud.version>2021.0.3</spring-cloud.version>
|
||||
|
||||
41
service-common/pom.xml
Normal file
41
service-common/pom.xml
Normal file
@@ -0,0 +1,41 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>com.lanyuanxiaoyao</groupId>
|
||||
<artifactId>hudi-service</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>service-common</artifactId>
|
||||
|
||||
<dependencies>
|
||||
<!-- Common 包不要引入第三方依赖,避免冲突,一些简单的工具类自己手动实现,复杂或不必要 common 的流程不要放在 common 包里实现 -->
|
||||
<!-- hutool 系列是一个无三方依赖的工具包,建议使用,但同样也是能不用就不用,保持纯净 -->
|
||||
<dependency>
|
||||
<groupId>cn.hutool</groupId>
|
||||
<artifactId>hutool-all</artifactId>
|
||||
</dependency>
|
||||
<!-- 用于提供有限的SQL构造,避免引入复杂的ORM框架 -->
|
||||
<dependency>
|
||||
<groupId>io.github.dragons96</groupId>
|
||||
<artifactId>sql-builder</artifactId>
|
||||
<version>0.0.5.3</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-deploy-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@@ -0,0 +1,211 @@
|
||||
package com.lanyuanxiaoyao.service.common;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.function.BiFunction;
|
||||
|
||||
/**
|
||||
* 常量
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @version 0.0.1
|
||||
* @date 2021-12-03
|
||||
*/
|
||||
public interface Constants {
|
||||
// String DATABASE_NAME = "hudi_collect_build";
|
||||
// String DATABASE_NAME = "hudi_collect_build_2";
|
||||
String DATABASE_NAME = "hudi_collect_build_b12";
|
||||
|
||||
String API_HEADER_NAME = "Api-Version";
|
||||
String API_VERSION = "1";
|
||||
|
||||
/**
|
||||
* 联合主键
|
||||
*/
|
||||
String UNION_KEY_NAME = "_key";
|
||||
/**
|
||||
* 源端最后操作时间
|
||||
*/
|
||||
String LATEST_OPERATION_TIMESTAMP_KEY_NAME = "latest_op_ts";
|
||||
/**
|
||||
* 记录下游入库时间
|
||||
*/
|
||||
String UPDATE_TIMESTAMP_KEY_NAME = "update_ts";
|
||||
/**
|
||||
* Hudi 删除标记字段
|
||||
*/
|
||||
String HUDI_DELETE_KEY_NAME = "_hoodie_is_deleted";
|
||||
|
||||
String PULSAR_SUBSCRIPTION_NAME_PREFIX = "Hudi_Sync_Pulsar_Reader";
|
||||
|
||||
String VERSION_UPDATE_KEY = "versionUpdate";
|
||||
String VERSION_KEY = "version";
|
||||
|
||||
String DELETE = "D";
|
||||
String INSERT = "I";
|
||||
String UPDATE = "U";
|
||||
String DDL = "ddl";
|
||||
String UNKNOWN = "unknown";
|
||||
|
||||
String CITY_ID = "CITY_ID";
|
||||
String INCLUDE = "INCLUDE";
|
||||
String EXCLUDE = "EXCLUDE";
|
||||
|
||||
String JOB_ID = "job-id";
|
||||
String SERVICE_MODE = "service-mode";
|
||||
String FLINK_JOB = "flink-job";
|
||||
String TABLE_META = "table-meta";
|
||||
String TABLE_META_LIST = "table-meta-list";
|
||||
String MESSAGE_ID = "message-id";
|
||||
String INSTANTS = "instants";
|
||||
String BETA = "beta";
|
||||
String CLUSTER = "cluster";
|
||||
|
||||
String COW = "COPY_ON_WRITE";
|
||||
String MOR = "MERGE_ON_READ";
|
||||
|
||||
String FLINK_JOB_OPTION = "-" + FLINK_JOB;
|
||||
String TABLE_META_OPTION = "-" + TABLE_META;
|
||||
String TABLE_META_LIST_OPTION = "-" + TABLE_META_LIST;
|
||||
String INSTANTS_OPTION = "-" + INSTANTS;
|
||||
String BETA_OPTION = "-" + BETA;
|
||||
String CLUSTER_OPTION = "-" + CLUSTER;
|
||||
|
||||
String SPRING_SECURITY_AUTHORITY = "Anonymous";
|
||||
String SPRING_SECURITY_USERNAME = "AxhEbscwsJDbYMH2";
|
||||
String SPRING_SECURITY_PASSWORD = "{noop}cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4";
|
||||
String SPRING_SECURITY_PASSWORD_PLAIN = "cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4";
|
||||
|
||||
String VICTORIA_USERNAME = "EsCFVuNkiDWv7PKmcF";
|
||||
String VICTORIA_PASSWORD = "Abf%x9ocS^iKr3tgrd";
|
||||
|
||||
String SCHEMA_NAME = "schema";
|
||||
String TABLE_NAME = "table";
|
||||
String DATA_TIME = "data-time";
|
||||
String DATA_PARENT_PATH = "data-parent-path";
|
||||
|
||||
String METRICS_PREFIX = "metrics_hudi";
|
||||
String METRICS_YARN_PREFIX = METRICS_PREFIX + "_yarn";
|
||||
String METRICS_YARN_JOB = METRICS_YARN_PREFIX + "_job";
|
||||
String METRICS_YARN_TABLE = METRICS_YARN_PREFIX + "_table";
|
||||
String METRICS_SYNC_PREFIX = METRICS_PREFIX + "_sync";
|
||||
String METRICS_SYNC_SOURCE_LATENCY = METRICS_SYNC_PREFIX + "_source_latency";
|
||||
String METRICS_SYNC_LATENCY = METRICS_SYNC_PREFIX + "_latency";
|
||||
String METRICS_SYNC_FLINK_JOB_ID = METRICS_SYNC_PREFIX + "_flink_job_id";
|
||||
|
||||
String METRICS_SYNC_SOURCE_MESSAGE_RECEIVE = METRICS_SYNC_PREFIX + "_source_message_receive";
|
||||
String METRICS_SYNC_SOURCE_MESSAGE_SIZE_RECEIVE_BYTES = METRICS_SYNC_PREFIX + "_source_message_receive_bytes";
|
||||
String METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE = METRICS_SYNC_PREFIX + "_source_operation_type_receive";
|
||||
String METRICS_SYNC_SOURCE_CHANGE_FILTER = METRICS_SYNC_PREFIX + "_source_change_filter";
|
||||
String METRICS_SYNC_SOURCE_CHANGE_PARTITION = METRICS_SYNC_PREFIX + "_source_change_partition";
|
||||
String METRICS_SYNC_SOURCE_BACK_LOGS = METRICS_SYNC_PREFIX + "_source_back_logs";
|
||||
|
||||
String METRICS_LABEL_FLINK_JOB_ID = "flink_job_id";
|
||||
String METRICS_LABEL_FLINK_JOB_NAME = "flink_job_name";
|
||||
String METRICS_LABEL_FLINK_NATIVE_JOB_ID = "flink_native_job_id";
|
||||
String METRICS_LABEL_FLINK_NATIVE_TASK_NAME = "flink_native_task_name";
|
||||
String METRICS_LABEL_FLINK_PARALLEL_ID = "flink_parallel_id";
|
||||
String METRICS_LABEL_RUN_TYPE = "run_type";
|
||||
String METRICS_LABEL_EXECUTOR_VERSION = "executor_version";
|
||||
String METRICS_LABEL_CLUSTER = "cluster";
|
||||
|
||||
String METRICS_RUN_TYPE_SYNC = "sync";
|
||||
String METRICS_RUN_TYPE_COMPACTION = "compaction";
|
||||
|
||||
String METRICS_LABEL_SCHEMA = "schema";
|
||||
String METRICS_LABEL_TABLE = "table";
|
||||
String METRICS_LABEL_STATUS = "status";
|
||||
String METRICS_LABEL_TOPIC = "topic";
|
||||
String METRICS_LABEL_BATCH_ID = "batch_id";
|
||||
String METRICS_LABEL_ALIAS = "alias";
|
||||
String METRICS_LABEL_APPLICATION_ID = "application_id";
|
||||
|
||||
String METRICS_STATUS_RUNNING = "running";
|
||||
String METRICS_STATUS_STOPPED = "stopped";
|
||||
|
||||
String METRICS_LABEL_TYPE = "type";
|
||||
|
||||
String LOKI_PUSH_URL = "loki_push_url";
|
||||
|
||||
DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
||||
|
||||
String OPERATION_DONE = "操作完成";
|
||||
String OPERATION_CANCEL = "操作取消";
|
||||
String FETCHING_DATA = "Fetching Data";
|
||||
|
||||
String COMPACTION_STATUS_SCHEDULE = "SCHEDULE";
|
||||
String COMPACTION_STATUS_START = "START";
|
||||
String COMPACTION_STATUS_FINISH = "FINISH";
|
||||
String COMPACTION_STATUS_FAILURE = "FAILURE";
|
||||
|
||||
long SECOND = 1000;
|
||||
long HALF_MINUTE = 30 * SECOND;
|
||||
long MINUTE = 60 * SECOND;
|
||||
long HALF_HOUR = 30 * MINUTE;
|
||||
long HOUR = 60 * MINUTE;
|
||||
|
||||
long KB = 1024;
|
||||
long MB = 1024 * KB;
|
||||
long GB = 1024 * MB;
|
||||
long TB = 1024 * GB;
|
||||
|
||||
String TAG_SPLIT = ";";
|
||||
String TAG_OPERATOR = "=";
|
||||
|
||||
String EVENT = "event";
|
||||
String FROM_COMMAND_UTIL = "command util";
|
||||
String FROM_COMPACTOR = "compactor";
|
||||
|
||||
int COMMAND_RENDER_WIDTH = 500;
|
||||
|
||||
String LOG_FLINK_JOB_ID_LABEL = "LOG_FLINK_JOB_ID_LABEL";
|
||||
String LOG_FLINK_JOB_ID = "flink_job_id";
|
||||
String LOG_ALIAS_LABEL = "LOG_ALIAS_LABEL";
|
||||
String LOG_ALIAS = "alias";
|
||||
String LOG_JOB_ID_LABEL = "LOG_JOB_ID_LABEL";
|
||||
String LOG_JOB_ID = "job_id";
|
||||
|
||||
String LOG_POINT_PREFIX = "LOP-";
|
||||
String LOG_POINT_MESSAGE_ID_EMPTY = LOG_POINT_PREFIX + "000001";
|
||||
String LOG_POINT_CHECKPOINT_INITIAL = LOG_POINT_PREFIX + "000002";
|
||||
String LOG_POINT_CHECKPOINT_INITIAL_MESSAGE_ID = LOG_POINT_PREFIX + "000003";
|
||||
String LOG_POINT_PULSAR_SOURCE_BOOTSTRAP_MESSAGE_ID = LOG_POINT_PREFIX + "000004";
|
||||
String LOG_POINT_PULSAR_SOURCE_GET_MESSAGE_ID_ERROR = LOG_POINT_PREFIX + "000005";
|
||||
String LOG_POINT_FIELD_TYPE_NOT_FOUND = LOG_POINT_PREFIX + "000006";
|
||||
|
||||
String TAGS_NO_COMPACT = "NO_COMPACT";
|
||||
String TAGS_PULSAR_BACKUP = "PULSAR_BACKUP";
|
||||
String TAGS_NO_PRE_COMBINE = "NO_PRE_COMBINE";
|
||||
String TAGS_PRE_COMBINE = "PRE_COMBINE";
|
||||
String TAGS_NO_IGNORE_FAILED = "NO_IGNORE_FAILED";
|
||||
String TAGS_DISABLE_CHAINING = "DISABLE_CHAINING";
|
||||
String TAGS_TRACE_LATEST_OP_TS = "TRACE_LATEST_OP_TS";
|
||||
String TAGS_SOURCE_READER = "SOURCE_READER";
|
||||
String TAGS_USE_TEST_JAR = "USE_TEST_JAR";
|
||||
String TAGS_ODS = "ODS";
|
||||
String TAGS_ODS_FOCUS = "ODS_FOCUS";
|
||||
|
||||
String COMPACTION_QUEUE_PRE = "compaction-queue-pre";
|
||||
String COMPACTION_QUEUE_B1 = "compaction-queue-b1";
|
||||
String COMPACTION_QUEUE_B5 = "compaction-queue-b5";
|
||||
String COMPACTION_QUEUE_A4 = "compaction-queue-a4";
|
||||
String COMPACTION_QUEUE_B12 = "compaction-queue-b12";
|
||||
|
||||
String CLUSTER_B1 = "b1";
|
||||
String CLUSTER_B5 = "b5";
|
||||
String CLUSTER_A4 = "a4";
|
||||
String CLUSTER_B12 = "b12";
|
||||
|
||||
String SCHEDULE_JOB_FAIL_COUNT = "SCHEDULE_JOB_FAIL_COUNT";
|
||||
|
||||
String SCHEDULE_RECOMMEND = "schedule_recommend";
|
||||
String SCHEDULE_FORCE = "schedule_force";
|
||||
|
||||
BiFunction<TableMeta, String, String> FIELD_COVERT = (tableMeta, field) -> {
|
||||
if (TableMeta.SourceType.TELEPG.equals(tableMeta.getSourceType())) {
|
||||
return field.toLowerCase();
|
||||
} else {
|
||||
return field;
|
||||
}
|
||||
};
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,123 @@
|
||||
package com.lanyuanxiaoyao.service.common.entity;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Flink Job
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @version 0.0.1
|
||||
* @date 2021-12-08
|
||||
*/
|
||||
public class FlinkJob implements Serializable {
|
||||
private Long id;
|
||||
private String name;
|
||||
private RunMode runMode;
|
||||
private TableMeta.YarnMeta oneInOneSyncYarn;
|
||||
|
||||
public FlinkJob() {
|
||||
}
|
||||
|
||||
public FlinkJob(Builder builder) {
|
||||
this.id = builder.id;
|
||||
this.name = builder.name;
|
||||
this.runMode = builder.runMode;
|
||||
this.oneInOneSyncYarn = builder.oneInOneSyncYarn;
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public Long getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(Long id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public RunMode getRunMode() {
|
||||
return runMode;
|
||||
}
|
||||
|
||||
public void setRunMode(RunMode runMode) {
|
||||
this.runMode = runMode;
|
||||
}
|
||||
|
||||
public TableMeta.YarnMeta getOneInOneSyncYarn() {
|
||||
return oneInOneSyncYarn;
|
||||
}
|
||||
|
||||
public void setOneInOneSyncYarn(TableMeta.YarnMeta oneInOneSyncYarn) {
|
||||
this.oneInOneSyncYarn = oneInOneSyncYarn;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "FlinkJob{" +
|
||||
"id=" + id +
|
||||
", name='" + name + '\'' +
|
||||
", runMode=" + runMode +
|
||||
", oneInOneSyncYarn=" + oneInOneSyncYarn +
|
||||
'}';
|
||||
}
|
||||
|
||||
public enum RunMode {
|
||||
/**
|
||||
* 所有表放在同一个任务中
|
||||
*/
|
||||
ALL_IN_ONE,
|
||||
/**
|
||||
* 每个表放在单独的任务中
|
||||
*/
|
||||
ONE_IN_ONE,
|
||||
/**
|
||||
* 针对 ACCT 小表,将同一个表放在同一个子任务中
|
||||
*/
|
||||
ALL_IN_ONE_BY_TABLE,
|
||||
ALL_IN_ONE_BY_SCHEMA,
|
||||
}
|
||||
|
||||
public static final class Builder {
|
||||
private Long id;
|
||||
private String name;
|
||||
private RunMode runMode;
|
||||
private TableMeta.YarnMeta oneInOneSyncYarn;
|
||||
|
||||
private Builder() {}
|
||||
|
||||
public Builder id(Long id) {
|
||||
this.id = id;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder name(String name) {
|
||||
this.name = name;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder runMode(RunMode runMode) {
|
||||
this.runMode = runMode;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder oneInOneSyncYarn(TableMeta.YarnMeta oneInOneSyncYarn) {
|
||||
this.oneInOneSyncYarn = oneInOneSyncYarn;
|
||||
return this;
|
||||
}
|
||||
|
||||
public FlinkJob build() {
|
||||
return new FlinkJob(this);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,197 @@
|
||||
package com.lanyuanxiaoyao.service.common.entity;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 从 Pulsar 读取的消息封装
|
||||
*
|
||||
* @author lanyuanxiaoyao
|
||||
* @version 0.0.1
|
||||
* @date 2021-11-25
|
||||
*/
|
||||
public class Record implements Serializable {
|
||||
private Source source;
|
||||
private Statement statement;
|
||||
|
||||
public Record() {
|
||||
}
|
||||
|
||||
public Source getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public void setSource(Source source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
public Statement getStatement() {
|
||||
return statement;
|
||||
}
|
||||
|
||||
public void setStatement(Statement statement) {
|
||||
this.statement = statement;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Record{" +
|
||||
"source=" + source +
|
||||
", statement=" + statement +
|
||||
'}';
|
||||
}
|
||||
|
||||
public static class Source implements Serializable {
|
||||
private String sourceId;
|
||||
private String sourceType;
|
||||
private String sourcePos;
|
||||
private String currentTs;
|
||||
|
||||
public Source() {
|
||||
}
|
||||
|
||||
public String getSourceId() {
|
||||
return sourceId;
|
||||
}
|
||||
|
||||
public void setSourceId(String sourceId) {
|
||||
this.sourceId = sourceId;
|
||||
}
|
||||
|
||||
public String getSourceType() {
|
||||
return sourceType;
|
||||
}
|
||||
|
||||
public void setSourceType(String sourceType) {
|
||||
this.sourceType = sourceType;
|
||||
}
|
||||
|
||||
public String getSourcePos() {
|
||||
return sourcePos;
|
||||
}
|
||||
|
||||
public void setSourcePos(String sourcePos) {
|
||||
this.sourcePos = sourcePos;
|
||||
}
|
||||
|
||||
public String getCurrentTs() {
|
||||
return currentTs;
|
||||
}
|
||||
|
||||
public void setCurrentTs(String currentTs) {
|
||||
this.currentTs = currentTs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Source{" +
|
||||
"sourceId='" + sourceId + '\'' +
|
||||
", sourceType='" + sourceType + '\'' +
|
||||
", sourcePos='" + sourcePos + '\'' +
|
||||
", currentTs='" + currentTs + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
||||
public static class Statement implements Serializable {
|
||||
private String schema;
|
||||
private String table;
|
||||
private String opStatement;
|
||||
private String opType;
|
||||
private String op;
|
||||
private String opTs;
|
||||
private String version;
|
||||
private Map<String, Object> before;
|
||||
private Map<String, Object> after;
|
||||
|
||||
public Statement() {
|
||||
}
|
||||
|
||||
public String getSchema() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
public void setSchema(String schema) {
|
||||
this.schema = schema;
|
||||
}
|
||||
|
||||
public String getTable() {
|
||||
return table;
|
||||
}
|
||||
|
||||
public void setTable(String table) {
|
||||
this.table = table;
|
||||
}
|
||||
|
||||
public String getOpStatement() {
|
||||
return opStatement;
|
||||
}
|
||||
|
||||
public void setOpStatement(String opStatement) {
|
||||
this.opStatement = opStatement;
|
||||
}
|
||||
|
||||
public String getOpType() {
|
||||
return opType;
|
||||
}
|
||||
|
||||
public void setOpType(String opType) {
|
||||
this.opType = opType;
|
||||
}
|
||||
|
||||
public String getOp() {
|
||||
return op;
|
||||
}
|
||||
|
||||
public void setOp(String op) {
|
||||
this.op = op;
|
||||
}
|
||||
|
||||
public String getOpTs() {
|
||||
return opTs;
|
||||
}
|
||||
|
||||
public void setOpTs(String opTs) {
|
||||
this.opTs = opTs;
|
||||
}
|
||||
|
||||
public String getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public void setVersion(String version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
public Map<String, Object> getBefore() {
|
||||
return before;
|
||||
}
|
||||
|
||||
public void setBefore(Map<String, Object> before) {
|
||||
this.before = before;
|
||||
}
|
||||
|
||||
public Map<String, Object> getAfter() {
|
||||
return after;
|
||||
}
|
||||
|
||||
public void setAfter(Map<String, Object> after) {
|
||||
this.after = after;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Statement{" +
|
||||
"schema='" + schema + '\'' +
|
||||
", table='" + table + '\'' +
|
||||
", opStatement='" + opStatement + '\'' +
|
||||
", opType='" + opType + '\'' +
|
||||
", op='" + op + '\'' +
|
||||
", opTs='" + opTs + '\'' +
|
||||
", version='" + version + '\'' +
|
||||
", before=" + before +
|
||||
", after=" + after +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
package com.lanyuanxiaoyao.service.common.entity;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* 运行时参数
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2023-05-11
|
||||
*/
|
||||
@SuppressWarnings("FieldMayBeFinal")
|
||||
public class RunMeta implements Serializable {
|
||||
private String cluster;
|
||||
private Long flinkJobId;
|
||||
private String alias;
|
||||
private String flinkJobName;
|
||||
private String host;
|
||||
private String applicationId;
|
||||
private String containerId;
|
||||
private String containerPath;
|
||||
private String runType;
|
||||
private String executorVersion;
|
||||
private String jvmPid;
|
||||
private String applicationProxy;
|
||||
private String subscriptionName;
|
||||
|
||||
public RunMeta() {
|
||||
this.flinkJobName = System.getenv("flink_job_name");
|
||||
this.host = System.getenv("NM_HOST");
|
||||
this.applicationId = System.getenv("_APP_ID");
|
||||
this.containerId = System.getenv("CONTAINER_ID");
|
||||
this.containerPath = System.getenv("PWD");
|
||||
this.runType = System.getenv("run_type");
|
||||
this.executorVersion = System.getenv("executor_version");
|
||||
this.jvmPid = System.getenv("JVM_PID");
|
||||
this.applicationProxy = System.getenv("APPLICATION_WEB_PROXY_BASE");
|
||||
}
|
||||
|
||||
public RunMeta(String cluster, Long flinkJobId) {
|
||||
this();
|
||||
this.cluster = cluster;
|
||||
this.flinkJobId = flinkJobId;
|
||||
}
|
||||
|
||||
public RunMeta(String cluster, Long flinkJobId, String alias) {
|
||||
this(cluster, flinkJobId);
|
||||
this.alias = alias;
|
||||
this.subscriptionName = NameHelper.pulsarSubscriptionName(flinkJobId, alias);
|
||||
}
|
||||
|
||||
public String getCluster() {
|
||||
return cluster;
|
||||
}
|
||||
|
||||
public Long getFlinkJobId() {
|
||||
return flinkJobId;
|
||||
}
|
||||
|
||||
public String getAlias() {
|
||||
return alias;
|
||||
}
|
||||
|
||||
public String getFlinkJobName() {
|
||||
return flinkJobName;
|
||||
}
|
||||
|
||||
public String getHost() {
|
||||
return host;
|
||||
}
|
||||
|
||||
public String getApplicationId() {
|
||||
return applicationId;
|
||||
}
|
||||
|
||||
public String getContainerId() {
|
||||
return containerId;
|
||||
}
|
||||
|
||||
public String getContainerPath() {
|
||||
return containerPath;
|
||||
}
|
||||
|
||||
public String getRunType() {
|
||||
return runType;
|
||||
}
|
||||
|
||||
public String getExecutorVersion() {
|
||||
return executorVersion;
|
||||
}
|
||||
|
||||
public String getJvmPid() {
|
||||
return jvmPid;
|
||||
}
|
||||
|
||||
public String getApplicationProxy() {
|
||||
return applicationProxy;
|
||||
}
|
||||
|
||||
public String getSubscriptionName() {
|
||||
return subscriptionName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "RunMeta{" +
|
||||
"cluster='" + cluster + '\'' +
|
||||
", flinkJobId='" + flinkJobId + '\'' +
|
||||
", flinkJobName='" + flinkJobName + '\'' +
|
||||
", host='" + host + '\'' +
|
||||
", applicationId='" + applicationId + '\'' +
|
||||
", containerId='" + containerId + '\'' +
|
||||
", containerPath='" + containerPath + '\'' +
|
||||
", runType='" + runType + '\'' +
|
||||
", executorVersion='" + executorVersion + '\'' +
|
||||
", jvmPid='" + jvmPid + '\'' +
|
||||
", applicationProxy='" + applicationProxy + '\'' +
|
||||
", subscriptionName='" + subscriptionName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,259 @@
|
||||
package com.lanyuanxiaoyao.service.common.entity;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* 同步压缩状态表类
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2023-04-24
|
||||
*/
|
||||
public class SyncState implements Serializable {
|
||||
private Long flinkJobId;
|
||||
private String alias;
|
||||
private String messageId;
|
||||
private Long sourceStartTime;
|
||||
private Long sourceCheckpointTime;
|
||||
private Long sourcePublishTime;
|
||||
private Long sourceOperationTime;
|
||||
private Long compactionStartTime;
|
||||
private Long compactionFinishTime;
|
||||
private String compactionApplicationId;
|
||||
private String compactionStatus;
|
||||
private Long compactionStatusTime;
|
||||
private Long compactionLatestOperationTime;
|
||||
|
||||
public SyncState() {
|
||||
}
|
||||
|
||||
public SyncState(Builder builder) {
|
||||
this.flinkJobId = builder.flinkJobId;
|
||||
this.alias = builder.alias;
|
||||
this.messageId = builder.messageId;
|
||||
this.sourceStartTime = builder.sourceStartTime;
|
||||
this.sourceCheckpointTime = builder.sourceCheckpointTime;
|
||||
this.sourcePublishTime = builder.sourcePublishTime;
|
||||
this.sourceOperationTime = builder.sourceOperationTime;
|
||||
this.compactionStartTime = builder.compactionStartTime;
|
||||
this.compactionFinishTime = builder.compactionFinishTime;
|
||||
this.compactionApplicationId = builder.compactionApplicationId;
|
||||
this.compactionStatus = builder.compactionStatus;
|
||||
this.compactionStatusTime = builder.compactionStatusTime;
|
||||
this.compactionLatestOperationTime = builder.compactionLatestOperationTime;
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public Long getFlinkJobId() {
|
||||
return flinkJobId;
|
||||
}
|
||||
|
||||
public void setFlinkJobId(Long flinkJobId) {
|
||||
this.flinkJobId = flinkJobId;
|
||||
}
|
||||
|
||||
public String getAlias() {
|
||||
return alias;
|
||||
}
|
||||
|
||||
public void setAlias(String alias) {
|
||||
this.alias = alias;
|
||||
}
|
||||
|
||||
public String getMessageId() {
|
||||
return messageId;
|
||||
}
|
||||
|
||||
public void setMessageId(String messageId) {
|
||||
this.messageId = messageId;
|
||||
}
|
||||
|
||||
public Long getSourceStartTime() {
|
||||
return sourceStartTime;
|
||||
}
|
||||
|
||||
public void setSourceStartTime(Long sourceStartTime) {
|
||||
this.sourceStartTime = sourceStartTime;
|
||||
}
|
||||
|
||||
public Long getSourceCheckpointTime() {
|
||||
return sourceCheckpointTime;
|
||||
}
|
||||
|
||||
public void setSourceCheckpointTime(Long sourceCheckpointTime) {
|
||||
this.sourceCheckpointTime = sourceCheckpointTime;
|
||||
}
|
||||
|
||||
public Long getSourcePublishTime() {
|
||||
return sourcePublishTime;
|
||||
}
|
||||
|
||||
public void setSourcePublishTime(Long sourcePublishTime) {
|
||||
this.sourcePublishTime = sourcePublishTime;
|
||||
}
|
||||
|
||||
public Long getSourceOperationTime() {
|
||||
return sourceOperationTime;
|
||||
}
|
||||
|
||||
public void setSourceOperationTime(Long sourceOperationTime) {
|
||||
this.sourceOperationTime = sourceOperationTime;
|
||||
}
|
||||
|
||||
public Long getCompactionStartTime() {
|
||||
return compactionStartTime;
|
||||
}
|
||||
|
||||
public void setCompactionStartTime(Long compactionStartTime) {
|
||||
this.compactionStartTime = compactionStartTime;
|
||||
}
|
||||
|
||||
public Long getCompactionFinishTime() {
|
||||
return compactionFinishTime;
|
||||
}
|
||||
|
||||
public void setCompactionFinishTime(Long compactionFinishTime) {
|
||||
this.compactionFinishTime = compactionFinishTime;
|
||||
}
|
||||
|
||||
public String getCompactionApplicationId() {
|
||||
return compactionApplicationId;
|
||||
}
|
||||
|
||||
public void setCompactionApplicationId(String compactionApplicationId) {
|
||||
this.compactionApplicationId = compactionApplicationId;
|
||||
}
|
||||
|
||||
public String getCompactionStatus() {
|
||||
return compactionStatus;
|
||||
}
|
||||
|
||||
public void setCompactionStatus(String compactionStatus) {
|
||||
this.compactionStatus = compactionStatus;
|
||||
}
|
||||
|
||||
public Long getCompactionStatusTime() {
|
||||
return compactionStatusTime;
|
||||
}
|
||||
|
||||
public void setCompactionStatusTime(Long compactionStatusTime) {
|
||||
this.compactionStatusTime = compactionStatusTime;
|
||||
}
|
||||
|
||||
public Long getCompactionLatestOperationTime() {
|
||||
return compactionLatestOperationTime;
|
||||
}
|
||||
|
||||
public void setCompactionLatestOperationTime(Long compactionLatestOperationTime) {
|
||||
this.compactionLatestOperationTime = compactionLatestOperationTime;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SyncState{" +
|
||||
"flinkJobId=" + flinkJobId +
|
||||
", alias='" + alias + '\'' +
|
||||
", messageId='" + messageId + '\'' +
|
||||
", sourceStartTime=" + sourceStartTime +
|
||||
", sourceCheckpointTime=" + sourceCheckpointTime +
|
||||
", sourcePublishTime=" + sourcePublishTime +
|
||||
", sourceOperationTime=" + sourceOperationTime +
|
||||
", compactionStartTime=" + compactionStartTime +
|
||||
", compactionFinishTime=" + compactionFinishTime +
|
||||
", compactionApplicationId='" + compactionApplicationId + '\'' +
|
||||
", compactionStatus='" + compactionStatus + '\'' +
|
||||
", compactionStatusTime=" + compactionStatusTime +
|
||||
", compactionLatestOperationTime=" + compactionLatestOperationTime +
|
||||
'}';
|
||||
}
|
||||
|
||||
public static final class Builder {
|
||||
private Long flinkJobId;
|
||||
private String alias;
|
||||
private String messageId;
|
||||
private Long sourceStartTime;
|
||||
private Long sourceCheckpointTime;
|
||||
private Long sourcePublishTime;
|
||||
private Long sourceOperationTime;
|
||||
private Long compactionStartTime;
|
||||
private Long compactionFinishTime;
|
||||
private String compactionApplicationId;
|
||||
private String compactionStatus;
|
||||
private Long compactionStatusTime;
|
||||
private Long compactionLatestOperationTime;
|
||||
|
||||
private Builder() {
|
||||
}
|
||||
|
||||
public Builder flinkJobId(Long flinkJobId) {
|
||||
this.flinkJobId = flinkJobId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder alias(String alias) {
|
||||
this.alias = alias;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder messageId(String messageId) {
|
||||
this.messageId = messageId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sourceStartTime(Long sourceStartTime) {
|
||||
this.sourceStartTime = sourceStartTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sourceCheckpointTime(Long sourceCheckpointTime) {
|
||||
this.sourceCheckpointTime = sourceCheckpointTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sourcePublishTime(Long sourcePublishTime) {
|
||||
this.sourcePublishTime = sourcePublishTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sourceOperationTime(Long sourceOperationTime) {
|
||||
this.sourceOperationTime = sourceOperationTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder compactionStartTime(Long compactionStartTime) {
|
||||
this.compactionStartTime = compactionStartTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder compactionFinishTime(Long compactionFinishTime) {
|
||||
this.compactionFinishTime = compactionFinishTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder compactionApplicationId(String compactionApplicationId) {
|
||||
this.compactionApplicationId = compactionApplicationId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder compactionStatus(String compactionStatus) {
|
||||
this.compactionStatus = compactionStatus;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder compactionStatusTime(Long compactionStatusTime) {
|
||||
this.compactionStatusTime = compactionStatusTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder compactionLatestOperationTime(Long compactionLatestOperationTime) {
|
||||
this.compactionLatestOperationTime = compactionLatestOperationTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public SyncState build() {
|
||||
return new SyncState(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,144 @@
|
||||
package com.lanyuanxiaoyao.service.common.entity.compaction;
|
||||
|
||||
/**
|
||||
* 压缩调度任务
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-09-26
|
||||
*/
|
||||
/**
 * 压缩调度任务 — a compaction schedule job entry identified by id, carrying the
 * owning flink job, table alias, batch tag, status and an optional comment.
 *
 * @author ZhangJiacheng
 * @date 2022-09-26
 */
public class ScheduleJob {

    private String id;
    private Long flinkJobId;
    private String alias;
    private String batch;
    private String status;
    private String comment;

    public ScheduleJob() {
    }

    public ScheduleJob(String id, Long flinkJobId, String alias, String batch, String status, String comment) {
        this.id = id;
        this.flinkJobId = flinkJobId;
        this.alias = alias;
        this.batch = batch;
        this.status = status;
        this.comment = comment;
    }

    public ScheduleJob(Builder builder) {
        // Delegate to the all-args constructor so field copying lives in one place.
        this(builder.id, builder.flinkJobId, builder.alias, builder.batch, builder.status, builder.comment);
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Long getFlinkJobId() {
        return flinkJobId;
    }

    public void setFlinkJobId(Long flinkJobId) {
        this.flinkJobId = flinkJobId;
    }

    public String getAlias() {
        return alias;
    }

    public void setAlias(String alias) {
        this.alias = alias;
    }

    public String getBatch() {
        return batch;
    }

    public void setBatch(String batch) {
        this.batch = batch;
    }

    public String getStatus() {
        return status;
    }

    public void setStatus(String status) {
        this.status = status;
    }

    public String getComment() {
        return comment;
    }

    public void setComment(String comment) {
        this.comment = comment;
    }

    @Override
    public String toString() {
        // Fixed: previously omitted batch, status and comment, which made debug
        // logs of schedule jobs ambiguous.
        return "ScheduleJob{" +
                "id=" + id +
                ", flinkJobId=" + flinkJobId +
                ", alias='" + alias + '\'' +
                ", batch='" + batch + '\'' +
                ", status='" + status + '\'' +
                ", comment='" + comment + '\'' +
                '}';
    }

    /** Fluent builder for {@link ScheduleJob}; obtain via {@link ScheduleJob#builder()}. */
    public static final class Builder {

        private String id;
        private Long flinkJobId;
        private String alias;
        private String batch;
        private String status;
        private String comment;

        private Builder() {
        }

        public Builder id(String id) {
            this.id = id;
            return this;
        }

        public Builder flinkJobId(Long flinkJobId) {
            this.flinkJobId = flinkJobId;
            return this;
        }

        public Builder alias(String alias) {
            this.alias = alias;
            return this;
        }

        public Builder batch(String batch) {
            this.batch = batch;
            return this;
        }

        public Builder status(String status) {
            this.status = status;
            return this;
        }

        public Builder comment(String comment) {
            this.comment = comment;
            return this;
        }

        public ScheduleJob build() {
            return new ScheduleJob(this);
        }
    }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.lanyuanxiaoyao.service.common.entity.compaction;
|
||||
|
||||
import java.util.Deque;
|
||||
|
||||
/**
|
||||
* 队列
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-09-26
|
||||
*/
|
||||
public interface ScheduleQueue extends Deque<ScheduleJob> {

    /**
     * Remove and return up to {@code limit} jobs from the queue.
     * NOTE(review): exact head/tail ordering is implementation-defined —
     * no implementation is visible here; confirm before relying on order.
     */
    Iterable<ScheduleJob> poll(int limit);

    /**
     * Like {@link #poll(int)} but presumably skipping jobs considered "the
     * same" (duplicates) within one batch — TODO confirm against implementations.
     */
    Iterable<ScheduleJob> pollWithoutSame(int limit);
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-05-23
|
||||
*/
|
||||
/**
 * Thrown when the checkpoint root path cannot be resolved.
 *
 * @author ZhangJiacheng
 * @date 2022-05-23
 */
public class CheckpointRootPathNotFoundException extends RuntimeException {

    public CheckpointRootPathNotFoundException() {
        super("Checkpoint root path not found");
    }
}
|
||||
@@ -0,0 +1,29 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
import java.util.function.Supplier;
|
||||
|
||||
/**
|
||||
* 配置异常
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-05-16
|
||||
*/
|
||||
/**
 * 配置异常 — checked exception signalling an invalid configuration, with helpers
 * to run a predicate-style check.
 *
 * @author ZhangJiacheng
 * @date 2022-05-16
 */
public class ConfigException extends Exception {

    public ConfigException(String message) {
        super(message);
    }

    /**
     * Evaluate {@code checkFunction}; when it yields {@code true}, fail with a
     * {@link ConfigException} carrying {@code message}.
     */
    public static void check(String message, Supplier<Boolean> checkFunction) throws ConfigException {
        boolean failed = checkFunction.get();
        if (failed) {
            throw new ConfigException(message);
        }
    }

    /**
     * Same as {@link #check}, but re-wraps the checked {@link ConfigException}
     * in an unchecked {@link RuntimeException} so callers need no throws clause.
     */
    public static void checkQuiet(String message, Supplier<Boolean> checkFunction) {
        try {
            check(message, checkFunction);
        } catch (ConfigException cause) {
            throw new RuntimeException(cause);
        }
    }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-05-23
|
||||
*/
|
||||
/**
 * Thrown when a flink job (optionally identified by id) cannot be found.
 *
 * @author ZhangJiacheng
 * @date 2022-05-23
 */
public class FlinkJobNotFoundException extends RuntimeException {

    public FlinkJobNotFoundException() {
        super("Flink job not found");
    }

    public FlinkJobNotFoundException(Long flinkJobId) {
        super("Flink job " + flinkJobId + " not found");
    }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-05-23
|
||||
*/
|
||||
public class MessageIdEmptyException extends RuntimeException {

    public MessageIdEmptyException() {
        // The message is prefixed with the LOG_POINT_MESSAGE_ID_EMPTY marker so
        // it can be grepped from logs; presumably this corresponds to
        // LogHelper.LogPoint.MESSAGE_ID_EMPTY — confirm the constant's value.
        super(Constants.LOG_POINT_MESSAGE_ID_EMPTY + " Message id is empty");
    }
}
|
||||
@@ -0,0 +1,13 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
/**
|
||||
* 缺参数异常
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-20
|
||||
*/
|
||||
/**
 * 缺参数异常 — checked exception raised when a required argument is absent.
 *
 * @author ZhangJiacheng
 * @date 2022-06-20
 */
public class MissingArgumentException extends Exception {

    public MissingArgumentException(String argumentName) {
        super("Argument: '" + argumentName + "' is not found");
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-05-23
|
||||
*/
|
||||
/**
 * Thrown when the partition path cannot be resolved.
 *
 * @author ZhangJiacheng
 * @date 2022-05-23
 */
public class PartitionPathNotFoundException extends RuntimeException {

    public PartitionPathNotFoundException() {
        super("Partition path not found");
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2023-04-28
|
||||
*/
|
||||
/**
 * Thrown when Pulsar connection/topic information cannot be found; the caller
 * supplies the detail message.
 *
 * @author ZhangJiacheng
 * @date 2023-04-28
 */
public class PulsarInfoNotFoundException extends RuntimeException {

    public PulsarInfoNotFoundException(String message) {
        super(message);
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-05-23
|
||||
*/
|
||||
/**
 * Thrown when no sync state record exists for the requested job/table.
 *
 * @author ZhangJiacheng
 * @date 2022-05-23
 */
public class SyncStateNotFoundException extends RuntimeException {

    public SyncStateNotFoundException() {
        super("Sync state not found");
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-05-23
|
||||
*/
|
||||
/**
 * Thrown when table metadata cannot be found.
 *
 * @author ZhangJiacheng
 * @date 2022-05-23
 */
public class TableMetaNotFoundException extends RuntimeException {

    public TableMetaNotFoundException() {
        super("Table meta not found");
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.lanyuanxiaoyao.service.common.exception;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-05-23
|
||||
*/
|
||||
/**
 * Thrown when the ZooKeeper connection URL is missing from configuration.
 *
 * @author ZhangJiacheng
 * @date 2022-05-23
 */
public class ZookeeperUrlNotFoundException extends RuntimeException {

    public ZookeeperUrlNotFoundException() {
        super("Zookeeper url not found");
    }
}
|
||||
@@ -0,0 +1,86 @@
|
||||
package com.lanyuanxiaoyao.service.common.utils;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* Flink Job 工具类
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @version 0.0.1
|
||||
* @date 2021-12-08
|
||||
*/
|
||||
public class FlinkJobHelper {
|
||||
public static String allFlinkJobSql(String database) {
|
||||
// language=MySQL
|
||||
return "select tafjc.id,\n" +
|
||||
" tafjc.name,\n" +
|
||||
" tafjc.run_mode,\n" +
|
||||
" tayjc.job_manager_memory,\n" +
|
||||
" tayjc.task_manager_memory\n" +
|
||||
"from `" + database + "`.tb_app_flink_job_config tafjc\n" +
|
||||
" left join\n" +
|
||||
" `" + database + "`.tb_app_yarn_job_config tayjc on tafjc.one_in_one_yarn_job_id = tayjc.id and tayjc.status = 'y'\n" +
|
||||
"where tafjc.status = 'y'";
|
||||
}
|
||||
|
||||
public static String flinkJobSql(String database) {
|
||||
// language=MySQL
|
||||
return "select tafjc.id,\n" +
|
||||
" tafjc.name,\n" +
|
||||
" tafjc.run_mode,\n" +
|
||||
" tayjc.job_manager_memory,\n" +
|
||||
" tayjc.task_manager_memory\n" +
|
||||
"from `" + database + "`.tb_app_flink_job_config tafjc\n" +
|
||||
" left join\n" +
|
||||
" `" + database + "`.tb_app_yarn_job_config tayjc on tafjc.one_in_one_yarn_job_id = tayjc.id and tayjc.status = 'y'\n" +
|
||||
"where tafjc.id = ?\n" +
|
||||
" and tafjc.status = 'y'";
|
||||
}
|
||||
|
||||
public static List<FlinkJob> from(ResultSet rs) throws SQLException {
|
||||
List<FlinkJob> results = new ArrayList<>();
|
||||
while (rs.next()) {
|
||||
String runModeText = rs.getString(3);
|
||||
FlinkJob.RunMode mode;
|
||||
try {
|
||||
mode = FlinkJob.RunMode.valueOf(runModeText);
|
||||
} catch (IllegalArgumentException e) {
|
||||
mode = FlinkJob.RunMode.ALL_IN_ONE;
|
||||
}
|
||||
TableMeta.YarnMeta yarnMeta = TableMeta.YarnMeta.builder()
|
||||
.jobManagerMemory(rs.getInt(4))
|
||||
.taskManagerMemory(rs.getInt(5))
|
||||
.build();
|
||||
results.add(
|
||||
FlinkJob.builder()
|
||||
.id(rs.getLong(1))
|
||||
.name(rs.getString(2))
|
||||
.runMode(mode)
|
||||
.oneInOneSyncYarn(yarnMeta)
|
||||
.build()
|
||||
);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
public static Optional<FlinkJob> fromOne(ResultSet rs) throws SQLException {
|
||||
List<FlinkJob> results = from(rs);
|
||||
if (results.size() < 1) {
|
||||
return Optional.empty();
|
||||
} else if (results.size() > 1) {
|
||||
throw new SQLException("Found more than 1 records");
|
||||
} else {
|
||||
return Optional.of(results.get(0));
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isOneInOneMode(FlinkJob job) {
|
||||
return FlinkJob.RunMode.ONE_IN_ONE.equals(job.getRunMode());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
package com.lanyuanxiaoyao.service.common.utils;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import cn.hutool.core.map.MapBuilder;
|
||||
import cn.hutool.core.map.MapUtil;
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.MDC;
|
||||
|
||||
/**
|
||||
* 日志相关
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2023-12-25
|
||||
*/
|
||||
public class LogHelper {
|
||||
private static String generateLog(LogPoint point, String template, Object[] args) {
|
||||
if (ObjectUtil.isEmpty(template)) {
|
||||
return point.toString();
|
||||
}
|
||||
if (ObjectUtil.isEmpty(args)) {
|
||||
return point + " " + template;
|
||||
}
|
||||
Object[] items = new Object[args.length + 1];
|
||||
items[0] = point;
|
||||
System.arraycopy(args, 0, items, 1, args.length);
|
||||
return StrUtil.format("{} " + template, items);
|
||||
}
|
||||
|
||||
public static void info(Logger logger, LogPoint point) {
|
||||
logger.info(generateLog(point, null, null));
|
||||
}
|
||||
|
||||
public static void info(Logger logger, LogPoint point, String template) {
|
||||
logger.info(generateLog(point, template, null));
|
||||
}
|
||||
|
||||
public static void info(Logger logger, LogPoint point, String template, Object... args) {
|
||||
logger.info(generateLog(point, template, args));
|
||||
}
|
||||
|
||||
public static void debug(Logger logger, LogPoint point, String template, Object... args) {
|
||||
logger.debug(generateLog(point, template, args));
|
||||
}
|
||||
|
||||
public static void warn(Logger logger, LogPoint point, String template, Object... args) {
|
||||
logger.warn(generateLog(point, template, args));
|
||||
}
|
||||
|
||||
public static void error(Logger logger, LogPoint point, String template, Object... args) {
|
||||
logger.error(generateLog(point, template, args));
|
||||
}
|
||||
|
||||
public static void setMdc(Map<String, String> mdcList) {
|
||||
mdcList.forEach(MDC::put);
|
||||
}
|
||||
|
||||
public static void setMdc(String... items) {
|
||||
if (items.length % 2 != 0) {
|
||||
throw new IllegalArgumentException("Items must key-value");
|
||||
}
|
||||
MapBuilder<String, String> builder = MapUtil.builder();
|
||||
for (int i = 0, j = 1; j < items.length; i++, j++) {
|
||||
builder.put(items[i], items[j]);
|
||||
}
|
||||
setMdc(builder.build());
|
||||
}
|
||||
|
||||
public static void removeMdc(List<String> mdcList) {
|
||||
mdcList.forEach(MDC::remove);
|
||||
}
|
||||
|
||||
public static void removeMdc(String... names) {
|
||||
removeMdc(ListUtil.of(names));
|
||||
}
|
||||
|
||||
public static void setMdcFlinkJobAndAlias(Long flinkJobId, String alias) {
|
||||
setMdc(MapUtil.<String, String>builder()
|
||||
.put(Constants.LOG_FLINK_JOB_ID_LABEL, flinkJobId.toString())
|
||||
.put(Constants.LOG_ALIAS_LABEL, alias)
|
||||
.build());
|
||||
}
|
||||
|
||||
public static void removeMdcFlinkJobAndAlias(Long flinkJobId, String alias) {
|
||||
removeMdc(ListUtil.of(Constants.LOG_FLINK_JOB_ID_LABEL, Constants.LOG_ALIAS_LABEL));
|
||||
}
|
||||
|
||||
public enum LogPoint {
|
||||
PULSAR_SOURCE_CHECKPOINT_INITIAL(100),
|
||||
PULSAR_SOURCE_CHECKPOINT_INITIAL_MESSAGE_ID(101),
|
||||
|
||||
MESSAGE_ID_EMPTY(1),
|
||||
CHECKPOINT_INITIAL(2),
|
||||
CHECKPOINT_INITIAL_MESSAGE_ID(3),
|
||||
PULSAR_SOURCE_BOOTSTRAP_MESSAGE_ID(4),
|
||||
PULSAR_SOURCE_BOOTSTRAP_GET_MESSAGE_ERROR(5),
|
||||
FIELD_TYPE_NOT_FOUND(6),
|
||||
VERSION_UPDATE(7),
|
||||
CHECKPOINT_START(8),
|
||||
CHECKPOINT_COMPLETE(9);
|
||||
|
||||
private final Integer code;
|
||||
|
||||
LogPoint(Integer code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("LOP-%06d", code);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
package com.lanyuanxiaoyao.service.common.utils;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Map工具类
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2023-03-20
|
||||
*/
|
||||
/**
 * Map工具类 — case-tolerant lookups. Resolution order is: UPPERCASE key first,
 * then lowercase key, then the key exactly as given — so when both cases are
 * present, the uppercase entry wins.
 *
 * @author ZhangJiacheng
 * @date 2023-03-20
 */
public class MapHelper {

    public static Object getWithoutCase(Map<String, ?> map, String key) {
        String upper = key.toUpperCase();
        if (map.containsKey(upper)) {
            return map.get(upper);
        }
        String lower = key.toLowerCase();
        if (map.containsKey(lower)) {
            return map.get(lower);
        }
        // Fall back to the caller's exact key (covers mixed-case entries).
        return map.get(key);
    }

    public static String getStringWithoutCase(Map<String, ?> map, String key) {
        return (String) getWithoutCase(map, key);
    }

    public static Integer getIntWithoutCase(Map<String, ?> map, String key) {
        return (Integer) getWithoutCase(map, key);
    }

    public static Long getLongWithoutCase(Map<String, ?> map, String key) {
        return (Long) getWithoutCase(map, key);
    }

    public static Double getDoubleWithoutCase(Map<String, ?> map, String key) {
        return (Double) getWithoutCase(map, key);
    }

    public static Float getFloatWithoutCase(Map<String, ?> map, String key) {
        return (Float) getWithoutCase(map, key);
    }
}
|
||||
@@ -0,0 +1,103 @@
|
||||
package com.lanyuanxiaoyao.service.common.utils;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
|
||||
/**
|
||||
* 命名相关工具
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-24
|
||||
*/
|
||||
public class NameHelper {

    /**
     * Pulsar subscription name for a (flinkJobId, alias) pair.
     * NOTE(review): the trailing "_20230425" looks like a one-off
     * migration/version marker — confirm before changing it.
     */
    public static String pulsarSubscriptionName(Long flinkJobId, String alias) {
        return Constants.PULSAR_SUBSCRIPTION_NAME_PREFIX + "_" + flinkJobId + "_" + alias + "_20230425";
    }

    // Sync job name

    /** Matches names produced by {@link #syncJobName}: "Sync_<id>_<name>". */
    public static final String SYNC_JOB_NAME_REGEX = "^Sync_(\\d+?)_(.+)$";

    public static boolean isSyncJob(String name) {
        return StrUtil.isNotBlank(name) && name.matches(SYNC_JOB_NAME_REGEX);
    }

    public static String syncJobName(Long flinkJobId, String flinkJobName) {
        return "Sync_" + flinkJobId + "_" + flinkJobName;
    }

    /** Matches names produced by {@link #compactionJobName}: "Compaction_<id>_<alias>". */
    public static final String COMPACTION_JOB_NAME_REGEX = "^Compaction_(\\d+?)_(.+?)$";

    public static boolean isCompactionJob(String name) {
        return StrUtil.isNotBlank(name) && name.matches(COMPACTION_JOB_NAME_REGEX);
    }

    public static String compactionJobName(Long flinkJobId, String alias) {
        return "Compaction_" + flinkJobId + "_" + alias;
    }

    // flink job name — human-readable display names (presumably shown in the
    // Flink UI; confirm with callers)

    public static String syncFlinkName(Long flinkJobId, String flinkJobName) {
        return flinkJobName + " (ID: " + flinkJobId + ")";
    }

    public static String syncFlinkName(Long flinkJobId, String flinkJobName, String alias) {
        return flinkJobName + " " + alias + " (ID: " + flinkJobId + ")";
    }

    public static String compactionFlinkName(Long flinkJobId, String schema, String alias) {
        return schema + " " + alias + " (ID: " + flinkJobId + ")";
    }

    // sync state name

    /**
     * "<flinkJobId>-<alias>" — matches the concat(info.flink_job_id, '-',
     * info.alias) id used by the SyncStateHelper queries.
     */
    public static String syncStateName(Long flinkJobId, String alias) {
        return flinkJobId + "-" + alias;
    }

    // zk lock name — ZooKeeper znode paths for launcher/running locks
    public static final String ZK_ROOT_PATH = "/hudi";
    public static final String ZK_LOCK_PATH = ZK_ROOT_PATH + "/lock";
    public static final String ZK_LAUNCHER_LOCK_PATH = ZK_LOCK_PATH + "/launcher";
    public static final String ZK_RUNNING_LOCK_PATH = ZK_LOCK_PATH + "/running";

    public static final String ZK_SYNC_SUFFIX_PATH = "/sync";
    public static final String ZK_SYNC_LAUNCHER_LOCK_PATH = ZK_LAUNCHER_LOCK_PATH + ZK_SYNC_SUFFIX_PATH;
    public static final String ZK_SYNC_RUNNING_LOCK_PATH = ZK_RUNNING_LOCK_PATH + ZK_SYNC_SUFFIX_PATH;

    /**
     * Sync lock node name; when alias is blank the lock covers the whole job
     * (launcher locks use this per-job form).
     */
    public static String syncLockName(Long flinkJobId, String alias) {
        if (StrUtil.isNotBlank(alias)) {
            return "sync_lock_" + flinkJobId + "_" + alias;
        }
        return "sync_lock_" + flinkJobId;
    }

    public static String syncLauncherLockPath(Long flinkJobId) {
        return ZK_SYNC_LAUNCHER_LOCK_PATH + "/" + syncLockName(flinkJobId, null);
    }

    public static String syncRunningLockPath(Long flinkJobId) {
        return syncRunningLockPath(flinkJobId, null);
    }

    public static String syncRunningLockPath(Long flinkJobId, String alias) {
        return ZK_SYNC_RUNNING_LOCK_PATH + "/" + syncLockName(flinkJobId, alias);
    }

    public static final String ZK_COMPACTION_SUFFIX_PATH = "/compaction";
    public static final String ZK_COMPACTION_LAUNCHER_LOCK_PATH = ZK_LAUNCHER_LOCK_PATH + ZK_COMPACTION_SUFFIX_PATH;
    public static final String ZK_COMPACTION_RUNNING_LOCK_PATH = ZK_RUNNING_LOCK_PATH + ZK_COMPACTION_SUFFIX_PATH;

    /** Compaction lock node name; unlike sync locks, alias is always included. */
    public static String compactionLockName(Long flinkJobId, String alias) {
        return "compaction_lock_" + flinkJobId + "_" + alias;
    }

    public static String compactionLauncherLockPath(Long flinkJobId, String alias) {
        return ZK_COMPACTION_LAUNCHER_LOCK_PATH + "/" + compactionLockName(flinkJobId, alias);
    }

    public static String compactionRunningLockPath(Long flinkJobId, String alias) {
        return ZK_COMPACTION_RUNNING_LOCK_PATH + "/" + compactionLockName(flinkJobId, alias);
    }
}
|
||||
@@ -0,0 +1,127 @@
|
||||
package com.lanyuanxiaoyao.service.common.utils;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.Record;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Record 工具类
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @version 0.0.1
|
||||
* @date 2021-12-03
|
||||
*/
|
||||
public class RecordHelper {

    /** True when the raw record string is NOT a version-update marker. */
    public static Boolean isNotVersionUpdateRecord(String record) {
        return !isVersionUpdateRecord(record);
    }

    /** True when the raw record string contains the version-update marker key. */
    public static Boolean isVersionUpdateRecord(String record) {
        return record.contains(Constants.VERSION_UPDATE_KEY);
    }

    public static Boolean isNotVersionUpdateRecord(Record record) {
        return !isVersionUpdateRecord(record);
    }

    /** True when the parsed record is the synthetic version-update marker. */
    public static Boolean isVersionUpdateRecord(Record record) {
        // Record{source=Source{sourceId='versionUpdate', sourceType='null', sourcePos='null', currentTs='2022-11-15 22:17:44'}, statement=Statement{schema='crm_ivpn_cust', table='customer', opStatement='null', opType='version', op='null', opTs='2022-11-15 00:17:43', version='20220925', before=null, after=null}}
        return Constants.VERSION_UPDATE_KEY.equals(record.getSource().getSourceId())
                && Constants.VERSION_KEY.equals(record.getStatement().getOpType());
    }

    /**
     * Get the effective statement image: prefer {@code after}, falling back to
     * {@code before} when {@code after} is null or empty.
     *
     * @param record the change record
     * @return the current field map (may be null when both images are null)
     */
    public static Map<String, Object> getCurrentStatement(Record record) {
        Map<String, Object> before = record.getStatement().getBefore();
        Map<String, Object> after = record.getStatement().getAfter();
        return (after == null ? before : after.isEmpty() ? before : after);
    }

    public static Optional<Map<String, Object>> getBeforeStatement(Record record) {
        return Optional.ofNullable(record.getStatement().getBefore());
    }

    public static Optional<Map<String, Object>> getAfterStatement(Record record) {
        return Optional.ofNullable(record.getStatement().getAfter());
    }

    private static Boolean isMapEmpty(Map<?, ?> map) {
        return map == null || map.isEmpty();
    }

    /**
     * Whether the field selected by {@code fieldGetter} differs between the
     * before and after images. Returns false when either image is missing, the
     * field name is absent, or either value is null; comparison is done on
     * String.valueOf of both values.
     */
    public static Boolean isChangeField(TableMeta meta, Record record, Function<TableMeta, Optional<String>> fieldGetter) {
        Map<String, Object> before = record.getStatement().getBefore();
        Map<String, Object> after = record.getStatement().getAfter();
        Optional<String> field = fieldGetter.apply(meta);
        if (isMapEmpty(before) || isMapEmpty(after) || !field.isPresent()) {
            return false;
        }
        Object beforeField = before.getOrDefault(field.get(), null);
        Object afterField = after.getOrDefault(field.get(), null);
        if (beforeField == null || afterField == null) {
            return false;
        }
        return !Objects.equals(String.valueOf(beforeField), String.valueOf(afterField));
    }

    /** Adds metadata columns; the delete flag is derived from the record's opType. */
    public static Map<String, Object> addExtraMetadata(Map<String, Object> current, TableMeta tableMeta, Record record) {
        String operationType = record.getStatement().getOpType();
        return addExtraMetadata(current, tableMeta, record, Constants.DELETE.equals(operationType));
    }

    /**
     * Return a copy of {@code current} with extra metadata columns added:
     * the union key, a snowflake-generated update timestamp value, the source
     * operation timestamp, and the hudi delete flag.
     */
    public static Map<String, Object> addExtraMetadata(Map<String, Object> current, TableMeta tableMeta, Record record, Boolean isDelete) {
        Map<String, Object> newMap = new HashMap<>(current);
        newMap.put(Constants.UNION_KEY_NAME, RecordHelper.createUnionKey(tableMeta, current));
        newMap.put(Constants.UPDATE_TIMESTAMP_KEY_NAME, SnowFlakeHelper.next());
        newMap.put(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, record.getStatement().getOpTs());
        newMap.put(Constants.HUDI_DELETE_KEY_NAME, isDelete);
        return newMap;
    }

    public static String createUnionKey(TableMeta tableMeta, Record record) {
        return createUnionKey(tableMeta, getCurrentStatement(record));
    }

    private static final String PRIMARY_KEY_NOT_FOUND = "Primary Key Not Found";

    /**
     * Build the union key from primary and partition keys:
     * "pk1-pk2..." or "pk1-pk2..._part1-part2..." when partition keys exist.
     *
     * NOTE(review): primary-key values are null-checked (orElseThrow) but
     * partition-key values are not — a null partition value would NPE on
     * toString(); confirm upstream guarantees non-null partition values.
     *
     * @param tableMeta table meta (supplies key definitions and field mapping)
     * @param fields field map of the current record image
     * @return union key value
     */
    public static String createUnionKey(TableMeta tableMeta, Map<String, Object> fields) {
        if (tableMeta.getPrimaryKeys().isEmpty()) {
            throw new RuntimeException(PRIMARY_KEY_NOT_FOUND);
        }
        if (Objects.isNull(fields)) {
            throw new RuntimeException("Fields cannot be null");
        }
        List<String> primaryKeys = tableMeta.getPrimaryKeys()
                .stream()
                .map(key -> Optional.ofNullable(fields.get(Constants.FIELD_COVERT.apply(tableMeta, key.getName())))
                        .orElseThrow(() -> new RuntimeException(PRIMARY_KEY_NOT_FOUND + " " + fields))
                        .toString())
                .collect(Collectors.toList());
        String primaryKey = String.join("-", primaryKeys);
        if (tableMeta.getPartitionKeys().isEmpty()) {
            return primaryKey;
        } else {
            List<String> partitionKeys = tableMeta.getPartitionKeys()
                    .stream()
                    .map(key -> fields.get(Constants.FIELD_COVERT.apply(tableMeta, key.getName())).toString())
                    .collect(Collectors.toList());
            String partitionKey = String.join("-", partitionKeys);
            return primaryKey + "_" + partitionKey;
        }
    }
}
|
||||
@@ -0,0 +1,72 @@
|
||||
package com.lanyuanxiaoyao.service.common.utils;
|
||||
|
||||
/**
|
||||
* 雪花算(pi)法(jiu)
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2020-06-05
|
||||
*/
|
||||
public class SnowFlakeHelper {
|
||||
/**
|
||||
* 起始的时间戳
|
||||
*/
|
||||
private final static long START_TIMESTAMP = 1;
|
||||
|
||||
/**
|
||||
* 序列号占用的位数
|
||||
*/
|
||||
private final static long SEQUENCE_BIT = 11;
|
||||
|
||||
/**
|
||||
* 序列号最大值
|
||||
*/
|
||||
private final static long MAX_SEQUENCE_BIT = ~(-1 << SEQUENCE_BIT);
|
||||
|
||||
/**
|
||||
* 时间戳值向左位移
|
||||
*/
|
||||
private final static long TIMESTAMP_OFFSET = SEQUENCE_BIT;
|
||||
|
||||
/**
|
||||
* 序列号
|
||||
*/
|
||||
private static long sequence = 0;
|
||||
/**
|
||||
* 上一次时间戳
|
||||
*/
|
||||
private static long lastTimestamp = -1;
|
||||
|
||||
public static synchronized long next() {
|
||||
long currentTimestamp = nowTimestamp();
|
||||
if (currentTimestamp < lastTimestamp) {
|
||||
throw new RuntimeException("Clock have moved backwards.");
|
||||
}
|
||||
|
||||
if (currentTimestamp == lastTimestamp) {
|
||||
// 相同毫秒内, 序列号自增
|
||||
sequence = (sequence + 1) & MAX_SEQUENCE_BIT;
|
||||
// 同一毫秒的序列数已经达到最大
|
||||
if (sequence == 0) {
|
||||
currentTimestamp = nextTimestamp();
|
||||
}
|
||||
} else {
|
||||
// 不同毫秒内, 序列号置为0
|
||||
sequence = 0;
|
||||
}
|
||||
|
||||
lastTimestamp = currentTimestamp;
|
||||
return (currentTimestamp - START_TIMESTAMP) << TIMESTAMP_OFFSET | sequence;
|
||||
}
|
||||
|
||||
private static long nextTimestamp() {
|
||||
long milli = nowTimestamp();
|
||||
while (milli <= lastTimestamp) {
|
||||
milli = nowTimestamp();
|
||||
}
|
||||
return milli;
|
||||
}
|
||||
|
||||
private static long nowTimestamp() {
|
||||
return System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
package com.lanyuanxiaoyao.service.common.utils;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.entity.SyncState;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Timestamp;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2023-04-24
|
||||
*/
|
||||
public class SyncStateHelper {
|
||||
public static String allSyncStateSql(String database) {
|
||||
// language=MySQL
|
||||
return "select info.flink_job_id,\n" +
|
||||
" info.alias,\n" +
|
||||
" state.message_id,\n" +
|
||||
" state.source_start_time,\n" +
|
||||
" state.source_checkpoint_time,\n" +
|
||||
" state.source_publish_time,\n" +
|
||||
" state.source_op_time,\n" +
|
||||
" state.compaction_start_time,\n" +
|
||||
" state.compaction_finish_time,\n" +
|
||||
" state.compaction_application_id,\n" +
|
||||
" state.compaction_status,\n" +
|
||||
" state.compaction_status_time,\n" +
|
||||
" state.compaction_latest_op_ts\n" +
|
||||
"from " + database + ".tb_app_hudi_sync_state state,\n" +
|
||||
" " + database + ".tb_app_collect_table_info info\n" +
|
||||
"where state.id = concat(info.flink_job_id, '-', info.alias)\n" +
|
||||
" and info.status = 'y'";
|
||||
}
|
||||
|
||||
public static String syncStateSql(String database) {
|
||||
// language=MySQL
|
||||
return "select info.flink_job_id,\n" +
|
||||
" info.alias,\n" +
|
||||
" state.message_id,\n" +
|
||||
" state.source_start_time,\n" +
|
||||
" state.source_checkpoint_time,\n" +
|
||||
" state.source_publish_time,\n" +
|
||||
" state.source_op_time,\n" +
|
||||
" state.compaction_start_time,\n" +
|
||||
" state.compaction_finish_time,\n" +
|
||||
" state.compaction_application_id,\n" +
|
||||
" state.compaction_status,\n" +
|
||||
" state.compaction_status_time,\n" +
|
||||
" state.compaction_latest_op_ts\n" +
|
||||
"from " + database + ".tb_app_hudi_sync_state state,\n" +
|
||||
" " + database + ".tb_app_collect_table_info info\n" +
|
||||
"where state.id = concat(info.flink_job_id, '-', info.alias)\n" +
|
||||
" and info.flink_job_id = ?\n" +
|
||||
" and info.alias = ?\n" +
|
||||
" and info.status = 'y'";
|
||||
}
|
||||
|
||||
public static List<SyncState> from(ResultSet rs) throws SQLException {
|
||||
List<SyncState> results = new ArrayList<>();
|
||||
Function<Timestamp, Long> dateConvertor = timestamp -> timestamp == null ? 0 : timestamp.getTime();
|
||||
while (rs.next()) {
|
||||
results.add(SyncState.builder()
|
||||
.flinkJobId(rs.getLong(1))
|
||||
.alias(rs.getString(2))
|
||||
.messageId(rs.getString(3))
|
||||
.sourceStartTime(dateConvertor.apply(rs.getTimestamp(4)))
|
||||
.sourceCheckpointTime(dateConvertor.apply(rs.getTimestamp(5)))
|
||||
.sourcePublishTime(dateConvertor.apply(rs.getTimestamp(6)))
|
||||
.sourceOperationTime(dateConvertor.apply(rs.getTimestamp(7)))
|
||||
.compactionStartTime(dateConvertor.apply(rs.getTimestamp(8)))
|
||||
.compactionFinishTime(dateConvertor.apply(rs.getTimestamp(9)))
|
||||
.compactionApplicationId(rs.getString(10))
|
||||
.compactionStatus(rs.getString(11))
|
||||
.compactionStatusTime(dateConvertor.apply(rs.getTimestamp(12)))
|
||||
.compactionLatestOperationTime(dateConvertor.apply(rs.getTimestamp(13)))
|
||||
.build());
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,576 @@
|
||||
package com.lanyuanxiaoyao.service.common.utils;
|
||||
|
||||
import cn.hutool.json.JSONObject;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.exception.ConfigException;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Table Meta 工具类
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @version 0.0.1
|
||||
* @date 2021-12-01
|
||||
*/
|
||||
public class TableMetaHelper {
|
||||
//private static final AES AES = new AES(Mode.CBC, Padding.NoPadding, "6fa22c779ec14b98".getBytes(), "6fa22c779ec14b98".getBytes());
|
||||
|
||||
/**
 * Convenience overload of {@link #tableMetaSql(String, Boolean, Boolean)}:
 * filters by Flink job id only, so the produced SQL has exactly one {@code ?}
 * placeholder (the Flink job id).
 *
 * @param database schema name interpolated into the query's table identifiers
 * @return the table-meta SQL text
 */
public static String tableMetaSql(String database) {
    // Defaults: filterByFlinkJobId = true, filterByAlias = false.
    return tableMetaSql(database, true, false);
}
|
||||
|
||||
/*
|
||||
SqlBuilder.select(
|
||||
DataSource.DS_NAME,
|
||||
DataSource.SCHEMA_NAME,
|
||||
DataSourceTable.TABLE_NAME,
|
||||
DataSourceTable.TABLE_TYPE,
|
||||
DataSourceTableField.FIELD_NAME,
|
||||
DataSourceTableField.FIELD_SEQ,
|
||||
DataSourceTableField.FIELD_TYPE,
|
||||
DataSourceTableField.PRIMARY_KEY,
|
||||
DataSourceTableField.PARTITION_KEY,
|
||||
DataSourceTableField.LENGTH,
|
||||
TbAppCollectTableInfo.TGT_DB,
|
||||
TbAppCollectTableInfo.TGT_TABLE,
|
||||
TbAppCollectTableInfo.TGT_TABLE_TYPE,
|
||||
TbAppCollectTableInfo.TGT_HDFS_PATH,
|
||||
TbAppHudiJobConfig.WRITE_TASKS,
|
||||
TbAppHudiJobConfig.WRITE_OPERATION,
|
||||
TbAppHudiJobConfig.WRITE_TASK_MAX_MEMORY,
|
||||
TbAppHudiJobConfig.WRITE_BATCH_SIZE,
|
||||
TbAppHudiJobConfig.WRITE_RATE_LIMIT,
|
||||
TbAppCollectTableInfo.BUCKET_NUMBER,
|
||||
TbAppHudiJobConfig.COMPACTION_STRATEGY,
|
||||
TbAppHudiJobConfig.COMPACTION_TASKS,
|
||||
TbAppHudiJobConfig.COMPACTION_DELTA_COMMITS,
|
||||
TbAppHudiJobConfig.COMPACTION_DELTA_SECONDS,
|
||||
TbAppHudiJobConfig.COMPACTION_ASYNC_ENABLED,
|
||||
TbAppHudiJobConfig.COMPACTION_MAX_MEMORY,
|
||||
TbAppHudiJobConfig.CONFIGS,
|
||||
TbAppCollectTableInfo.FILTER_FIELD,
|
||||
TbAppCollectTableInfo.FILTER_VALUES,
|
||||
TbAppCollectTableInfo.FILTER_TYPE,
|
||||
TbAppCollectTableInfo.SRC_TOPIC,
|
||||
TbAppCollectTableInfo.SRC_PULSAR_ADDR,
|
||||
Alias.of(TbAppYarnJobConfigSync.JOB_MANAGER_MEMORY, "sync_job_manager_memory"),
|
||||
Alias.of(TbAppYarnJobConfigSync.TASK_MANAGER_MEMORY, "sync_task_manager_memory"),
|
||||
Alias.of(TbAppYarnJobConfigCompaction.JOB_MANAGER_MEMORY, "compaction_job_manager_memory"),
|
||||
Alias.of(TbAppYarnJobConfigCompaction.TASK_MANAGER_MEMORY, "compaction_task_manger_momory"),
|
||||
TbAppCollectTableInfo.PARTITION_FIELD,
|
||||
TbAppHudiSyncState.MESSAGE_ID,
|
||||
TbAppGlobalConfig.METRIC_PUBLISH_URL,
|
||||
TbAppGlobalConfig.METRIC_PROMETHEUS_URL,
|
||||
TbAppGlobalConfig.METRIC_API_URL,
|
||||
TbAppGlobalConfig.METRIC_PUBLISH_DELAY,
|
||||
TbAppGlobalConfig.METRIC_PUBLISH_PERIOD,
|
||||
TbAppGlobalConfig.METRIC_PUBLISH_TIMEOUT,
|
||||
TbAppGlobalConfig.METRIC_PUBLISH_BATCH,
|
||||
Alias.of(TbAppFlinkJobConfig.ID, "job_id"),
|
||||
Alias.of(TbAppFlinkJobConfig.NAME, "job_name"),
|
||||
TbAppGlobalConfig.CHECKPOINT_ROOT_PATH,
|
||||
TbAppHudiJobConfig.SOURCE_TASKS,
|
||||
TbAppCollectTableInfo.ALIAS,
|
||||
DataSource.CONNECTION,
|
||||
TbAppCollectTableInfo.PRIORITY,
|
||||
DataSource.DS_TYPE,
|
||||
TbAppHudiJobConfig.KEEP_FILE_VERSION,
|
||||
TbAppHudiJobConfig.KEEP_COMMIT_VERSION,
|
||||
TbAppCollectTableInfo.TAGS,
|
||||
TbAppGlobalConfig.ZK_URL
|
||||
)
|
||||
.from(
|
||||
DataSource._alias_,
|
||||
DataSourceTable._alias_,
|
||||
DataSourceTableField._alias_,
|
||||
TbAppFlinkJobConfig._alias_,
|
||||
TbAppHudiJobConfig._alias_,
|
||||
TbAppYarnJobConfigSync._alias_,
|
||||
TbAppYarnJobConfigCompaction._alias_,
|
||||
TbAppGlobalConfig._alias_,
|
||||
TbAppCollectTableInfo._alias_
|
||||
)
|
||||
.leftJoin(TbAppHudiSyncState._alias_)
|
||||
.onEq(TbAppHudiSyncState.ID, Column.as(StrUtil.format("concat({}, '-', {})", TbAppCollectTableInfo.FLINK_JOB_ID, TbAppCollectTableInfo.ALIAS)))
|
||||
.whereEq(DataSource.DS_ROLE, "src")
|
||||
.andEq(DataSource.DS_STATE, "y")
|
||||
.andEq(DataSource.RECORD_STATE, "y")
|
||||
.andEq(DataSourceTable.DS_ID, Column.as(DataSource.DS_ID))
|
||||
.andEq(DataSourceTable.RECORD_STATE, "y")
|
||||
.andEq(DataSourceTableField.TABLE_ID, Column.as(DataSourceTable.TABLE_ID))
|
||||
.andEq(DataSourceTableField.RECORD_STATE, "y")
|
||||
.andIn(DataSource.DS_TYPE, "udal", "telepg")
|
||||
.andEq(DataSource.DS_NAME, Column.as(TbAppCollectTableInfo.SRC_DB))
|
||||
.andEq(DataSource.SCHEMA_NAME, Column.as(TbAppCollectTableInfo.SRC_SCHEMA))
|
||||
.andEq(DataSourceTable.TABLE_NAME, Column.as(TbAppCollectTableInfo.SRC_TABLE))
|
||||
.andEq(TbAppCollectTableInfo.FLINK_JOB_ID, Column.as(TbAppFlinkJobConfig.ID))
|
||||
.andEq(TbAppCollectTableInfo.HUDI_JOB_ID, Column.as(TbAppHudiJobConfig.ID))
|
||||
.andEq(TbAppCollectTableInfo.SYNC_YARN_JOB_ID, Column.as(TbAppYarnJobConfigSync.ID))
|
||||
.andEq(TbAppCollectTableInfo.COMPACTION_YARN_JOB_ID, Column.as(TbAppYarnJobConfigCompaction.ID))
|
||||
.andEq(TbAppCollectTableInfo.CONFIG_ID, Column.as(TbAppGlobalConfig.ID))
|
||||
.andEq(TbAppFlinkJobConfig.ID, 1542097984132706304L)
|
||||
.andEq(TbAppCollectTableInfo.ALIAS, "crm_cfguse_channel")
|
||||
.andEq(TbAppCollectTableInfo.STATUS, "y")
|
||||
.andEq(TbAppFlinkJobConfig.STATUS, "y")
|
||||
.andEq(TbAppHudiJobConfig.STATUS, "y")
|
||||
.andEq(TbAppYarnJobConfigSync.STATUS, "y")
|
||||
.andEq(TbAppYarnJobConfigCompaction.STATUS, "y")
|
||||
.orderBy(DataSourceTableField.FIELD_SEQ)
|
||||
.build()
|
||||
*/
|
||||
public static String tableMetaSql(String database, Boolean filterByFlinkJobId, Boolean filterByAlias) {
|
||||
// language=MySQL
|
||||
return "select dst.ds_name,\n" +
|
||||
" dst.schema_name,\n" +
|
||||
" dst.table_name,\n" +
|
||||
" dst.table_type,\n" +
|
||||
" dstf.field_name,\n" +
|
||||
" dstf.field_seq,\n" +
|
||||
" dstf.field_type,\n" +
|
||||
" dstf.primary_key,\n" +
|
||||
" dstf.partition_key,\n" +
|
||||
" dstf.length,\n" +
|
||||
" tacti.tgt_db,\n" +
|
||||
" tacti.tgt_table,\n" +
|
||||
" tacti.tgt_table_type,\n" +
|
||||
" tacti.tgt_hdfs_path,\n" +
|
||||
" tajhc.write_tasks,\n" +
|
||||
" tajhc.write_operation,\n" +
|
||||
" tajhc.write_task_max_memory,\n" +
|
||||
" tajhc.write_batch_size,\n" +
|
||||
" tajhc.write_rate_limit,\n" +
|
||||
" tacti.bucket_number,\n" +
|
||||
" tajhc.compaction_strategy,\n" +
|
||||
" tajhc.compaction_tasks,\n" +
|
||||
" tajhc.compaction_delta_commits,\n" +
|
||||
" tajhc.compaction_delta_seconds,\n" +
|
||||
" tajhc.compaction_async_enabled,\n" +
|
||||
" tajhc.compaction_max_memory,\n" +
|
||||
" tajhc.configs,\n" +
|
||||
" tacti.filter_field,\n" +
|
||||
" tacti.filter_values,\n" +
|
||||
" tacti.filter_type,\n" +
|
||||
" tacti.src_topic,\n" +
|
||||
" tacti.src_pulsar_addr,\n" +
|
||||
" tayjc_sync.job_manager_memory as sync_job_manager_memory,\n" +
|
||||
" tayjc_sync.task_manager_memory as sync_task_manager_memory,\n" +
|
||||
" tayjc_compaction.job_manager_memory as compaction_job_manager_memory,\n" +
|
||||
" tayjc_compaction.task_manager_memory as compaction_task_manger_momory,\n" +
|
||||
" tacti.partition_field,\n" +
|
||||
" tahss.message_id,\n" +
|
||||
" tagc.metric_publish_url,\n" +
|
||||
" tagc.metric_prometheus_url,\n" +
|
||||
" tagc.metric_api_url,\n" +
|
||||
" tagc.metric_publish_delay,\n" +
|
||||
" tagc.metric_publish_period,\n" +
|
||||
" tagc.metric_publish_timeout,\n" +
|
||||
" tagc.metric_publish_batch,\n" +
|
||||
" tafjc.id as job_id,\n" +
|
||||
" tafjc.name as job_name,\n" +
|
||||
" tagc.checkpoint_root_path,\n" +
|
||||
" tajhc.source_tasks,\n" +
|
||||
" tacti.alias,\n" +
|
||||
" dst.connection,\n" +
|
||||
" tacti.priority,\n" +
|
||||
" dst.ds_type,\n" +
|
||||
" tajhc.keep_file_version,\n" +
|
||||
" tajhc.keep_commit_version,\n" +
|
||||
" tacti.tags,\n" +
|
||||
" tagc.zk_url,\n" +
|
||||
" tacti.version,\n" +
|
||||
" dstf.scale\n" +
|
||||
"from `" + database + "`.tb_app_collect_table_info tacti\n" +
|
||||
" left join `" + database + "`.tb_app_hudi_sync_state tahss\n" +
|
||||
" on tahss.id = concat(tacti.flink_job_id, '-', tacti.alias),\n" +
|
||||
" `" + database + "`.tb_app_flink_job_config tafjc,\n" +
|
||||
" `" + database + "`.tb_app_hudi_job_config tajhc,\n" +
|
||||
" `" + database + "`.tb_app_yarn_job_config tayjc_sync,\n" +
|
||||
" `" + database + "`.tb_app_yarn_job_config tayjc_compaction,\n" +
|
||||
" `" + database + "`.tb_app_global_config tagc,\n" +
|
||||
" `" + database + "`.tb_app_hudi_compaction_schedule tahcs,\n" +
|
||||
" `iap-datahub`.data_source_table_field dstf,\n" +
|
||||
" (select ds.*, dst.table_id, dst.table_name, dst.table_type\n" +
|
||||
" from `iap-datahub`.data_source_table dst,\n" +
|
||||
" (select ds.ds_id, ds.ds_name, ds.ds_type, ds.schema_name, ds.connection\n" +
|
||||
" from `iap-datahub`.data_source ds\n" +
|
||||
" where ds.ds_role = 'src'\n" +
|
||||
" and ds.ds_state = 'y'\n" +
|
||||
" and ds.record_state = 'y') ds\n" +
|
||||
" where dst.ds_id = ds.ds_id\n" +
|
||||
" and dst.record_state = 'y') dst\n" +
|
||||
"where dstf.table_id = dst.table_id\n" +
|
||||
" and dstf.record_state = 'y'\n" +
|
||||
" and dst.ds_type in ('udal', 'telepg')\n" +
|
||||
" and dst.ds_name = tacti.src_db\n" +
|
||||
" and dst.schema_name = tacti.src_schema\n" +
|
||||
" and dst.table_name = tacti.src_table\n" +
|
||||
" and tacti.flink_job_id = tafjc.id\n" +
|
||||
" and tacti.hudi_job_id = tajhc.id\n" +
|
||||
" and tacti.sync_yarn_job_id = tayjc_sync.id\n" +
|
||||
" and tacti.compaction_yarn_job_id = tayjc_compaction.id\n" +
|
||||
" and tacti.config_id = tagc.id\n" +
|
||||
" and tacti.schedule_id = tahcs.id\n" +
|
||||
(filterByFlinkJobId ? " and tafjc.id = ?\n" : "") +
|
||||
(filterByAlias ? " and tacti.alias = ?\n" : "") +
|
||||
" and tacti.status = 'y'\n" +
|
||||
" and tafjc.status = 'y'\n" +
|
||||
" and tajhc.status = 'y'\n" +
|
||||
" and tayjc_sync.status = 'y'\n" +
|
||||
" and tayjc_compaction.status = 'y'\n" +
|
||||
"order by dstf.field_seq;";
|
||||
}
|
||||
|
||||
/**
 * Materializes {@link TableMeta} objects from the result of the query built by
 * {@link #tableMetaSql}.
 * <p>
 * Column indices 1-59 below read the result set positionally and MUST stay in
 * sync with the select list of {@code tableMetaSql}. After reading all rows,
 * the per-field rows are grouped by alias and then by table name, and each
 * group is collapsed into one {@link TableMeta} via {@link #fromRowMetas}.
 *
 * @param rs open result set positioned before its first row
 * @return one {@link TableMeta} per (alias, table) group
 * @throws SQLException     if reading a column fails
 * @throws RuntimeException wrapping any checked exception thrown by
 *                          {@code fromRowMetas} (e.g. config validation)
 */
public static List<TableMeta> from(ResultSet rs) throws SQLException {
    List<TableMeta> results = new ArrayList<>();
    List<TableMeta.RowMeta> metaList = new ArrayList<>();
    // One RowMeta per result row; each row describes one field of one table.
    while (rs.next()) {
        metaList.add(
                TableMeta.RowMeta.builder()
                        .dsName(rs.getString(1))
                        .schemaName(rs.getString(2))
                        .tableName(rs.getString(3))
                        .tableType(rs.getString(4))
                        .fieldName(rs.getString(5))
                        .fieldSeq(rs.getInt(6))
                        .fieldType(rs.getString(7))
                        .primaryKey(rs.getString(8))
                        .partitionKey(rs.getString(9))
                        .length(rs.getLong(10))
                        .tgtDb(rs.getString(11))
                        .tgtTable(rs.getString(12))
                        .tgtTableType(rs.getString(13))
                        .tgtHdfsPath(rs.getString(14))
                        .writeTasks(rs.getInt(15))
                        .writeOperation(rs.getString(16))
                        .writeTaskMaxMemory(rs.getInt(17))
                        .writeBatchSize(rs.getInt(18))
                        .writeRateLimit(rs.getInt(19))
                        // column 20 is tacti.bucket_number
                        .bucketIndexNumber(rs.getInt(20))
                        .compactionStrategy(rs.getString(21))
                        .compactionTasks(rs.getInt(22))
                        .compactionDeltaCommits(rs.getInt(23))
                        .compactionDeltaSeconds(rs.getInt(24))
                        .compactionAsyncEnabled(rs.getString(25))
                        .compactionMaxMemory(rs.getInt(26))
                        .configs(rs.getString(27))
                        .filterField(rs.getString(28))
                        .filterValues(rs.getString(29))
                        .filterType(rs.getString(30))
                        .topic(rs.getString(31))
                        .pulsarAddress(rs.getString(32))
                        .syncJobManagerMemory(rs.getInt(33))
                        .syncTaskManagerMemory(rs.getInt(34))
                        .compactionJobManagerMemory(rs.getInt(35))
                        .compactionTaskManagerMemory(rs.getInt(36))
                        .partitionField(rs.getString(37))
                        .messageId(rs.getString(38))
                        .metricPublishUrl(rs.getString(39))
                        .metricPrometheusUrl(rs.getString(40))
                        .metricApiUrl(rs.getString(41))
                        .metricPublishDelay(rs.getInt(42))
                        .metricPublishPeriod(rs.getInt(43))
                        .metricPublishTimeout(rs.getInt(44))
                        .metricPublishBatch(rs.getInt(45))
                        .jobId(rs.getLong(46))
                        .jobName(rs.getString(47))
                        .checkpointRootPath(rs.getString(48))
                        .sourceTasks(rs.getInt(49))
                        .alias(rs.getString(50))
                        .connection(rs.getString(51))
                        .priority(rs.getInt(52))
                        // column 53 is dst.ds_type
                        .sourceType(rs.getString(53))
                        .keepFileVersion(rs.getInt(54))
                        .keepCommitVersion(rs.getInt(55))
                        .tags(rs.getString(56))
                        .zookeeperUrl(rs.getString(57))
                        .version(rs.getInt(58))
                        // column 59 is dstf.scale; the builder method is named "scala"
                        .scala(rs.getInt(59))
                        .build()
        );
    }
    // Group rows by alias, then by table name, and fold each table's rows into
    // one TableMeta. fromRowMetas throws checked exceptions, which cannot cross
    // a stream lambda, so they are wrapped in RuntimeException here.
    metaList.stream()
            .collect(Collectors.groupingBy(TableMeta.RowMeta::getAlias))
            .values()
            .stream()
            .flatMap(schemaRowMetas -> schemaRowMetas
                    .stream()
                    .collect(Collectors.groupingBy(TableMeta.RowMeta::getTableName))
                    .values()
                    .stream()
                    .map(tableRowMetas -> {
                        try {
                            return fromRowMetas(tableRowMetas);
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }))
            .forEach(results::add);
    return results;
}
|
||||
|
||||
private static void checkMoreThanOne(String fieldName, Collection<?> collection) throws ConfigException {
|
||||
ConfigException.check(fieldName + " cannot be more than 1", () -> collection.size() > 1);
|
||||
}
|
||||
|
||||
private static void checkEmpty(String fieldName, Collection<?> collection) throws ConfigException {
|
||||
ConfigException.check(fieldName + " cannot be empty", collection::isEmpty);
|
||||
}
|
||||
|
||||
private static void checkEmptyOrMoreThanOne(String fieldName, Collection<?> collection) throws ConfigException {
|
||||
checkEmpty(fieldName, collection);
|
||||
checkMoreThanOne(fieldName, collection);
|
||||
}
|
||||
|
||||
public static TableMeta fromRowMetas(List<TableMeta.RowMeta> metaList) throws Exception {
|
||||
List<String> aliasList = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getAlias)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("alias", aliasList);
|
||||
String alias = aliasList.get(0);
|
||||
List<String> sourceTypeList = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getSourceType)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("source_type", sourceTypeList);
|
||||
String sourceTypeText = sourceTypeList.get(0).toUpperCase();
|
||||
TableMeta.SourceType sourceType;
|
||||
try {
|
||||
sourceType = TableMeta.SourceType.valueOf(sourceTypeText);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new Exception("Cannot parse source type " + sourceTypeText);
|
||||
}
|
||||
List<String> dsNames = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getDsName)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("ds_name", dsNames);
|
||||
String dataSource = dsNames.get(0);
|
||||
List<String> schemaNames = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getSchemaName)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("schema_name", schemaNames);
|
||||
String schema = schemaNames.get(0);
|
||||
List<String> tableNames = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getTableName)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
// 每次只能获取 1 张表的元信息
|
||||
checkMoreThanOne("table_name", tableNames);
|
||||
checkEmpty("table_name", tableNames);
|
||||
String table = tableNames.get(0);
|
||||
List<String> tableTypes = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getTableType)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("table_type", tableTypes);
|
||||
String type = tableTypes.get(0);
|
||||
List<String> filterFields = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getFilterField)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("filter_field", filterFields);
|
||||
String filterField = filterFields.get(0);
|
||||
List<String> filterValueList = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getFilterValues)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("filter_values", filterValueList);
|
||||
String filterValuesText = filterValueList.get(0);
|
||||
List<String> filterValues = (filterValuesText == null || filterValuesText.isEmpty())
|
||||
? Collections.emptyList()
|
||||
: Arrays.asList(filterValuesText.split(","));
|
||||
List<String> filterTypes = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getFilterType)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("filter_field", filterFields);
|
||||
TableMeta.FilterType filterType;
|
||||
try {
|
||||
filterType = TableMeta.FilterType.valueOf(filterTypes.get(0));
|
||||
} catch (IllegalArgumentException e) {
|
||||
filterType = TableMeta.FilterType.NONE;
|
||||
}
|
||||
List<String> topics = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getTopic)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("topic", topics);
|
||||
String topic = topics.get(0);
|
||||
List<String> pulsarAddresses = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getPulsarAddress)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("pulsar address", pulsarAddresses);
|
||||
String pulsarAddress = pulsarAddresses.get(0);
|
||||
List<Integer> priorities = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getPriority)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("priority", priorities);
|
||||
Integer priority = priorities.get(0);
|
||||
List<String> tagTexts = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getTags)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("tags", tagTexts);
|
||||
String tagText = tagTexts.get(0) == null ? "" : tagTexts.get(0);
|
||||
List<String> tags = Arrays.asList(tagText.split(","));
|
||||
List<Integer> versions = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getVersion)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("version", versions);
|
||||
Integer version = versions.get(0);
|
||||
|
||||
// 获取 Hudi 配置, 因为查出来同一张表的配置都相同, 所以直接取第一条即可
|
||||
TableMeta.RowMeta example = metaList.get(0);
|
||||
TableMeta.HudiMeta hudiMeta = TableMeta.HudiMeta.builder()
|
||||
.targetDataSource(example.getTgtDb())
|
||||
.targetTable(example.getTgtTable())
|
||||
.targetTableType(example.getTgtTableType())
|
||||
.targetHdfsPath(example.getTgtHdfsPath())
|
||||
.sourceTasks(example.getSourceTasks())
|
||||
.writeTasks(example.getWriteTasks())
|
||||
.writeOperation(example.getWriteOperation())
|
||||
.writeTaskMaxMemory(example.getWriteTaskMaxMemory())
|
||||
.writeBatchSize(example.getWriteBatchSize())
|
||||
.writeRateLimit(example.getWriteRateLimit())
|
||||
.bucketIndexNumber(example.getBucketIndexNumber())
|
||||
.compactionStrategy(example.getCompactionStrategy())
|
||||
.compactionTasks(example.getCompactionTasks())
|
||||
.compactionDeltaCommits(example.getCompactionDeltaCommits())
|
||||
.compactionDeltaSeconds(example.getCompactionDeltaSeconds())
|
||||
.compactionAsyncEnabled(example.getCompactionAsyncEnabled())
|
||||
.compactionMaxMemory(example.getCompactionMaxMemory())
|
||||
.configs(example.getConfigs())
|
||||
.keepFileVersion(example.getKeepFileVersion())
|
||||
.keepCommitVersion(example.getKeepCommitVersion())
|
||||
.build();
|
||||
TableMeta.YarnMeta syncYarnMeta = TableMeta.YarnMeta.builder()
|
||||
.jobManagerMemory(example.getSyncJobManagerMemory())
|
||||
.taskManagerMemory(example.getSyncTaskManagerMemory())
|
||||
.build();
|
||||
TableMeta.YarnMeta compactionYarnMeta = TableMeta.YarnMeta.builder()
|
||||
.jobManagerMemory(example.getCompactionJobManagerMemory())
|
||||
.taskManagerMemory(example.getCompactionTaskManagerMemory())
|
||||
.build();
|
||||
TableMeta.ConfigMeta configMeta = TableMeta.ConfigMeta.builder()
|
||||
.messageId(example.getMessageId())
|
||||
.metricPublishUrl(example.getMetricPublishUrl())
|
||||
.metricPrometheusUrl(example.getMetricPrometheusUrl())
|
||||
.metricApiUrl(example.getMetricApiUrl())
|
||||
.metricPublishDelay(example.getMetricPublishDelay())
|
||||
.metricPublishPeriod(example.getMetricPublishPeriod())
|
||||
.metricPublishTimeout(example.getMetricPublishTimeout())
|
||||
.metricPublishBatch(example.getMetricPublishBatch())
|
||||
.checkpointRootPath(example.getCheckpointRootPath())
|
||||
.zookeeperUrl(example.getZookeeperUrl())
|
||||
.build();
|
||||
TableMeta.JobMeta jobMeta = TableMeta.JobMeta.builder()
|
||||
.id(example.getJobId())
|
||||
.name(example.getJobName())
|
||||
.build();
|
||||
|
||||
TableMeta.ConnectionMeta connectionMeta = null;
|
||||
String connectionText = example.getConnection();
|
||||
if (connectionText != null && !connectionText.isEmpty()) {
|
||||
JSONObject connectionObj = JSONUtil.parseObj(connectionText);
|
||||
connectionMeta = TableMeta.ConnectionMeta.builder()
|
||||
.url(connectionObj.getStr("jdbc_url"))
|
||||
.user(connectionObj.getStr("jdbc_user"))
|
||||
.password(connectionObj.getStr("jdbc_password"))
|
||||
.driver(connectionObj.getStr("jdbc_driver"))
|
||||
.build();
|
||||
}
|
||||
|
||||
List<String> partitionFields = metaList.stream()
|
||||
.map(TableMeta.RowMeta::getPartitionField)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
checkEmptyOrMoreThanOne("partition_field", filterFields);
|
||||
String partitionField = partitionFields.get(0);
|
||||
|
||||
List<TableMeta.FieldMeta> primaryKeys = new ArrayList<>(), partitionKeys = new ArrayList<>();
|
||||
List<TableMeta.FieldMeta> fieldMetaList = new ArrayList<>(metaList.size());
|
||||
for (TableMeta.RowMeta rowMeta : metaList) {
|
||||
boolean isPrimaryKey = "y".equals(rowMeta.getPrimaryKey());
|
||||
boolean isPartitionKey = "y".equals(rowMeta.getPartitionKey());
|
||||
TableMeta.FieldMeta fieldMeta = TableMeta.FieldMeta.builder()
|
||||
.name(rowMeta.getFieldName().toUpperCase(Locale.ROOT))
|
||||
.sequence(rowMeta.getFieldSeq())
|
||||
.type(rowMeta.getFieldType())
|
||||
.isPrimaryKey(isPrimaryKey)
|
||||
.partitionKey(isPartitionKey)
|
||||
.length(rowMeta.getLength())
|
||||
.scala(rowMeta.getScala())
|
||||
.build();
|
||||
if (isPrimaryKey) {
|
||||
primaryKeys.add(fieldMeta);
|
||||
}
|
||||
if (isPartitionKey) {
|
||||
partitionKeys.add(fieldMeta);
|
||||
}
|
||||
fieldMetaList.add(fieldMeta);
|
||||
}
|
||||
return TableMeta.builder()
|
||||
.alias(alias)
|
||||
.source(dataSource)
|
||||
.schema(schema)
|
||||
.table(table)
|
||||
.type(type)
|
||||
.primaryKeys(primaryKeys)
|
||||
.partitionKeys(partitionKeys)
|
||||
.hudi(hudiMeta)
|
||||
.fields(fieldMetaList)
|
||||
.filterField(filterField)
|
||||
.filterValues(filterValues)
|
||||
.filterType(filterType)
|
||||
.topic(topic)
|
||||
.pulsarAddress(pulsarAddress)
|
||||
.syncYarn(syncYarnMeta)
|
||||
.compactionYarn(compactionYarnMeta)
|
||||
.partitionField(partitionField)
|
||||
.config(configMeta)
|
||||
.job(jobMeta)
|
||||
.connection(connectionMeta)
|
||||
.priority(priority)
|
||||
.sourceType(sourceType)
|
||||
.tags(tags)
|
||||
.version(version)
|
||||
.build();
|
||||
}
|
||||
|
||||
public static Optional<String> getPartitionField(TableMeta meta) {
|
||||
if (meta.getPartitionField() == null || "".equals(meta.getPartitionField())) {
|
||||
return Optional.empty();
|
||||
} else {
|
||||
return meta.getFields()
|
||||
.stream()
|
||||
.map(TableMeta.FieldMeta::getName)
|
||||
.filter(name -> meta.getPartitionField().equalsIgnoreCase(name))
|
||||
.findFirst();
|
||||
}
|
||||
}
|
||||
|
||||
public static Optional<String> getFilterField(TableMeta meta) {
|
||||
if (meta.getFilterField() == null || "".equals(meta.getFilterField())) {
|
||||
return Optional.empty();
|
||||
} else {
|
||||
return meta.getFields()
|
||||
.stream()
|
||||
.map(TableMeta.FieldMeta::getName)
|
||||
.filter(name -> meta.getFilterField().equalsIgnoreCase(name))
|
||||
.findFirst();
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean existsTag(TableMeta meta, String tag) {
|
||||
return meta.getTags() != null && meta.getTags().contains(tag);
|
||||
}
|
||||
}
|
||||
161
utils/executor/pom.xml
Normal file
161
utils/executor/pom.xml
Normal file
@@ -0,0 +1,161 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <groupId>com.lanyuanxiaoyao</groupId>
        <artifactId>hudi-service</artifactId>
        <version>1.0.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>executor</artifactId>

    <!-- Flink runtime/client stack; versions come from ${flink.version} and
         ${scala.major.version}, presumably defined in the parent POM (not
         visible here) — confirm. -->
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-optimizer_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-metrics-core</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-metrics-prometheus_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-container_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-yarn_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
            <!-- Hadoop is excluded here and supplied by the shaded uber jar below. -->
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hadoop-fs</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-uber_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-uber-blink_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-state-processor-api_${scala.major.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- NOTE(review): vendor-specific (CDP-style) Hadoop uber build pinned
             locally instead of in the parent — confirm this matches the target
             cluster's Hadoop distribution. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-shaded-hadoop-3-uber</artifactId>
            <version>3.1.1.7.2.9.0-173-9.0</version>
        </dependency>
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.parquet</groupId>
            <artifactId>parquet-format</artifactId>
            <version>2.4.0</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Builds the fat/uber jar that is submitted to YARN. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.3.0</version>
                <configuration>
                    <createDependencyReducedPom>false</createDependencyReducedPom>
                    <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
                    <transformers>
                        <!-- NOTE(review): this mainClass package (com.eshore.odcp...)
                             does not match this module's Runner class, which lives in
                             com.lanyuanxiaoyao.service.executor — confirm the shaded
                             jar's intended entry point. -->
                        <transformer
                                implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                            <mainClass>com.eshore.odcp.hudi.connector.utils.executor.Runner</mainClass>
                        </transformer>
                        <!-- Merges Akka/Flink reference.conf files instead of letting one overwrite the others. -->
                        <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                            <resource>reference.conf</resource>
                        </transformer>
                        <transformer
                                implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                    </transformers>
                    <filters>
                        <filter>
                            <artifact>*:*</artifact>
                            <!-- Strip signature files that would invalidate the shaded jar. -->
                            <excludes>
                                <exclude>META-INF/*.SF</exclude>
                                <exclude>META-INF/*.DSA</exclude>
                                <exclude>META-INF/*.RSA</exclude>
                                <exclude>log4j-surefire*.properties</exclude>
                            </excludes>
                        </filter>
                    </filters>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.lanyuanxiaoyao.service.executor;
|
||||
|
||||
import org.apache.flink.client.deployment.ClusterClientFactory;
|
||||
import org.apache.flink.client.deployment.ClusterDescriptor;
|
||||
import org.apache.flink.client.deployment.ClusterSpecification;
|
||||
import org.apache.flink.client.deployment.DefaultClusterClientServiceLoader;
|
||||
import org.apache.flink.client.deployment.application.ApplicationConfiguration;
|
||||
import org.apache.flink.client.program.ClusterClient;
|
||||
import org.apache.flink.client.program.ClusterClientProvider;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.runtime.security.SecurityConfiguration;
|
||||
import org.apache.flink.runtime.security.SecurityUtils;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
|
||||
/**
|
||||
* 启动类 #
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-01
|
||||
*/
|
||||
public class Runner {
|
||||
public static ApplicationId run(Configuration inputConfiguration, String className, String[] args) throws Exception {
|
||||
Configuration configuration = new Configuration(inputConfiguration);
|
||||
SecurityUtils.install(new SecurityConfiguration(configuration));
|
||||
return SecurityUtils.getInstalledContext()
|
||||
.runSecured(() -> {
|
||||
DefaultClusterClientServiceLoader yarnServiceLoader = new DefaultClusterClientServiceLoader();
|
||||
ApplicationConfiguration applicationConfiguration = new ApplicationConfiguration(args, className);
|
||||
//ApplicationDeployer deployer = new ApplicationClusterDeployer(new DefaultClusterClientServiceLoader());
|
||||
//deployer.run(configuration, applicationConfiguration);
|
||||
final ClusterClientFactory<ApplicationId> clientFactory = yarnServiceLoader.getClusterClientFactory(configuration);
|
||||
try (final ClusterDescriptor<ApplicationId> clusterDescriptor = clientFactory.createClusterDescriptor(configuration)) {
|
||||
final ClusterSpecification clusterSpecification = clientFactory.getClusterSpecification(configuration);
|
||||
ClusterClientProvider<ApplicationId> provider = clusterDescriptor.deployApplicationCluster(clusterSpecification, applicationConfiguration);
|
||||
ClusterClient<ApplicationId> clusterClient = provider.getClusterClient();
|
||||
if (clusterClient == null) {
|
||||
return null;
|
||||
}
|
||||
return clusterClient.getClusterId();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
package com.lanyuanxiaoyao.service.executor.metrics;
|
||||
|
||||
import org.apache.flink.annotation.docs.Documentation;
|
||||
import org.apache.flink.configuration.ConfigOption;
|
||||
|
||||
import static org.apache.flink.configuration.ConfigOptions.key;
|
||||
|
||||
/**
 * Config options for the {@link VictoriaMetricsReporter}.
 */
@Documentation.SuffixOption
public class VictoriaMetricsOptions {
    // Push endpoint of the VictoriaMetrics Prometheus import API; required (no default).
    public static final ConfigOption<String> ENDPOINT =
            key("endpoint")
                    .stringType()
                    .noDefaultValue()
                    .withDescription("Victoria metrics endpoint. eg: http://localhost:8428/api/v1/import/prometheus");

    // HTTP push timeout in milliseconds; defaults to 60000 (one minute).
    public static final ConfigOption<Integer> TIMEOUT =
            key("timeout")
                    .intType()
                    .defaultValue(60000)
                    .withDescription("Http push timeout. Default 1 minute");

    // Extra labels attached to every pushed metric; parsed by the factory
    // (presumably as "k1=v1;k2=v2" — see VictoriaMetricsReporterFactory).
    public static final ConfigOption<String> TAGS =
            key("tags")
                    .stringType()
                    .defaultValue("")
                    .withDescription("Extra tags for every metric");

    // Whether to send HTTP basic-auth credentials with each push; off by default.
    public static final ConfigOption<Boolean> ENABLE_AUTH =
            key("enable.auth")
                    .booleanType()
                    .defaultValue(false)
                    .withDescription("Enable metric server http basic auth");

    // Basic-auth username; only used when ENABLE_AUTH is true.
    public static final ConfigOption<String> AUTH_USERNAME =
            key("auth.username")
                    .stringType()
                    .defaultValue("")
                    .withDescription("Basic auth username");

    // Basic-auth password; only used when ENABLE_AUTH is true.
    public static final ConfigOption<String> AUTH_PASSWORD =
            key("auth.password")
                    .stringType()
                    .defaultValue("")
                    .withDescription("Basic auth password");
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package com.lanyuanxiaoyao.service.executor.metrics;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.http.HttpRequest;
|
||||
import cn.hutool.http.HttpResponse;
|
||||
import cn.hutool.http.HttpUtil;
|
||||
import io.prometheus.client.CollectorRegistry;
|
||||
import io.prometheus.client.exporter.common.TextFormat;
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.flink.metrics.prometheus.AbstractPrometheusReporter;
|
||||
import org.apache.flink.metrics.reporter.Scheduled;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-22
|
||||
*/
|
||||
public class VictoriaMetricsReporter extends AbstractPrometheusReporter implements Scheduled {
|
||||
private static final Logger logger = LoggerFactory.getLogger(VictoriaMetricsReporter.class);
|
||||
|
||||
private final String endpoint;
|
||||
private final Integer timout;
|
||||
private final Map<String, String> tags;
|
||||
private final Boolean enableBasicAuth;
|
||||
private final String basicAuthUsername;
|
||||
private final String basicAuthPassword;
|
||||
|
||||
public VictoriaMetricsReporter(String endpoint, Integer timout, Map<String, String> tags, Boolean enableBasicAuth, String basicAuthUsername, String basicAuthPassword) {
|
||||
this.endpoint = endpoint;
|
||||
this.timout = timout;
|
||||
this.tags = tags;
|
||||
this.enableBasicAuth = enableBasicAuth;
|
||||
this.basicAuthUsername = basicAuthUsername;
|
||||
this.basicAuthPassword = basicAuthPassword;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void report() {
|
||||
try (StringWriter writer = new StringWriter()) {
|
||||
TextFormat.write004(writer, CollectorRegistry.defaultRegistry.metricFamilySamples());
|
||||
|
||||
String query = tags.entrySet()
|
||||
.stream()
|
||||
.map(entry -> StrUtil.format("extra_label={}={}", entry.getKey(), entry.getValue()))
|
||||
.collect(Collectors.joining("&"));
|
||||
HttpRequest request = HttpUtil.createPost(StrUtil.format("{}?{}", endpoint, query))
|
||||
.body(writer.toString())
|
||||
.timeout(timout);
|
||||
if (enableBasicAuth) {
|
||||
request.basicAuth(basicAuthUsername, basicAuthPassword);
|
||||
}
|
||||
HttpResponse response = request.execute();
|
||||
if (!response.isOk()) {
|
||||
logger.warn("Fail to push metrics: {}, {}, endpoint: {}, tags: {}", response.getStatus(), response.body(), endpoint, tags);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
logger.error("Fail to write metrics, endpoint: {}, tags: {}, exception: {}", endpoint, tags, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package com.lanyuanxiaoyao.service.executor.metrics;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import org.apache.flink.metrics.MetricConfig;
|
||||
import org.apache.flink.metrics.reporter.InterceptInstantiationViaReflection;
|
||||
import org.apache.flink.metrics.reporter.MetricReporterFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static com.lanyuanxiaoyao.service.executor.metrics.VictoriaMetricsOptions.*;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-22
|
||||
*/
|
||||
@InterceptInstantiationViaReflection(
|
||||
reporterClassName = "com.eshore.odcp.hudi.connector.utils.executor.metrics.VictoriaMetricsReporter")
|
||||
public class VictoriaMetricsReporterFactory implements MetricReporterFactory {
|
||||
private static final Logger logger = LoggerFactory.getLogger(VictoriaMetricsReporterFactory.class);
|
||||
|
||||
@Override
|
||||
public VictoriaMetricsReporter createMetricReporter(Properties properties) {
|
||||
MetricConfig metricConfig = (MetricConfig) properties;
|
||||
String endpoint = metricConfig.getString(ENDPOINT.key(), ENDPOINT.defaultValue());
|
||||
int timeout = metricConfig.getInteger(TIMEOUT.key(), TIMEOUT.defaultValue());
|
||||
String tagsText = metricConfig.getString(TAGS.key(), TAGS.defaultValue());
|
||||
Boolean enableAuth = metricConfig.getBoolean(ENABLE_AUTH.key(), ENABLE_AUTH.defaultValue());
|
||||
String authUsername = metricConfig.getString(AUTH_USERNAME.key(), AUTH_USERNAME.defaultValue());
|
||||
String authPassword = metricConfig.getString(AUTH_PASSWORD.key(), AUTH_PASSWORD.defaultValue());
|
||||
|
||||
Map<String, String> tags = new HashMap<>(10);
|
||||
if (StrUtil.isNotBlank(tagsText)) {
|
||||
for (String item : tagsText.split(";")) {
|
||||
String[] parsed = item.split("=");
|
||||
tags.put(parsed[0], parsed[1]);
|
||||
}
|
||||
}
|
||||
logger.info("Create victoria metric reporter for endpoint {} timeout: {}, tags: {}, enable_auth: {}, auth_username: {}, auth_password: {}", endpoint, timeout, tags, enableAuth, authUsername, authPassword);
|
||||
return new VictoriaMetricsReporter(endpoint, timeout, tags, enableAuth, authUsername, authPassword);
|
||||
}
|
||||
}
|
||||
54
utils/executor/src/main/resources/log4j.properties
Normal file
54
utils/executor/src/main/resources/log4j.properties
Normal file
@@ -0,0 +1,54 @@
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds.
|
||||
monitorInterval=30
|
||||
|
||||
# This affects logging for both user code and Flink
|
||||
rootLogger.level = INFO
|
||||
rootLogger.appenderRef.file.ref = MainAppender
|
||||
|
||||
# Uncomment this if you want to _only_ change Flink's logging
|
||||
#logger.flink.name = org.apache.flink
|
||||
#logger.flink.level = INFO
|
||||
|
||||
# The following lines keep the log level of common libraries/connectors on
|
||||
# log level INFO. The root logger does not override this. You have to manually
|
||||
# change the log levels here.
|
||||
logger.akka.name = akka
|
||||
logger.akka.level = INFO
|
||||
logger.kafka.name= org.apache.kafka
|
||||
logger.kafka.level = INFO
|
||||
logger.hadoop.name = org.apache.hadoop
|
||||
logger.hadoop.level = INFO
|
||||
logger.zookeeper.name = org.apache.zookeeper
|
||||
logger.zookeeper.level = INFO
|
||||
logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3
|
||||
logger.shaded_zookeeper.level = INFO
|
||||
logger.hudi.name=org.apache.hudi
|
||||
logger.hudi.level=INFO
|
||||
|
||||
# Log all infos in the given file
|
||||
appender.main.name = MainAppender
|
||||
appender.main.type = Console
|
||||
appender.main.layout.type = PatternLayout
|
||||
appender.main.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
||||
|
||||
# Suppress the irrelevant (wrong) warnings from the Netty channel handler
|
||||
logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline
|
||||
logger.netty.level = OFF
|
||||
1
utils/executor/src/main/resources/log4j2.properties
Normal file
1
utils/executor/src/main/resources/log4j2.properties
Normal file
@@ -0,0 +1 @@
|
||||
log4j.rootLogger=INFO
|
||||
@@ -0,0 +1,16 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.flink.yarn.YarnClusterClientFactory
|
||||
@@ -0,0 +1,17 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.flink.yarn.executors.YarnJobClusterExecutorFactory
|
||||
org.apache.flink.yarn.executors.YarnSessionClusterExecutorFactory
|
||||
@@ -0,0 +1 @@
|
||||
com.lanyuanxiaoyao.service.executor.metrics.VictoriaMetricsReporterFactory
|
||||
@@ -0,0 +1,18 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.flink.table.planner.delegation.BlinkPlannerFactory
|
||||
org.apache.flink.table.planner.delegation.BlinkExecutorFactory
|
||||
org.apache.flink.table.planner.delegation.DefaultParserFactory
|
||||
158
utils/sync/pom.xml
Normal file
158
utils/sync/pom.xml
Normal file
@@ -0,0 +1,158 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<groupId>com.lanyuanxiaoyao</groupId>
|
||||
<artifactId>hudi-service</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>sync</artifactId>
|
||||
|
||||
<properties>
|
||||
<parquet.version>1.10.1</parquet.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.lanyuanxiaoyao</groupId>
|
||||
<artifactId>service-common</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-flink${flink.major.version}-bundle</artifactId>
|
||||
<version>${hudi.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-java</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-streaming-java_${scala.major.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-clients_${scala.major.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-table-common</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-table-runtime-blink_${scala.major.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-table-planner-blink_${scala.major.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.pulsar</groupId>
|
||||
<artifactId>pulsar-client</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>cn.hutool</groupId>
|
||||
<artifactId>hutool-all</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-shaded-guava</artifactId>
|
||||
<version>30.1.1-jre-15.0</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<version>3.1.2</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.failsafe</groupId>
|
||||
<artifactId>failsafe</artifactId>
|
||||
<version>3.2.4</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>pl.tkowalcz.tjahzi</groupId>
|
||||
<artifactId>logback-appender</artifactId>
|
||||
<version>0.9.23</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>log4j-over-slf4j</artifactId>
|
||||
<version>1.7.15</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>jcl-over-slf4j</artifactId>
|
||||
<version>1.7.15</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<version>5.7.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.3.0</version>
|
||||
<configuration>
|
||||
<createDependencyReducedPom>false</createDependencyReducedPom>
|
||||
<promoteTransitiveDependencies>true</promoteTransitiveDependencies>
|
||||
<transformers>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
|
||||
<resource>reference.conf</resource>
|
||||
</transformer>
|
||||
<transformer
|
||||
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
|
||||
</transformers>
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
<exclude>log4j-surefire*.properties</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
@@ -0,0 +1,263 @@
|
||||
package com.lanyuanxiaoyao.service.sync;
|
||||
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.RunMeta;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.functions.CompactionEventHandler;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.*;
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.flink.api.common.typeinfo.TypeInformation;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
import org.apache.flink.streaming.api.operators.ProcessOperator;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.client.HoodieFlinkWriteClient;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.TableSchemaResolver;
|
||||
import org.apache.hudi.common.table.log.HoodieLogFormat;
|
||||
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
|
||||
import org.apache.hudi.common.table.log.block.HoodieDeleteBlock;
|
||||
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
|
||||
import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.ClosableIterator;
|
||||
import org.apache.hudi.common.util.CompactionUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.org.apache.avro.Schema;
|
||||
import org.apache.hudi.org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hudi.sink.compact.*;
|
||||
import org.apache.hudi.sink.compact.strategy.CompactionPlanStrategies;
|
||||
import org.apache.hudi.sink.compact.strategy.CompactionPlanStrategy;
|
||||
import org.apache.hudi.table.HoodieFlinkTable;
|
||||
import org.apache.hudi.util.CompactionUtil;
|
||||
import org.apache.hudi.util.StreamerUtil;
|
||||
import org.apache.parquet.avro.AvroSchemaConverter;
|
||||
import org.apache.parquet.schema.MessageType;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
 * Offline compaction job for a single Hudi table.
 *
 * <p>Acquires a ZooKeeper compaction lock, selects pending compaction plans from the
 * table's active timeline (all of them, or only explicitly requested instants), rolls
 * back stale inflight compactions, records pre-commit statistics, then runs the
 * compaction as a small Flink streaming pipeline.
 *
 * @author ZhangJiacheng
 * @date 2022-06-21
 */
public class Compactor {
    private static final Logger logger = LoggerFactory.getLogger(Compactor.class);
    private static final ObjectMapper mapper = JacksonUtils.getMapper();

    /**
     * Entry point. Arguments are parsed by {@code ArgumentsUtils}: the Flink job
     * descriptor, the table meta, an optional comma-style instant selection, and the
     * target cluster name.
     */
    public static void main(String[] args) throws Exception {
        FlinkJob flinkJob = ArgumentsUtils.getFlinkJob(args);
        TableMeta tableMeta = ArgumentsUtils.getTableMeta(args);
        String selectedInstants = ArgumentsUtils.getInstants(args);
        String cluster = ArgumentsUtils.getCluster(args);

        logger.info("Bootstrap flink job: {}", mapper.writeValueAsString(flinkJob));
        logger.info("Bootstrap table meta: {}", mapper.writeValueAsString(tableMeta));
        logger.info("Bootstrap instants: {}", selectedInstants);
        logger.info("Bootstrap cluster: {}", cluster);

        // YARN application id injected into the container environment.
        String applicationId = System.getenv("_APP_ID");

        RunMeta runMeta = new RunMeta(cluster, flinkJob.getId(), tableMeta.getAlias());
        logger.info("Run meta: {}", runMeta);
        // Take a per-table compaction lock in ZooKeeper so only one compaction runs
        // at a time; presumably throws (aborting the job) when the lock is held — TODO confirm.
        ZkUtils.createCompactionLock(flinkJob, tableMeta, tableMeta.getConfig().getZookeeperUrl(), mapper.writeValueAsString(runMeta));
        logger.info("Lock for {} {} success", flinkJob.getId(), tableMeta.getAlias());

        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        // No explicit instants -> compact every pending plan; otherwise only the
        // selected instants.
        FlinkCompactionConfig config = new FlinkCompactionConfig();
        if (ObjectUtil.isEmpty(selectedInstants)) {
            config.compactionPlanSelectStrategy = CompactionPlanStrategy.ALL;
        } else {
            config.compactionPlanSelectStrategy = CompactionPlanStrategy.INSTANTS;
            config.compactionPlanInstant = selectedInstants;
        }

        GlobalConfiguration globalConfiguration = new GlobalConfiguration(cluster, applicationId, tableMeta);
        Configuration configuration = SyncUtils.getCompactionFlinkConfiguration(
                globalConfiguration,
                new Configuration(),
                flinkJob,
                tableMeta,
                SyncUtils.avroSchemaWithExtraFields(tableMeta),
                1
        );

        CompactionEventHandler eventHandler = new CompactionEventHandler(globalConfiguration, flinkJob, tableMeta);

        HoodieFlinkWriteClient<?> writeClient = StreamerUtil.createWriteClient(configuration);
        HoodieFlinkTable<?> table = writeClient.getHoodieTable();

        table.getMetaClient().reloadActiveTimeline();

        StatusUtils.compactionStart(globalConfiguration, flinkJob, tableMeta);

        // Dump the current state of the timeline for diagnostics.
        logger.info("{} timeline detail ({})", tableMeta.getAlias(), LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")));
        table.getActiveTimeline()
                .getInstants()
                .forEach(instant -> logger.info("{} {} {}", instant.getTimestamp(), instant.getAction(), instant.getState()));

        HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
        List<HoodieInstant> requested = CompactionPlanStrategies.getStrategy(config).select(pendingCompactionTimeline);
        if (requested.isEmpty()) {
            // Nothing to do; report and exit cleanly.
            logger.info("No compaction plan scheduled");
            eventHandler.closed("No compaction plan scheduled", null);
            return;
        }

        List<String> compactionInstantTimes = requested.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
        // Roll back any instant left inflight by a previous failed run, then refresh
        // the timeline so later transitions see a consistent state.
        compactionInstantTimes.forEach(timestamp -> {
            HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(timestamp);
            if (pendingCompactionTimeline.containsInstant(inflightInstant)) {
                logger.info("Rollback inflight compaction instant: [" + timestamp + "]");
                table.rollbackInflightCompaction(inflightInstant);
                table.getMetaClient().reloadActiveTimeline();
            }
        });

        // Load the compaction plan for each instant, dropping empty/absent plans.
        List<Pair<String, HoodieCompactionPlan>> compactionPlans = compactionInstantTimes.stream()
                .map(timestamp -> {
                    try {
                        return Pair.of(timestamp, CompactionUtils.getCompactionPlan(table.getMetaClient(), timestamp));
                    } catch (IOException e) {
                        throw new HoodieException("Get compaction plan at instant " + timestamp + " error", e);
                    }
                })
                .filter(pair -> {
                    HoodieCompactionPlan plan = pair.getRight();
                    return plan != null && plan.getOperations() != null && plan.getOperations().size() > 0;
                })
                .collect(Collectors.toList());

        if (compactionPlans.isEmpty()) {
            logger.info("No compaction plan for instant " + String.join(",", compactionInstantTimes));
            eventHandler.closed("No compaction plan for instant " + String.join(",", compactionInstantTimes), null);
            return;
        }

        List<HoodieInstant> instants = compactionInstantTimes.stream().map(HoodieTimeline::getCompactionRequestedInstant).collect(Collectors.toList());

        logger.info("Start to compaction for instant " + compactionInstantTimes);

        // Mark every selected instant REQUESTED -> INFLIGHT (cleaning residue first
        // when the instant is no longer pending), then refresh the timeline.
        for (HoodieInstant instant : instants) {
            if (!pendingCompactionTimeline.containsInstant(instant)) {
                CompactionUtil.cleanInstant(table.getMetaClient(), instant);
            }
            table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);
        }
        table.getMetaClient().reloadActiveTimeline();

        // Pre-commit statistics are best-effort: a failure here must not block the
        // actual compaction below.
        try {
            compactionPlans.forEach(pair -> preCommit(globalConfiguration, flinkJob, tableMeta, table, pair.getLeft(), pair.getRight()));
        } catch (Throwable e) {
            logger.warn("Cannot submit pre-commit log");
        }

        // Minimal Flink pipeline: plan source -> parallel compact tasks -> single
        // commit sink (parallelism 1 so commits are serialized).
        environment.addSource(new CompactionPlanSourceFunction(compactionPlans))
                .name("compaction_source")
                .uid("uid_compaction_source")
                .rebalance()
                .transform("compact_task",
                        TypeInformation.of(CompactionCommitEvent.class),
                        new ProcessOperator<>(new CompactFunction(configuration)))
                .setParallelism(configuration.getInteger(FlinkOptions.COMPACTION_TASKS))
                .addSink(new CompactionCommitSink(configuration, eventHandler))
                .name("compaction_commit")
                .uid("uid_compaction_commit")
                .setParallelism(1);

        environment.execute(NameHelper.compactionFlinkName(flinkJob.getId(), tableMeta.getSchema(), tableMeta.getAlias()));
    }

    /**
     * Scans every delta (log) file referenced by the compaction plan and reports
     * pre-commit statistics (file count/size, updated and deleted record counts)
     * via {@code StatusUtils.compactionPreCommit}.
     *
     * <p>Per-file parse failures are logged and skipped so one corrupt log file
     * does not abort the whole statistics pass.
     */
    private static void preCommit(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, HoodieFlinkTable<?> table, String instant, HoodieCompactionPlan compactionPlan) {
        HoodieTableMetaClient client = table.getMetaClient();
        FileSystem fileSystem = client.getRawFs();
        // Resolve each operation's delta file names into absolute paths under the
        // table base path.
        List<Path> deltaFilePaths = compactionPlan.getOperations()
                .stream()
                .flatMap(operation -> operation.getDeltaFilePaths().stream().map(path -> StrUtil.format("{}/{}", operation.getPartitionPath(), path)))
                .map(path -> new Path(StrUtil.format("{}/{}", client.getBasePathV2(), path)))
                .collect(Collectors.toList());
        // Atomics because the stream below runs in parallel.
        AtomicLong totalLogFilesCompacted = new AtomicLong(0);
        AtomicLong totalLogFilesSize = new AtomicLong(0);
        AtomicLong totalRecordsDeleted = new AtomicLong(0);
        AtomicLong totalCompactedRecordsUpdated = new AtomicLong(0);
        deltaFilePaths.parallelStream().forEach(path -> {
            try {
                FileStatus fileStatus = fileSystem.getFileStatus(path);
                totalLogFilesCompacted.incrementAndGet();
                totalLogFilesSize.addAndGet(fileStatus.getLen());
                MessageType messageType = TableSchemaResolver.readSchemaFromLogFile(fileSystem, path);
                // No schema -> no data blocks to count in this log file.
                if (ObjectUtil.isNull(messageType)) {
                    return;
                }
                Schema writerSchema = new AvroSchemaConverter().convert(messageType);
                try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fileSystem, new HoodieLogFile(path), writerSchema)) {
                    while (reader.hasNext()) {
                        HoodieLogBlock block = reader.next();
                        switch (block.getBlockType()) {
                            case AVRO_DATA_BLOCK:
                                // Count each record in the Avro data block as an update.
                                HoodieAvroDataBlock avroDataBlock = (HoodieAvroDataBlock) block;
                                try (ClosableIterator<IndexedRecord> avroDataBlockRecordIterator = avroDataBlock.getRecordIterator()) {
                                    while (avroDataBlockRecordIterator.hasNext()) {
                                        // logger.info("totalCompactedRecordsUpdated {}", totalCompactedRecordsUpdated.incrementAndGet());
                                        totalCompactedRecordsUpdated.incrementAndGet();
                                        avroDataBlockRecordIterator.next();
                                    }
                                }
                                break;
                            case PARQUET_DATA_BLOCK:
                                // Same counting for Parquet-encoded data blocks.
                                HoodieParquetDataBlock parquetDataBlock = (HoodieParquetDataBlock) block;
                                try (ClosableIterator<IndexedRecord> parquetDataBlockRecordIterator = parquetDataBlock.getRecordIterator()) {
                                    while (parquetDataBlockRecordIterator.hasNext()) {
                                        // logger.info("totalCompactedRecordsUpdated {}", totalCompactedRecordsUpdated.incrementAndGet());
                                        totalCompactedRecordsUpdated.incrementAndGet();
                                        parquetDataBlockRecordIterator.next();
                                    }
                                }
                                break;
                            case DELETE_BLOCK:
                                HoodieDeleteBlock deleteBlock = (HoodieDeleteBlock) block;
                                // logger.info("totalRecordsDeleted {}", totalRecordsDeleted.addAndGet(deleteBlock.getRecordsToDelete().length));
                                totalRecordsDeleted.addAndGet(deleteBlock.getRecordsToDelete().length);
                                break;
                            default:
                                // Command/corrupt blocks carry no countable records.
                                break;
                        }
                    }
                }
            } catch (Exception e) {
                logger.warn("Parse log file failure for " + path, e);
            }
        });

        Map<String, Long> metadata = new ConcurrentHashMap<>(5);
        metadata.put("totalLogFilesCompacted", totalLogFilesCompacted.get());
        metadata.put("totalLogFilesSize", totalLogFilesSize.get());
        metadata.put("totalRecordsDeleted", totalRecordsDeleted.get());
        metadata.put("totalCompactedRecordsUpdated", totalCompactedRecordsUpdated.get());
        metadata.put("totalLogRecordsCompacted", totalRecordsDeleted.get() + totalCompactedRecordsUpdated.get());

        StatusUtils.compactionPreCommit(configuration, flinkJob, tableMeta, instant, metadata);
    }
}
|
||||
@@ -0,0 +1,170 @@
|
||||
package com.lanyuanxiaoyao.service.sync;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.Record;
|
||||
import com.lanyuanxiaoyao.service.common.entity.RunMeta;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.exception.CheckpointRootPathNotFoundException;
|
||||
import com.lanyuanxiaoyao.service.common.exception.ZookeeperUrlNotFoundException;
|
||||
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
|
||||
import com.lanyuanxiaoyao.service.common.utils.TableMetaHelper;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.functions.PulsarMessage2RecordFunction;
|
||||
import com.lanyuanxiaoyao.service.sync.functions.PulsarMessageSourceReader;
|
||||
import com.lanyuanxiaoyao.service.sync.functions.ValidateRecordFilter;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.*;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
|
||||
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
|
||||
import org.apache.flink.core.fs.Path;
|
||||
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.flink.streaming.api.CheckpointingMode;
|
||||
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
|
||||
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static com.lanyuanxiaoyao.service.common.Constants.*;
|
||||
|
||||
/**
 * Synchronization application: bootstraps a Flink streaming job that reads change
 * records from Pulsar and sinks them into Hudi tables according to the job's run mode.
 *
 * @author lanyuanxiaoyao
 * @version 0.0.1
 * @date 2021-11-26
 */
public class Synchronizer {
    private static final Logger logger = LoggerFactory.getLogger(Synchronizer.class);
    // Shared Jackson mapper, used only to serialize metadata for logs / ZK payloads.
    private static final ObjectMapper mapper = JacksonUtils.getMapper();

    /**
     * Extracts a single configuration value shared by all table metas.
     *
     * NOTE(review): `.distinct().findFirst()` silently picks the first value when the
     * metas disagree — it does not verify that the value is actually unique.
     *
     * @param metas             table metadata list to scan
     * @param getter            accessor for the configuration value on each meta
     * @param notFoundException supplier of the exception thrown when no value is present
     * @return the first distinct non-missing value
     * @throws Exception the supplied exception when the list yields no value
     */
    private static String findConfigFromList(List<TableMeta> metas, Function<TableMeta, String> getter, Supplier<Exception> notFoundException) throws Exception {
        return metas.stream()
                .map(getter)
                .distinct()
                .findFirst()
                .orElseThrow(notFoundException);
    }

    /**
     * Entry point: parses job arguments, acquires ZooKeeper locks (one per table plus one
     * for the whole job), configures the Flink environment, and dispatches on run mode.
     *
     * @param args CLI arguments carrying the Flink job, table meta list, and cluster name
     * @throws Exception on argument/config lookup failure or Flink job submission failure
     */
    public static void main(String[] args) throws Exception {
        FlinkJob flinkJob = ArgumentsUtils.getFlinkJob(args);
        List<TableMeta> tableMetaList = ArgumentsUtils.getTableMetaList(args);
        String cluster = ArgumentsUtils.getCluster(args);

        logger.info("Bootstrap flink job: {}", mapper.writeValueAsString(flinkJob));
        logger.info("Bootstrap table meta list: {}", mapper.writeValueAsString(tableMetaList));
        logger.info("Bootstrap cluster: {}", cluster);

        // Application id injected by the launcher environment; may be null if unset.
        String applicationId = System.getenv("_APP_ID");

        // Take one per-table lock, then a job-level lock. Lock acquisition happens
        // BEFORE any Flink environment setup so a duplicate job fails fast.
        String zkUrl = findConfigFromList(tableMetaList, meta -> meta.getConfig().getZookeeperUrl(), ZookeeperUrlNotFoundException::new);
        for (TableMeta tableMeta : tableMetaList) {
            RunMeta runMeta = new RunMeta(cluster, flinkJob.getId(), tableMeta.getAlias());
            logger.info("Run meta: {}", runMeta);
            ZkUtils.createSynchronizerLock(flinkJob, tableMeta, zkUrl, mapper.writeValueAsString(runMeta));
        }
        RunMeta runMeta = new RunMeta(cluster, flinkJob.getId());
        logger.info("Run meta: {}", runMeta);
        ZkUtils.createSynchronizerLock(flinkJob, zkUrl, mapper.writeValueAsString(runMeta));
        logger.info("Lock for {} success", flinkJob.getId());

        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        // Checkpointing: every 15 min, exactly-once, unaligned (forced), storage rooted
        // per job id under the configured checkpoint root path.
        environment.enableCheckpointing(15 * MINUTE);
        String checkpointRootPath = findConfigFromList(tableMetaList, meta -> meta.getConfig().getCheckpointRootPath(), CheckpointRootPathNotFoundException::new);
        environment.getCheckpointConfig().setCheckpointStorage(new Path(checkpointRootPath + "/" + flinkJob.getId()));
        environment.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        environment.getCheckpointConfig().setCheckpointTimeout(2 * HOUR);
        environment.getCheckpointConfig().setMinPauseBetweenCheckpoints(15 * MINUTE);
        environment.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        environment.getCheckpointConfig().setForceUnalignedCheckpoints(true);
        environment.getCheckpointConfig().enableUnalignedCheckpoints();
        environment.getCheckpointConfig().setTolerableCheckpointFailureNumber(5);

        // Any single table tagged with disable-chaining disables chaining for the whole job.
        if (tableMetaList.stream().anyMatch(meta -> TableMetaHelper.existsTag(meta, TAGS_DISABLE_CHAINING))) {
            logger.warn("Disable operator chaining");
            environment.disableOperatorChaining();
        }

        environment.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, MINUTE));

        environment.setStateBackend(new HashMapStateBackend());

        switch (flinkJob.getRunMode()) {
            case ALL_IN_ONE:
                // All tables share one Flink job graph; single execute() at the end.
                for (TableMeta tableMeta : tableMetaList) {
                    GlobalConfiguration globalConfiguration = new GlobalConfiguration(cluster, applicationId, tableMeta);
                    createFlinkJob(environment, globalConfiguration, flinkJob, tableMeta);
                    publishSyncStart(globalConfiguration, flinkJob, tableMeta);
                }
                environment.execute(NameHelper.syncFlinkName(flinkJob.getId(), flinkJob.getName()));
                break;
            case ONE_IN_ONE:
                // One execute() per table. NOTE(review): execute() blocks until the job
                // finishes, so subsequent tables only start after the previous one ends —
                // presumably intentional for this mode; confirm with the scheduler.
                for (TableMeta tableMeta : tableMetaList) {
                    GlobalConfiguration globalConfiguration = new GlobalConfiguration(cluster, applicationId, tableMeta);
                    createFlinkJob(environment, globalConfiguration, flinkJob, tableMeta);
                    publishSyncStart(globalConfiguration, flinkJob, tableMeta);
                    environment.execute(NameHelper.syncFlinkName(flinkJob.getId(), flinkJob.getName(), tableMeta.getAlias()));
                }
                break;
            case ALL_IN_ONE_BY_TABLE:
                // Group tables by physical table name; one execute() per group.
                scheduleOneInOneRegistryByField(environment, cluster, applicationId, flinkJob, tableMetaList, TableMeta::getTable);
                break;
            case ALL_IN_ONE_BY_SCHEMA:
                // Group tables by schema name; one execute() per group.
                scheduleOneInOneRegistryByField(environment, cluster, applicationId, flinkJob, tableMetaList, TableMeta::getSchema);
                break;
            default:
                throw new IllegalArgumentException("Unsupported run mode: " + flinkJob.getRunMode());
        }
    }

    /**
     * Groups tables by the given field, wires every table of a group into the shared
     * environment, then executes one Flink job per group (named after the group key).
     *
     * @param environment   shared Flink execution environment
     * @param cluster       cluster identifier
     * @param applicationId launcher-provided application id (may be null)
     * @param flinkJob      job definition
     * @param tableMetaList all table metas of this job
     * @param field         grouping key accessor (table name or schema name)
     * @throws Exception on job graph construction or execution failure
     */
    private static void scheduleOneInOneRegistryByField(StreamExecutionEnvironment environment, String cluster, String applicationId, FlinkJob flinkJob, List<TableMeta> tableMetaList, Function<TableMeta, String> field) throws Exception {
        Map<String, List<TableMeta>> map = tableMetaList.stream()
                .collect(Collectors.groupingBy(field));
        for (Map.Entry<String, List<TableMeta>> entry : map.entrySet()) {
            for (TableMeta tableMeta : entry.getValue()) {
                GlobalConfiguration globalConfiguration = new GlobalConfiguration(cluster, applicationId, tableMeta);
                createFlinkJob(environment, globalConfiguration, flinkJob, tableMeta);
                publishSyncStart(globalConfiguration, flinkJob, tableMeta);
            }
            environment.execute(NameHelper.syncFlinkName(flinkJob.getId(), flinkJob.getName(), entry.getKey()));
        }
    }

    /**
     * Builds the per-table pipeline: Pulsar source -> (optional raw backup sink) ->
     * JSON parse -> validity filter -> Hudi sink.
     *
     * @param environment   Flink environment to attach operators to
     * @param configuration per-table global configuration
     * @param flinkJob      job definition
     * @param tableMeta     table being wired
     * @throws IOException propagated from sink construction
     */
    private static void createFlinkJob(StreamExecutionEnvironment environment, GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) throws IOException {
        logger.info("Table meta: {}", mapper.writeValueAsString(tableMeta));
        logger.info("Config meta: {}", mapper.writeValueAsString(configuration));
        SingleOutputStreamOperator<String> source = environment
                .addSource(new PulsarMessageSourceReader(configuration, flinkJob, tableMeta))
                .setParallelism(tableMeta.getHudi().getSourceTasks());
        // Optional side sink: archive raw Pulsar messages to HDFS when the table is tagged.
        if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_PULSAR_BACKUP)) {
            Path path = new Path(StrUtil.format("hdfs://b2/apps/datalake/hive_test/source/{}/{}", String.join("_", flinkJob.getName().split("\\s")), tableMeta.getAlias()));
            StreamingFileSink<String> fileSink = StreamingFileSink.<String>forRowFormat(path, new SimpleStringEncoder<>("UTF-8"))
                    .withRollingPolicy(DefaultRollingPolicy.builder()
                            .withInactivityInterval(HOUR)
                            .withMaxPartSize(GB)
                            .build())
                    .build();
            source.addSink(fileSink).name("Backup pulsar data");
        }
        // Parse each message into a Record, then drop records that failed to parse.
        SingleOutputStreamOperator<Record> middle = source
                .map(new PulsarMessage2RecordFunction(configuration, flinkJob, tableMeta))
                .name("Json ( " + tableMeta.getSchema() + "-" + tableMeta.getAlias() + " )")
                .filter(new ValidateRecordFilter(configuration, flinkJob, tableMeta))
                .name("Reject json parse failure");
        SyncUtils.sinkToHoodieByTable(configuration, flinkJob, tableMeta, environment, middle);
    }

    /**
     * Publishes the "sync started" status event for one table.
     */
    private static void publishSyncStart(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
        StatusUtils.syncStart(configuration, flinkJob, tableMeta);
    }
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package com.lanyuanxiaoyao.service.sync.configuration;
|
||||
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.util.PartitionPathEncodeUtils;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.keygen.SimpleAvroKeyGenerator;
|
||||
import org.apache.hudi.org.apache.avro.generic.GenericRecord;
|
||||
|
||||
/**
|
||||
* @author ZhangJiacheng
|
||||
*/
|
||||
public class DefaultPartitionNameKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
private final String defaultPartitionName;
|
||||
|
||||
public DefaultPartitionNameKeyGenerator(TypedProperties props) {
|
||||
super(props);
|
||||
defaultPartitionName = props.getString(FlinkOptions.PARTITION_DEFAULT_NAME.key(), FlinkOptions.PARTITION_DEFAULT_NAME.defaultValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
String partitionPathField = getPartitionPathFields().get(0);
|
||||
String partitionPath = HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathField, true, consistentLogicalTimestampEnabled);
|
||||
if (partitionPath == null || partitionPath.isEmpty()) {
|
||||
partitionPath = defaultPartitionName;
|
||||
}
|
||||
if (encodePartitionPath) {
|
||||
partitionPath = PartitionPathEncodeUtils.escapePathName(partitionPath);
|
||||
}
|
||||
if (hiveStylePartitioning) {
|
||||
partitionPath = partitionPathField + "=" + partitionPath;
|
||||
}
|
||||
return partitionPath;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,92 @@
|
||||
package com.lanyuanxiaoyao.service.sync.configuration;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* 同步静态配置
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-13
|
||||
*/
|
||||
public class GlobalConfiguration implements Serializable {
|
||||
private final String cluster;
|
||||
private final String applicationId;
|
||||
private final Boolean metricEnable = false;
|
||||
private final String metricPublishUrl;
|
||||
private final String metricPublishPrometheusUrl;
|
||||
private final String metricsPublishCustomUrl;
|
||||
private final Integer metricPublishDelay;
|
||||
private final Integer metricPublishPeriod;
|
||||
private final Integer metricPublishTimeout;
|
||||
private final Integer metricPublishBatch;
|
||||
|
||||
public GlobalConfiguration(String cluster, String applicationId, TableMeta meta) {
|
||||
this.cluster = cluster;
|
||||
this.applicationId = applicationId;
|
||||
this.metricPublishUrl = meta.getConfig().getMetricPublishUrl();
|
||||
this.metricPublishPrometheusUrl = meta.getConfig().getMetricPrometheusUrl();
|
||||
this.metricsPublishCustomUrl = meta.getConfig().getMetricApiUrl();
|
||||
this.metricPublishDelay = meta.getConfig().getMetricPublishDelay();
|
||||
this.metricPublishPeriod = meta.getConfig().getMetricPublishPeriod();
|
||||
this.metricPublishTimeout = meta.getConfig().getMetricPublishTimeout();
|
||||
this.metricPublishBatch = meta.getConfig().getMetricPublishBatch();
|
||||
}
|
||||
|
||||
public String getCluster() {
|
||||
return cluster;
|
||||
}
|
||||
|
||||
public String getApplicationId() {
|
||||
return applicationId;
|
||||
}
|
||||
|
||||
public Boolean getMetricEnable() {
|
||||
return metricEnable;
|
||||
}
|
||||
|
||||
public String getMetricPublishUrl() {
|
||||
return metricPublishUrl;
|
||||
}
|
||||
|
||||
public String getMetricPublishPrometheusUrl() {
|
||||
return metricPublishPrometheusUrl;
|
||||
}
|
||||
|
||||
public String getMetricsPublishCustomUrl() {
|
||||
return metricsPublishCustomUrl;
|
||||
}
|
||||
|
||||
public Integer getMetricPublishDelay() {
|
||||
return metricPublishDelay;
|
||||
}
|
||||
|
||||
public Integer getMetricPublishPeriod() {
|
||||
return metricPublishPeriod;
|
||||
}
|
||||
|
||||
public Integer getMetricPublishTimeout() {
|
||||
return metricPublishTimeout;
|
||||
}
|
||||
|
||||
public Integer getMetricPublishBatch() {
|
||||
return metricPublishBatch;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "GlobalConfiguration{" +
|
||||
"cluster='" + cluster + '\'' +
|
||||
", applicationId='" + applicationId + '\'' +
|
||||
", metricEnable=" + metricEnable +
|
||||
", metricPublishUrl='" + metricPublishUrl + '\'' +
|
||||
", metricPublishPrometheusUrl='" + metricPublishPrometheusUrl + '\'' +
|
||||
", metricsPublishCustomUrl='" + metricsPublishCustomUrl + '\'' +
|
||||
", metricPublishDelay=" + metricPublishDelay +
|
||||
", metricPublishPeriod=" + metricPublishPeriod +
|
||||
", metricPublishTimeout=" + metricPublishTimeout +
|
||||
", metricPublishBatch=" + metricPublishBatch +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.lanyuanxiaoyao.service.sync.configuration;
|
||||
|
||||
import dev.failsafe.RetryPolicy;
|
||||
import java.time.Duration;
|
||||
|
||||
public interface RetryPolicyProvider {
|
||||
RetryPolicy<String> HTTP_RETRY = RetryPolicy.<String>builder()
|
||||
.handle(Throwable.class)
|
||||
.withDelay(Duration.ofSeconds(1))
|
||||
.withMaxAttempts(10)
|
||||
.build();
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
package com.lanyuanxiaoyao.service.sync.configuration;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.org.apache.avro.util.Utf8;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author lanyuanxiaoyao
|
||||
* @date 2023-04-18
|
||||
*/
|
||||
public class TraceOverwriteWithLatestAvroPayload extends OverwriteWithLatestAvroPayload {
|
||||
private static final Logger logger = LoggerFactory.getLogger(TraceOverwriteWithLatestAvroPayload.class);
|
||||
|
||||
private final String latestOpts;
|
||||
|
||||
public TraceOverwriteWithLatestAvroPayload(GenericRecord record, Comparable orderingVal) {
|
||||
super(record, orderingVal);
|
||||
this.latestOpts = updateLatestOpts(Option.ofNullable(record));
|
||||
}
|
||||
|
||||
public TraceOverwriteWithLatestAvroPayload(Option<GenericRecord> record) {
|
||||
super(record);
|
||||
this.latestOpts = updateLatestOpts(record);
|
||||
}
|
||||
|
||||
private String updateLatestOpts(Option<GenericRecord> record) {
|
||||
try {
|
||||
return record
|
||||
.map(r -> ((Utf8) r.get(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME)).toString())
|
||||
.orElse(null);
|
||||
} catch (Throwable throwable) {
|
||||
logger.error("Get latest opts failure", throwable);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<Map<String, String>> getMetadata() {
|
||||
if (this.latestOpts == null) {
|
||||
return Option.empty();
|
||||
}
|
||||
Map<String, String> metadata = super.getMetadata().orElse(new HashMap<>());
|
||||
metadata.put(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, this.latestOpts);
|
||||
return Option.of(metadata);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
package com.lanyuanxiaoyao.service.sync.configuration;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Map;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author lanyuanxiaoyao
|
||||
* @date 2023-04-17
|
||||
*/
|
||||
public class TraceWriteStatus extends WriteStatus {
|
||||
private static final Logger logger = LoggerFactory.getLogger(TraceWriteStatus.class);
|
||||
private final static DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
||||
private long latestOpts = 0L;
|
||||
|
||||
public TraceWriteStatus() {
|
||||
super();
|
||||
}
|
||||
|
||||
public TraceWriteStatus(Boolean trackSuccessRecords, Double failureFraction) {
|
||||
super(trackSuccessRecords, failureFraction);
|
||||
}
|
||||
|
||||
public long getLatestOpts() {
|
||||
return latestOpts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void markSuccess(HoodieRecord record, Option<Map<String, String>> optionalRecordMetadata) {
|
||||
super.markSuccess(record, optionalRecordMetadata);
|
||||
try {
|
||||
optionalRecordMetadata.ifPresent(map -> {
|
||||
if (map.containsKey(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME)) {
|
||||
String inOpts = map.get(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME);
|
||||
if (StrUtil.isNotBlank(inOpts)) {
|
||||
long current = LocalDateTime.parse(inOpts, FORMATTER).toInstant(ZoneOffset.ofHours(8)).toEpochMilli();
|
||||
latestOpts = Long.max(latestOpts, current);
|
||||
}
|
||||
}
|
||||
});
|
||||
} catch (Throwable throwable) {
|
||||
logger.error("Parse latest opts failure", throwable);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void markFailure(HoodieRecord record, Throwable t, Option<Map<String, String>> optionalRecordMetadata) {
|
||||
super.markFailure(record, t, optionalRecordMetadata);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
package com.lanyuanxiaoyao.service.sync.functions;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.utils.TableMetaHelper;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.TraceWriteStatus;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.StatusUtils;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.sink.compact.CompactEventHandler;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* 压缩事件处理
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-15
|
||||
*/
|
||||
public class CompactionEventHandler implements CompactEventHandler, Serializable {
|
||||
private static final Logger logger = LoggerFactory.getLogger(CompactionEventHandler.class);
|
||||
|
||||
private final GlobalConfiguration configuration;
|
||||
private final FlinkJob flinkJob;
|
||||
private final TableMeta tableMeta;
|
||||
|
||||
public CompactionEventHandler(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
|
||||
this.configuration = configuration;
|
||||
this.flinkJob = flinkJob;
|
||||
this.tableMeta = tableMeta;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void failure(String instant) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void success(String instant, List<WriteStatus> statuses, HoodieCommitMetadata metadata) {
|
||||
StatusUtils.compactionCommit(configuration, flinkJob, tableMeta, instant, metadata);
|
||||
logger.info("WriteStatus: {}", statuses);
|
||||
if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_TRACE_LATEST_OP_TS)) {
|
||||
Long max = statuses.stream()
|
||||
.map(status -> {
|
||||
if (status instanceof TraceWriteStatus) {
|
||||
TraceWriteStatus s = (TraceWriteStatus) status;
|
||||
return s.getLatestOpts();
|
||||
}
|
||||
return 0L;
|
||||
})
|
||||
.max(Long::compare)
|
||||
.orElse(0L);
|
||||
logger.info("Latest op ts: {}", max);
|
||||
StatusUtils.compactionLatestOpTs(configuration, flinkJob, tableMeta, max);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void closed(String message, Exception exception) {
|
||||
StatusUtils.compactionFinish(configuration, flinkJob, tableMeta, message, exception);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
package com.lanyuanxiaoyao.service.sync.functions;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import cn.hutool.core.map.MapUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.Record;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.metrics.CountMetric;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import org.apache.flink.api.common.functions.RichFilterFunction;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.runtime.state.FunctionInitializationContext;
|
||||
import org.apache.flink.runtime.state.FunctionSnapshotContext;
|
||||
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
|
||||
|
||||
/**
|
||||
* 操作类型过滤算子
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-12
|
||||
*/
|
||||
public class OperationTypeFilter extends RichFilterFunction<Record> implements CheckpointedFunction {
|
||||
private final CountMetric insertRateMetric;
|
||||
private final CountMetric updateRateMetric;
|
||||
private final CountMetric deleteRateMetric;
|
||||
private final CountMetric ddlRateMetric;
|
||||
private final CountMetric unknownRateMetric;
|
||||
private final List<CountMetric> metrics;
|
||||
private final GlobalConfiguration globalConfiguration;
|
||||
|
||||
public OperationTypeFilter(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta) {
|
||||
this.globalConfiguration = globalConfiguration;
|
||||
|
||||
Function<String, Map<String, String>> fillTags = operator -> MapUtil.<String, String>builder()
|
||||
.put(Constants.METRICS_LABEL_TYPE, operator)
|
||||
.build();
|
||||
|
||||
insertRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.INSERT));
|
||||
updateRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.UPDATE));
|
||||
deleteRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.DELETE));
|
||||
ddlRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.DDL));
|
||||
unknownRateMetric = new CountMetric(globalConfiguration, Constants.METRICS_SYNC_SOURCE_OPERATION_TYPE_RECEIVE, flinkJob, tableMeta, fillTags.apply(Constants.UNKNOWN));
|
||||
|
||||
metrics = ListUtil.toList(insertRateMetric, updateRateMetric, deleteRateMetric, ddlRateMetric, unknownRateMetric);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(Configuration parameters) throws Exception {
|
||||
super.open(parameters);
|
||||
|
||||
// 初始化指标
|
||||
MetricsUtils.createMakePointTimer(globalConfiguration, metrics);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean filter(Record record) {
|
||||
String opType = record.getStatement().getOpType();
|
||||
switch (opType) {
|
||||
case Constants.INSERT:
|
||||
insertRateMetric.increment();
|
||||
break;
|
||||
case Constants.UPDATE:
|
||||
updateRateMetric.increment();
|
||||
break;
|
||||
case Constants.DELETE:
|
||||
deleteRateMetric.increment();
|
||||
break;
|
||||
case Constants.DDL:
|
||||
ddlRateMetric.increment();
|
||||
break;
|
||||
default:
|
||||
unknownRateMetric.increment();
|
||||
}
|
||||
return !Constants.DDL.equals(record.getStatement().getOpType());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initializeState(FunctionInitializationContext context) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void snapshotState(FunctionSnapshotContext context) {
|
||||
MetricsUtils.publishAllMetrics(metrics);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
package com.lanyuanxiaoyao.service.sync.functions;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.Record;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.utils.RecordHelper;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.JacksonUtils;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.StatusUtils;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.regex.Pattern;
|
||||
import org.apache.flink.api.common.functions.RichMapFunction;
|
||||
import org.apache.flink.runtime.state.FunctionInitializationContext;
|
||||
import org.apache.flink.runtime.state.FunctionSnapshotContext;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
 * Pulsar message to object: parses each raw Pulsar JSON message into a {@link Record},
 * tracks the most recent operation timestamp, and reports it as sync progress on
 * every checkpoint.
 *
 * @author ZhangJiacheng
 * @date 2022-06-11
 */
public class PulsarMessage2RecordFunction extends RichMapFunction<String, Record> implements CheckpointedFunction {
    private static final Logger logger = LoggerFactory.getLogger(PulsarMessage2RecordFunction.class);
    // Last observed op_ts string. NOTE(review): this is STATIC, so it is shared by all
    // instances of this function in the same task JVM — with multiple tables in one job
    // their operation times may cross-contaminate; confirm this is intentional.
    private static final AtomicReference<String> lastOperationTime = new AtomicReference<>("");
    private final static DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    // Accepts exactly "yyyy-MM-dd HH:mm:ss"; anything else is rejected in snapshotState.
    private static final Pattern OPTS_PATTERN = Pattern.compile("^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}$");
    private final GlobalConfiguration globalConfiguration;
    private final FlinkJob flinkJob;
    private final TableMeta tableMeta;
    private final ObjectMapper mapper = JacksonUtils.getMapper();

    public PulsarMessage2RecordFunction(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta) {
        this.globalConfiguration = globalConfiguration;
        this.flinkJob = flinkJob;
        this.tableMeta = tableMeta;
    }

    /**
     * Parses one JSON message into a Record and updates the shared last-operation time
     * (version-update records are excluded from progress tracking).
     *
     * @param message raw JSON payload from Pulsar
     * @return the parsed Record, or null when parsing fails — a downstream filter is
     *         expected to drop nulls (NOTE(review): verify the pipeline wiring)
     * @throws JsonProcessingException declared but parse errors are caught and logged
     */
    @Override
    public Record map(String message) throws JsonProcessingException {
        Record record = null;
        try {
            record = mapper.readValue(message, Record.class);
            if (RecordHelper.isNotVersionUpdateRecord(record)) {
                lastOperationTime.set(record.getStatement().getOpTs());
            }
        } catch (Exception exception) {
            logger.error("Message json parse failure", exception);
        }
        return record;
    }

    /**
     * On each checkpoint: converts the last op_ts string to epoch millis and publishes
     * it as the table's sync progress. A missing/malformed op_ts publishes null.
     * NOTE(review): the +8 offset assumes timestamps are Asia/Shanghai local time.
     */
    @Override
    public void snapshotState(FunctionSnapshotContext context) {
        String opTs = lastOperationTime.get();
        Long timestamp = null;
        try {
            if (StrUtil.isNotBlank(opTs) && OPTS_PATTERN.matcher(opTs).matches()) {
                timestamp = LocalDateTime.parse(opTs, FORMATTER).toInstant(ZoneOffset.ofHours(8)).toEpochMilli();
            } else {
                throw new Exception("Opts is not match regex " + OPTS_PATTERN.pattern());
            }
        } catch (Exception e) {
            logger.error("Parse operation time error", e);
        }
        StatusUtils.syncOperation(globalConfiguration, flinkJob, tableMeta, timestamp);
    }

    // Stateless from Flink's point of view; nothing to restore.
    @Override
    public void initializeState(FunctionInitializationContext context) {
    }
}
|
||||
@@ -0,0 +1,235 @@
|
||||
package com.lanyuanxiaoyao.service.sync.functions;
|
||||
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.http.HttpUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.utils.LogHelper;
|
||||
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
|
||||
import com.lanyuanxiaoyao.service.common.utils.RecordHelper;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.metrics.MessageSizeSizeMetric;
|
||||
import com.lanyuanxiaoyao.service.sync.metrics.RateMetric;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.LoadBalance;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.StatusUtils;
|
||||
import dev.failsafe.Failsafe;
|
||||
import dev.failsafe.RetryPolicy;
|
||||
import java.time.Duration;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import org.apache.flink.api.common.state.CheckpointListener;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.runtime.state.FunctionInitializationContext;
|
||||
import org.apache.flink.runtime.state.FunctionSnapshotContext;
|
||||
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
|
||||
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
|
||||
import org.apache.pulsar.client.api.*;
|
||||
import org.apache.pulsar.client.impl.schema.StringSchema;
|
||||
import org.apache.pulsar.client.internal.DefaultImplementation;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static com.lanyuanxiaoyao.service.common.utils.LogHelper.LogPoint.*;
|
||||
|
||||
/**
 * Pulsar Reader Source.
 *
 * <p>A Flink parallel source that reads string messages from a single Pulsar topic via the
 * Reader API and emits them downstream. The read position (a Pulsar {@link MessageId}) is
 * tracked per checkpoint: on {@code initializeState} the last committed message id is fetched
 * over HTTP from a status service, on {@code snapshotState} the current id is staged per
 * checkpoint id, and on {@code notifyCheckpointComplete} it is pushed back to the status
 * service via {@code StatusUtils.syncCheckpoint}.
 *
 * @author ZhangJiacheng
 * @date 2022-06-11
 */
public class PulsarMessageSourceReader extends RichParallelSourceFunction<String> implements CheckpointedFunction, CheckpointListener {
    private static final Logger logger = LoggerFactory.getLogger(PulsarMessageSourceReader.class);
    // Retry policy for fetching the bootstrap message id over HTTP: up to 10 attempts, 1s apart.
    private static final RetryPolicy<String> MESSAGE_ID_RETRY = RetryPolicy.<String>builder()
            .handle(Exception.class)
            .withDelay(Duration.ofSeconds(1))
            .withMaxAttempts(10)
            .build();
    // Pulsar topic to read, taken from TableMeta in the constructor.
    private final String topic;
    private final GlobalConfiguration globalConfiguration;
    private final FlinkJob flinkJob;
    private final TableMeta tableMeta;
    // Most recently read message id; also the position restored/committed by checkpoints.
    private final AtomicReference<MessageId> lastMessageId = new AtomicReference<>();
    // Publish time of the last non-version-update record; reported on checkpoint complete.
    private final AtomicLong lastPublishTime = new AtomicLong(0);
    private final RateMetric messageReceiveMetric;
    private final MessageSizeSizeMetric messageSizeReceiveMetric;
    // checkpoint id -> message id staged in snapshotState, consumed in notifyCheckpointComplete.
    private final Map<Long, MessageId> messageIdMap = new ConcurrentHashMap<>();
    // NOTE(review): written by cancel() from another thread but not volatile — TODO confirm
    // visibility is acceptable here (Flink examples usually declare this volatile).
    private boolean running = true;
    private PulsarClient client = null;
    private Reader<String> reader = null;

    /**
     * Builds the source and seeds {@code lastMessageId} from the table config, falling back
     * to {@link MessageId#earliest} when no message id is configured.
     *
     * @param globalConfiguration global runtime configuration (service URLs, metric settings)
     * @param flinkJob            the owning Flink job descriptor
     * @param tableMeta           table metadata; supplies topic, pulsar address and bootstrap message id
     */
    public PulsarMessageSourceReader(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta) {
        logger.info("Use PulsarMessageSourceReader");
        this.globalConfiguration = globalConfiguration;
        this.flinkJob = flinkJob;
        this.topic = tableMeta.getTopic();
        this.tableMeta = tableMeta;

        String messageId = tableMeta.getConfig().getMessageId();
        logger.info("{} {}", Constants.LOG_POINT_PULSAR_SOURCE_BOOTSTRAP_MESSAGE_ID, messageId);
        if (StrUtil.isNotBlank(messageId)) {
            lastMessageId.set(parseMessageId(messageId));
        } else {
            // No configured start position: begin from the earliest retained message.
            logger.warn("Message id is empty");
            lastMessageId.set(MessageId.earliest);
        }

        messageReceiveMetric = new RateMetric(
                globalConfiguration,
                Constants.METRICS_SYNC_SOURCE_MESSAGE_RECEIVE,
                flinkJob,
                tableMeta
        );
        messageSizeReceiveMetric = new MessageSizeSizeMetric(
                globalConfiguration,
                Constants.METRICS_SYNC_SOURCE_MESSAGE_SIZE_RECEIVE_BYTES,
                flinkJob, tableMeta
        );
    }

    /**
     * Parses a "ledgerId:entryId" string into a Pulsar {@link MessageId} with partition -1.
     * NOTE(review): throws NumberFormatException / ArrayIndexOutOfBoundsException on malformed
     * input, and splits the string twice — harmless but could be done once.
     */
    private static MessageId parseMessageId(String messageIdText) {
        return DefaultImplementation.newMessageId(Long.parseLong(messageIdText.split(":")[0]), Long.parseLong(messageIdText.split(":")[1]), -1);
    }

    /**
     * Restores the read position: queries the status service for the last committed message id
     * of this (job, alias) pair, retrying per {@code MESSAGE_ID_RETRY}. A blank response keeps
     * the constructor-seeded position.
     */
    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        LogHelper.info(logger, CHECKPOINT_INITIAL);
        String queryUrl = StrUtil.format(
                "{}/api/message_id?flink_job_id={}&alias={}",
                LoadBalance.getCustomPublishUrl(globalConfiguration),
                flinkJob.getId(),
                tableMeta.getAlias()
        );
        logger.info("Query url: {}", queryUrl);
        String messageId = Failsafe.with(MESSAGE_ID_RETRY)
                .onFailure(event -> {
                    if (ObjectUtil.isNotNull(event.getException())) {
                        logger.error(StrUtil.format("{} Get message id error", Constants.LOG_POINT_PULSAR_SOURCE_GET_MESSAGE_ID_ERROR), event.getException());
                    }
                })
                .get(() ->
                        HttpUtil.createGet(queryUrl)
                                .header(Constants.API_HEADER_NAME, Constants.API_VERSION)
                                .execute()
                                .body()
                );
        LogHelper.info(logger, CHECKPOINT_INITIAL_MESSAGE_ID, "Get message id: {}", messageId);
        if (StrUtil.isNotBlank(messageId)) {
            lastMessageId.set(parseMessageId(messageId));
        } else {
            logger.warn(StrUtil.format("{} Message id is empty, now message id is {}", Constants.LOG_POINT_MESSAGE_ID_EMPTY, lastMessageId.get()));
        }
    }

    /**
     * Main read loop: blocks on {@code reader.readNext()}, emits each value under the
     * checkpoint lock, then updates position and metrics. Any failure in one iteration is
     * logged and the loop continues.
     */
    @Override
    public void run(SourceContext<String> context) throws Exception {
        String currentValue = null;
        while (running) {
            Message<String> message;
            try {
                message = reader.readNext();
                if (ObjectUtil.isNotNull(message)) {
                    String value = message.getValue();
                    // Remember the value so the catch block can include it in the error log.
                    currentValue = value;
                    if (ObjectUtil.isEmpty(value)) {
                        // NOTE(review): first placeholder logs the (empty) value itself —
                        // presumably the topic or a log-point constant was intended; confirm.
                        logger.warn("{} {}", message.getValue(), message.getMessageId());
                    }
                    // Emit under the checkpoint lock so the collect and the subsequent
                    // position update cannot straddle a checkpoint barrier incorrectly.
                    synchronized (context.getCheckpointLock()) {
                        context.collect(value);
                    }

                    // Version-update control records don't advance the data publish time.
                    if (RecordHelper.isNotVersionUpdateRecord(value)) {
                        lastPublishTime.set(message.getPublishTime());
                    }
                    lastMessageId.set(message.getMessageId());

                    messageReceiveMetric.increment();
                    try {
                        // Byte length via default charset; failures must not kill the loop.
                        messageSizeReceiveMetric.increment(message.getValue().getBytes().length);
                    } catch (Throwable t) {
                        logger.warn("Parse message size failure", t);
                    }
                }
            } catch (Throwable t) {
                // Deliberate catch-all: keep reading after a bad record or transient error.
                logger.error("Read message failure, current value: " + currentValue, t);
            }
        }
    }

    /**
     * Opens the Pulsar client and reader starting (inclusively) at {@code lastMessageId},
     * and registers the periodic metric publishing timers.
     */
    @Override
    public void open(Configuration configuration) throws Exception {
        super.open(configuration);

        // Initialize metrics (periodic make-point timers).
        MetricsUtils.createMakePointTimer(globalConfiguration, messageReceiveMetric);
        MetricsUtils.createMakePointTimer(globalConfiguration, messageSizeReceiveMetric);

        try {
            client = PulsarClient.builder()
                    .serviceUrl(tableMeta.getPulsarAddress())
                    .build();
            reader = client.newReader(new StringSchema())
                    .topic(topic)
                    .receiverQueueSize(10000)
                    .subscriptionName(NameHelper.pulsarSubscriptionName(flinkJob.getId(), tableMeta.getAlias()))
                    .startMessageId(lastMessageId.get())
                    // Inclusive restart: the checkpointed message is re-read (at-least-once).
                    .startMessageIdInclusive()
                    .create();
        } catch (Exception exception) {
            logger.error(StrUtil.format("Connect pulsar error ({} {})", tableMeta.getPulsarAddress(), topic), exception);
            throw exception;
        }
        logger.info("Message id set to {}", lastMessageId.get());
    }

    /** Requests the read loop to stop; the loop exits after the current readNext returns. */
    @Override
    public void cancel() {
        running = false;
    }

    /** Closes the reader then the client, logging (not propagating) Pulsar close errors. */
    @Override
    public void close() throws Exception {
        super.close();
        if (reader != null) {
            try {
                reader.close();
            } catch (PulsarClientException e) {
                logger.error("Pulsar reader close error", e);
            }
        }
        if (client != null) {
            try {
                client.close();
            } catch (PulsarClientException e) {
                logger.error("Pulsar client close error", e);
            }
        }
    }

    /** Stages the current message id under this checkpoint's id and flushes the metrics. */
    @Override
    public void snapshotState(FunctionSnapshotContext context) {
        MessageId messageId = lastMessageId.get();
        messageIdMap.put(context.getCheckpointId(), messageId);
        LogHelper.info(logger, CHECKPOINT_START, "Checkpoint start message id: {}, checkpoint id: {}", messageId, context.getCheckpointId());

        messageReceiveMetric.publish();
        messageSizeReceiveMetric.publish();
    }

    /**
     * Commits the staged message id for a completed checkpoint to the status service and
     * removes the staging entry. Falls back to {@link MessageId#earliest} if the checkpoint
     * id was never staged.
     */
    @Override
    public void notifyCheckpointComplete(long checkpointId) {
        MessageId messageId = messageIdMap.getOrDefault(checkpointId, MessageId.earliest);
        LogHelper.info(logger, CHECKPOINT_COMPLETE, "Checkpoint complete message id: {}, checkpoint id: {}", messageId, checkpointId);
        StatusUtils.syncCheckpoint(globalConfiguration, flinkJob, tableMeta, messageId.toString(), lastPublishTime.get());
        messageIdMap.remove(checkpointId);
    }

    // NOTE(review): aborted checkpoints leave their entry in messageIdMap (only removed on
    // complete) — confirm whether stale entries should be cleaned up here.
    @Override
    public void notifyCheckpointAborted(long checkpointId) throws Exception {
        CheckpointListener.super.notifyCheckpointAborted(checkpointId);
    }
}
|
||||
@@ -0,0 +1,171 @@
|
||||
package com.lanyuanxiaoyao.service.sync.functions;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.Record;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.utils.LogHelper;
|
||||
import com.lanyuanxiaoyao.service.common.utils.MapHelper;
|
||||
import com.lanyuanxiaoyao.service.common.utils.RecordHelper;
|
||||
import com.lanyuanxiaoyao.service.common.utils.TableMetaHelper;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.functions.type.TypeConverter;
|
||||
import com.lanyuanxiaoyao.service.sync.metrics.CountMetric;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.JacksonUtils;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.StatusUtils;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.SyncUtils;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.flink.api.common.functions.RichMapFunction;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.runtime.state.FunctionInitializationContext;
|
||||
import org.apache.flink.runtime.state.FunctionSnapshotContext;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
|
||||
import org.apache.flink.table.data.GenericRowData;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.apache.flink.table.data.StringData;
|
||||
import org.apache.hudi.org.apache.avro.Schema;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static com.lanyuanxiaoyao.service.common.utils.LogHelper.LogPoint.VERSION_UPDATE;
|
||||
|
||||
/**
 * Converts a {@code Record} into one or more Flink {@link RowData} rows.
 *
 * <p>Version-update control records are reported via {@code StatusUtils.versionUpdate} and
 * produce no rows. For data records: when an update changes the filter field or the
 * partition field, a delete row built from the record's "before" image is emitted ahead of
 * the current row, so the stale copy is removed from its old location first.
 *
 * @author ZhangJiacheng
 * @date 2022-06-13
 */
public class Record2RowDataFunction extends RichMapFunction<Record, List<RowData>> implements CheckpointedFunction {
    private static final Logger logger = LoggerFactory.getLogger(Record2RowDataFunction.class);

    private final GlobalConfiguration globalConfiguration;
    private final TableMeta tableMeta;
    // Counts updates that changed the configured filter field.
    private final CountMetric changeFilterMetric;
    // Counts updates that changed the configured partition field.
    private final CountMetric changePartitionMetric;
    private final ObjectMapper mapper = JacksonUtils.getMapper();
    private final FlinkJob flinkJob;
    // Avro schema (with extra Hudi fields) built in initializeState.
    private Schema schema;

    /**
     * @param globalConfiguration global runtime configuration (metric settings, service URLs)
     * @param flinkJob            the owning Flink job descriptor
     * @param tableMeta           table metadata: fields, filter and partition configuration
     */
    public Record2RowDataFunction(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta) {
        this.globalConfiguration = globalConfiguration;
        this.flinkJob = flinkJob;
        this.tableMeta = tableMeta;

        changeFilterMetric = new CountMetric(
                globalConfiguration,
                Constants.METRICS_SYNC_SOURCE_CHANGE_FILTER,
                flinkJob, tableMeta
        );
        changePartitionMetric = new CountMetric(
                globalConfiguration,
                Constants.METRICS_SYNC_SOURCE_CHANGE_PARTITION,
                flinkJob, tableMeta
        );
    }

    /** Builds the Avro schema (table fields plus Hudi bookkeeping fields) used for conversion. */
    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        schema = SyncUtils.avroSchemaWithExtraFields(tableMeta);
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);

        // Initialize metrics (periodic make-point timers).
        MetricsUtils.createMakePointTimer(globalConfiguration, changeFilterMetric);
        MetricsUtils.createMakePointTimer(globalConfiguration, changePartitionMetric);
    }

    /** Flushes both change metrics on every checkpoint. */
    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        MetricsUtils.publishAllMetrics(changeFilterMetric);
        MetricsUtils.publishAllMetrics(changePartitionMetric);
    }

    // NOTE(review): this private method is never called inside this class (map() delegates to
    // TypeConverter instead) and duplicates TypeConverterV1.convertToGenericRowData — confirm
    // whether it is still needed.
    private GenericRowData covert2RowData(Schema schema, Map<String, Object> current) {
        List<Schema.Field> fields = schema.getFields();
        GenericRowData data = new GenericRowData(fields.size());
        for (int index = 0; index < fields.size(); index++) {
            Schema.Field field = fields.get(index);
            // For telepg sources field names are normalized (lower-cased) by FIELD_COVERT;
            // the upstream naming is inconsistent and we compensate downstream.
            Object value = current.getOrDefault(Constants.FIELD_COVERT.apply(tableMeta, field.name()), null);
            if (field.schema().getType().equals(Schema.Type.STRING)
                    || (field.schema().isUnion() && field.schema().getTypes().contains(Schema.create(Schema.Type.STRING)))
                    || value instanceof String) {
                data.setField(index, StringData.fromString((String) value));
            } else {
                data.setField(index, value);
            }
        }
        return data;
    }

    /**
     * Decides whether a row should be dropped according to the table's filter configuration.
     * EXCLUDE drops rows whose filter-field value is in the configured set; INCLUDE drops rows
     * whose value is NOT in the set. Rows missing the filter field are always kept.
     *
     * @return true when the row must be filtered out
     */
    private Boolean isFilterOut(TableMeta tableMeta, Map<String, Object> current) {
        if (!tableMeta.getFilterType().equals(TableMeta.FilterType.NONE)) {
            if (current.containsKey(tableMeta.getFilterField())) {
                String fieldValue = MapHelper.getStringWithoutCase(current, tableMeta.getFilterField());
                if (tableMeta.getFilterType().equals(TableMeta.FilterType.EXCLUDE)) {
                    return tableMeta.getFilterValues().contains(fieldValue);
                } else if (tableMeta.getFilterType().equals(TableMeta.FilterType.INCLUDE)) {
                    return !tableMeta.getFilterValues().contains(fieldValue);
                }
            }
        }
        return false;
    }

    /**
     * Maps one change record to its output rows.
     *
     * @param record the incoming change record
     * @return an empty list for version-update records; otherwise 1 row (the current image)
     *         or 2 rows (a delete of the "before" image first, when the filter or partition
     *         field changed), filtered per {@link #isFilterOut}
     * @throws RuntimeException when the record carries no current statement
     */
    @Override
    public List<RowData> map(Record record) throws Exception {
        List<Map<String, Object>> result = ListUtil.list(false);

        if (RecordHelper.isVersionUpdateRecord(record)) {
            Record.Statement statement = record.getStatement();
            // NOTE(review): getVersion() is passed for both remaining placeholders — the
            // second argument was presumably meant to be something else; confirm intent.
            LogHelper.info(logger, VERSION_UPDATE, "{} {} version: {}", mapper.writeValueAsString(statement.getSchema()), statement.getVersion(), statement.getVersion());
            LogHelper.info(logger, VERSION_UPDATE, "Raw: {}", mapper.writeValueAsString(record));
            StatusUtils.versionUpdate(globalConfiguration, flinkJob, tableMeta, record.getStatement().getVersion(), statement.getOpTs());
            return ListUtil.empty();
        }

        Map<String, Object> current = RecordHelper.getCurrentStatement(record);
        if (Objects.isNull(current)) {
            logger.error("Record: {}", mapper.writeValueAsString(record));
            throw new RuntimeException("Current cannot be null");
        }

        // If an update changed the filter field's value, the old row must be deleted first too.
        boolean isChangeFilter = RecordHelper.isChangeField(tableMeta, record, TableMetaHelper::getFilterField);
        if (isChangeFilter) {
            logger.info("Change filter: {}", mapper.writeValueAsString(record));
            changeFilterMetric.increment();
        }

        // If it is an update and the partition field (e.g. city_id) differs, delete the old record first.
        boolean isChangePartition = RecordHelper.isChangeField(tableMeta, record, TableMetaHelper::getPartitionField);
        if (isChangePartition) {
            logger.info("Change partition field: {}", mapper.writeValueAsString(record));
            changePartitionMetric.increment();
        }

        if (isChangeFilter || isChangePartition) {
            // Prepend a delete row built from the pre-update image so the stale copy is removed.
            Map<String, Object> before = record.getStatement().getBefore();
            result.add(0, RecordHelper.addExtraMetadata(before, tableMeta, record, true));
        }

        // Add Hudi-specific metadata fields to the current image.
        result.add(RecordHelper.addExtraMetadata(current, tableMeta, record));

        return result.stream()
                // Drop rows excluded by the configured filter field.
                .filter(r -> !isFilterOut(tableMeta, r))
                .map(r -> TypeConverter.getInstance(tableMeta)
                        .convertToGenericRowData(tableMeta, schema, r))
                .collect(Collectors.toList());
    }
}
|
||||
@@ -0,0 +1,40 @@
|
||||
package com.lanyuanxiaoyao.service.sync.functions;
|
||||
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.Record;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import org.apache.flink.api.common.functions.RichFilterFunction;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* 判断 Record 是否正确
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-11-15
|
||||
*/
|
||||
public class ValidateRecordFilter extends RichFilterFunction<Record> {
|
||||
private static final Logger logger = LoggerFactory.getLogger(ValidateRecordFilter.class);
|
||||
|
||||
public ValidateRecordFilter(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean filter(Record record) {
|
||||
if (ObjectUtil.isNull(record)) {
|
||||
logger.warn("Record is null");
|
||||
return false;
|
||||
}
|
||||
if (ObjectUtil.isNull(record.getSource())) {
|
||||
logger.warn("Record Source is null");
|
||||
return false;
|
||||
}
|
||||
if (ObjectUtil.isNull(record.getStatement())) {
|
||||
logger.warn("Record Statement is null");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.lanyuanxiaoyao.service.sync.functions.type;
|
||||
|
||||
import com.github.benmanes.caffeine.cache.Caffeine;
|
||||
import com.github.benmanes.caffeine.cache.LoadingCache;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
import org.apache.flink.table.data.GenericRowData;
|
||||
import org.apache.hudi.org.apache.avro.Schema;
|
||||
|
||||
/**
|
||||
* 处理类型相关内容
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2023-07-20
|
||||
*/
|
||||
public interface TypeConverter extends Serializable {
|
||||
LoadingCache<Integer, TypeConverter> CACHE = Caffeine.newBuilder()
|
||||
.build(version -> {
|
||||
switch (version) {
|
||||
case 1:
|
||||
return new TypeConverterV2();
|
||||
case 0:
|
||||
default:
|
||||
return new TypeConverterV1();
|
||||
}
|
||||
});
|
||||
|
||||
static TypeConverter getInstance(TableMeta meta) {
|
||||
return getInstance(meta.getVersion());
|
||||
}
|
||||
|
||||
static TypeConverter getInstance(Integer version) {
|
||||
if (version == null) {
|
||||
version = 0;
|
||||
}
|
||||
return CACHE.get(version);
|
||||
}
|
||||
|
||||
Schema convertToSchema(TableMeta meta);
|
||||
|
||||
GenericRowData convertToGenericRowData(TableMeta meta, Schema schema, Map<String, Object> data);
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
package com.lanyuanxiaoyao.service.sync.functions.type;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.flink.table.data.GenericRowData;
|
||||
import org.apache.flink.table.data.StringData;
|
||||
import org.apache.hudi.org.apache.avro.Schema;
|
||||
import org.apache.hudi.org.apache.avro.SchemaBuilder;
|
||||
|
||||
/**
|
||||
* 第一版本的类型转换
|
||||
* 所有字段类型都是 String
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2023-07-20
|
||||
*/
|
||||
public class TypeConverterV1 implements TypeConverter {
|
||||
@Override
|
||||
public Schema convertToSchema(TableMeta meta) {
|
||||
SchemaBuilder.FieldAssembler<Schema> fieldBuilder = SchemaBuilder.builder()
|
||||
.record(meta.getTable())
|
||||
.fields();
|
||||
fieldBuilder.nullableBoolean(Constants.HUDI_DELETE_KEY_NAME, false);
|
||||
fieldBuilder.nullableString(Constants.UNION_KEY_NAME, "");
|
||||
|
||||
meta.getFields().forEach(fieldMeta -> fieldBuilder.optionalString(fieldMeta.getName()));
|
||||
|
||||
fieldBuilder.nullableLong(Constants.UPDATE_TIMESTAMP_KEY_NAME, -1);
|
||||
fieldBuilder.nullableString(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, "");
|
||||
return fieldBuilder.endRecord();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenericRowData convertToGenericRowData(TableMeta meta, Schema schema, Map<String, Object> data) {
|
||||
List<Schema.Field> fields = schema.getFields();
|
||||
GenericRowData row = new GenericRowData(fields.size());
|
||||
for (int index = 0; index < fields.size(); index++) {
|
||||
Schema.Field field = fields.get(index);
|
||||
// 如果是telepg的话,字段名就要统一改成小写,上游不规范,下游擦屁股
|
||||
Object value = data.getOrDefault(Constants.FIELD_COVERT.apply(meta, field.name()), null);
|
||||
if (field.schema().getType().equals(Schema.Type.STRING)
|
||||
|| (field.schema().isUnion() && field.schema().getTypes().contains(Schema.create(Schema.Type.STRING)))
|
||||
|| value instanceof String) {
|
||||
row.setField(index, StringData.fromString((String) value));
|
||||
} else {
|
||||
row.setField(index, value);
|
||||
}
|
||||
}
|
||||
return row;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,141 @@
|
||||
package com.lanyuanxiaoyao.service.sync.functions.type;
|
||||
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import java.math.BigDecimal;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.Pattern;
|
||||
import org.apache.flink.table.data.DecimalData;
|
||||
import org.apache.flink.table.data.GenericRowData;
|
||||
import org.apache.flink.table.data.StringData;
|
||||
import org.apache.hudi.org.apache.avro.JsonProperties;
|
||||
import org.apache.hudi.org.apache.avro.LogicalTypes;
|
||||
import org.apache.hudi.org.apache.avro.Schema;
|
||||
|
||||
import static org.apache.hudi.org.apache.avro.Schema.*;
|
||||
|
||||
/**
 * Second-generation type conversion: maps source column type strings (MySQL/PostgreSQL/Oracle
 * style) to properly-typed, nullable Avro schemas and converts record values accordingly,
 * instead of treating everything as a string like {@link TypeConverterV1}.
 *
 * @author ZhangJiacheng
 * @date 2023-07-20
 */
public class TypeConverterV2 implements TypeConverter {
    // Reusable primitive Avro schemas.
    public static final Schema NULL_SCHEMA = create(Type.NULL);
    public static final Schema BOOLEAN_SCHEMA = create(Type.BOOLEAN);
    public static final Schema INT_SCHEMA = create(Type.INT);
    public static final Schema LONG_SCHEMA = create(Type.LONG);
    public static final Schema FLOAT_SCHEMA = create(Type.FLOAT);
    public static final Schema DOUBLE_SCHEMA = create(Type.DOUBLE);
    public static final Schema STRING_SCHEMA = create(Type.STRING);
    // Fixed-size schema used as the carrier for decimals; named by its byte length.
    public static final Function<Integer, Schema> FIXED_SCHEMA = length -> createFixed("decimal_" + length, null, null, length);
    // NOTE(review): the parameter is named "scala" throughout but means decimal *scale*.
    public static final BiFunction<Integer, Integer, Schema> DECIMAL_SCHEMA = (length, scala) -> LogicalTypes.decimal(length, scala).addToSchema(FIXED_SCHEMA.apply(length));
    public static final BiFunction<Integer, Integer, Schema> NULLABLE_DECIMAL_SCHEMA = (length, scala) -> createUnion(NULL_SCHEMA, DECIMAL_SCHEMA.apply(length, scala));
    // Nullable unions of the primitives above.
    public static final Schema NULLABLE_BOOLEAN_SCHEMA = createUnion(NULL_SCHEMA, BOOLEAN_SCHEMA);
    public static final Schema NULLABLE_INT_SCHEMA = createUnion(NULL_SCHEMA, INT_SCHEMA);
    public static final Schema NULLABLE_LONG_SCHEMA = createUnion(NULL_SCHEMA, LONG_SCHEMA);
    public static final Schema NULLABLE_FLOAT_SCHEMA = createUnion(NULL_SCHEMA, FLOAT_SCHEMA);
    public static final Schema NULLABLE_DOUBLE_SCHEMA = createUnion(NULL_SCHEMA, DOUBLE_SCHEMA);
    public static final Schema NULLABLE_STRING_SCHEMA = createUnion(NULL_SCHEMA, STRING_SCHEMA);
    public static final Function<Integer, Schema> NULLABLE_FIXED_SCHEMA = length -> createUnion(NULL_SCHEMA, FIXED_SCHEMA.apply(length));
    // Source-type recognizers (input is trimmed and lower-cased before matching).
    private static final Pattern BOOLEAN_REGEX = Pattern.compile("^boolean|bool$");
    private static final Pattern INT_REGEX = Pattern.compile("^(tinyint|smallint|int|smallserial|integer)(\\(\\d+\\))?$");
    private static final Pattern LONG_REGEX = Pattern.compile("^(bigint unsigned)|((bigint|serial|long)(\\(\\d+\\))?)$");
    private static final Pattern DATE_REGEX = Pattern.compile("^date|timestamp|timestamp without time zone|datetime|time$");
    private static final Pattern FLOAT_REGEX = Pattern.compile("^float(\\(\\d+\\))?$");
    private static final Pattern DOUBLE_REGEX = Pattern.compile("^double(\\(\\d+\\))?$");
    private static final Pattern FIXED_REGEX = Pattern.compile("^(number|money|bigserial)(\\(\\d+\\))?$");
    private static final Pattern DECIMAL_REGEX = Pattern.compile("^(double precision)|(decimal(\\(\\s*\\d+\\s*(,\\s*\\d+\\s*)?\\))?)$");
    private static final Pattern NUMERIC_REGEX = Pattern.compile("^numeric(\\(\\s*\\d+\\s*(,\\s*\\d+\\s*)?\\))?$");
    private static final Pattern STRING_REGEX = Pattern.compile("^(character varying|(long|medium)text)|((varchar|char|text|clob|binary|bit)(\\(\\d+\\))?)$");
    // NOTE(review): the two members below are not referenced anywhere in this class — confirm
    // whether they are still needed.
    private static final Pattern YYYYMMDD = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}");
    private static final DateTimeFormatter YYYYMMDD_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /**
     * Builds the typed Avro record schema for the table: Hudi bookkeeping columns around one
     * typed, nullable column per source field (null default via JsonProperties.NULL_VALUE).
     */
    @Override
    public Schema convertToSchema(TableMeta meta) {
        List<Field> fields = new ArrayList<>(meta.getFields().size() + 4);
        fields.add(new Field(Constants.HUDI_DELETE_KEY_NAME, BOOLEAN_SCHEMA, null, false));
        fields.add(new Field(Constants.UNION_KEY_NAME, STRING_SCHEMA, null, ""));

        for (TableMeta.FieldMeta field : meta.getFields()) {
            fields.add(new Field(field.getName(), convertType(field.getType(), field.getLength(), field.getScala()), null, JsonProperties.NULL_VALUE));
        }

        fields.add(new Field(Constants.UPDATE_TIMESTAMP_KEY_NAME, LONG_SCHEMA, null, -1));
        fields.add(new Field(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, STRING_SCHEMA, null, ""));
        return Schema.createRecord(meta.getTable(), null, null, false, fields);
    }

    /**
     * Maps a source column type string to a nullable Avro schema.
     * Dates/timestamps are kept as strings. Decimal-like types use precision {@code length}
     * with scale {@code scala}, defaulting the scale to 0 (fixed types) or 6 when absent.
     *
     * @param type   raw source type text (case/whitespace insensitive)
     * @param length precision for decimal-like types
     *               (NOTE(review): dereferenced unguarded in the decimal branches — a null
     *               length throws NPE; confirm length is always present for those types)
     * @param scala  decimal scale, may be null
     * @throws RuntimeException when the source type matches no known pattern
     */
    private Schema convertType(String type, Long length, Integer scala) {
        type = type.trim().toLowerCase();
        if (BOOLEAN_REGEX.matcher(type).matches()) {
            return NULLABLE_BOOLEAN_SCHEMA;
        } else if (STRING_REGEX.matcher(type).matches() || DATE_REGEX.matcher(type).matches()) {
            return NULLABLE_STRING_SCHEMA;
        } else if (INT_REGEX.matcher(type).matches()) {
            return NULLABLE_INT_SCHEMA;
        } else if (LONG_REGEX.matcher(type).matches()) {
            return NULLABLE_LONG_SCHEMA;
        } else if (FLOAT_REGEX.matcher(type).matches()) {
            return NULLABLE_FLOAT_SCHEMA;
        } else if (DOUBLE_REGEX.matcher(type).matches()) {
            return NULLABLE_DOUBLE_SCHEMA;
        } else if (FIXED_REGEX.matcher(type).matches()) {
            return NULLABLE_DECIMAL_SCHEMA.apply(length.intValue(), 0);
        } else if (DECIMAL_REGEX.matcher(type).matches() || NUMERIC_REGEX.matcher(type).matches()) {
            if (ObjectUtil.isNull(scala)) {
                // No declared scale: default to 6 fractional digits.
                return NULLABLE_DECIMAL_SCHEMA.apply(length.intValue(), 6);
            } else {
                return NULLABLE_DECIMAL_SCHEMA.apply(length.intValue(), scala);
            }
        } else {
            throw new RuntimeException(Constants.LOG_POINT_FIELD_TYPE_NOT_FOUND + " Cannot find correct type for source type: " + type + " length: " + length);
        }
    }

    /**
     * Fills a {@link GenericRowData} from the record map, converting each value to the type
     * required by its column schema via {@link #covertValue}.
     */
    @Override
    public GenericRowData convertToGenericRowData(TableMeta meta, Schema schema, Map<String, Object> data) {
        List<Field> fields = schema.getFields();
        GenericRowData row = new GenericRowData(fields.size());
        for (int index = 0; index < fields.size(); index++) {
            Field field = fields.get(index);
            // FIELD_COVERT normalizes field names (e.g. lower-casing for telepg sources).
            Object value = data.getOrDefault(Constants.FIELD_COVERT.apply(meta, field.name()), null);
            row.setField(index, covertValue(field.schema(), value));
        }
        return row;
    }

    /**
     * Converts one raw value to the representation matching its column schema: string inputs
     * are parsed into the primitive target type, strings become {@link StringData}, and
     * decimal unions become {@link DecimalData} with the schema's precision/scale.
     *
     * NOTE(review): the final else branch calls schema.getTypes(), which only succeeds for
     * union schemas — confirm non-union, non-primitive schemas cannot reach this method.
     */
    private Object covertValue(Schema schema, Object value) {
        if (ObjectUtil.isNull(value)) {
            return value;
        } else if (NULLABLE_BOOLEAN_SCHEMA.equals(schema) || BOOLEAN_SCHEMA.equals(schema)) {
            return value instanceof String ? Boolean.valueOf((String) value) : value;
        } else if (NULLABLE_INT_SCHEMA.equals(schema) || INT_SCHEMA.equals(schema)) {
            return value instanceof String ? Integer.valueOf((String) value) : value;
        } else if (NULLABLE_LONG_SCHEMA.equals(schema) || LONG_SCHEMA.equals(schema)) {
            return value instanceof String ? Long.valueOf((String) value) : value;
        } else if (NULLABLE_FLOAT_SCHEMA.equals(schema) || FLOAT_SCHEMA.equals(schema)) {
            return value instanceof String ? Float.valueOf((String) value) : value;
        } else if (NULLABLE_DOUBLE_SCHEMA.equals(schema) || DOUBLE_SCHEMA.equals(schema)) {
            return value instanceof String ? Double.valueOf((String) value) : value;
        } else if (NULLABLE_STRING_SCHEMA.equals(schema) || STRING_SCHEMA.equals(schema)) {
            return StringData.fromString((String) value);
        } else {
            // Decimal path: find the decimal member of the union and convert with its
            // precision/scale. The value is assumed to be a numeric string here.
            for (Schema type : schema.getTypes()) {
                if (type.getLogicalType() instanceof LogicalTypes.Decimal) {
                    LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) type.getLogicalType();
                    int precision = decimalType.getPrecision();
                    int scala = decimalType.getScale();
                    return DecimalData.fromBigDecimal(new BigDecimal((String) value), precision, scala);
                }
            }
            return value;
        }
    }
}
|
||||
@@ -0,0 +1,123 @@
|
||||
package com.lanyuanxiaoyao.service.sync.metrics;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import cn.hutool.http.HttpResponse;
|
||||
import cn.hutool.http.HttpUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.JacksonUtils;
|
||||
import dev.failsafe.Failsafe;
|
||||
import dev.failsafe.RetryPolicy;
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
 * Base metric class.
 *
 * <p>Holds the set of {@code HttpMetricsRequest} line buffers for a metric, and implements
 * best-effort publishing: serialized requests are batched into {@code lineCache} and pushed
 * to the configured metrics endpoint with basic auth and a bounded retry. Publishing is
 * deliberately lossy — on failure the batch is logged and dropped rather than retried
 * indefinitely.
 *
 * @author ZhangJiacheng
 * @date 2022-06-13
 */
public abstract class AbstractMetric implements Metric {
    private static final Logger logger = LoggerFactory.getLogger(AbstractMetric.class);
    private static final ObjectMapper MAPPER = JacksonUtils.getMapper();
    // Retry policy for the HTTP push: up to 5 attempts, 1s apart.
    private static final RetryPolicy<HttpResponse> PUBLISH_RETRY = RetryPolicy.<HttpResponse>builder()
            .handle(Exception.class)
            .withDelay(Duration.ofSeconds(1))
            .withMaxAttempts(5)
            .build();
    private final GlobalConfiguration globalConfiguration;
    // Serialized request lines accumulated for the next push; cleared after every attempt.
    private final List<String> lineCache = ListUtil.toList();
    // Counts makePoint calls since the last publish, for batch-triggered auto publishing.
    private final LongAdder autoPublishCount = new LongAdder();
    private List<HttpMetricsRequest> requests = new ArrayList<>();

    public AbstractMetric(GlobalConfiguration globalConfiguration) {
        this.globalConfiguration = globalConfiguration;
    }

    /** Replaces the request list with the given requests. */
    public void setRequests(HttpMetricsRequest... requests) {
        setRequests(ListUtil.toList(requests));
    }

    /** Replaces the request list. The list is kept by reference, not copied. */
    public void setRequests(List<HttpMetricsRequest> requests) {
        this.requests = requests;
    }

    /** Appends one request to the list. */
    public void addRequest(HttpMetricsRequest request) {
        this.requests.add(request);
    }

    /** Appends several requests to the list. */
    public void addRequests(HttpMetricsRequest... requests) {
        addRequests(ListUtil.toList(requests));
    }

    /** Appends all given requests to the list. */
    public void addRequests(List<HttpMetricsRequest> requests) {
        this.requests.addAll(requests);
    }

    /** Adds a tag key/value pair to every underlying request. */
    @Override
    public void addTag(String key, String value) {
        requests.forEach(request -> request.addTag(key, value));
    }

    /**
     * Records one data point, optionally auto-publishing when {@code batch} points have
     * accumulated since the last publish. No-op when metrics are disabled.
     *
     * NOTE(review): publish() is checked BEFORE incrementing, so the flush actually happens
     * on the (batch+1)-th call — confirm this off-by-one is intended.
     *
     * @param autoPublish whether to flush automatically once the batch threshold is reached
     * @param batch       the auto-publish threshold
     */
    @Override
    public void makePoint(boolean autoPublish, int batch) {
        if (!globalConfiguration.getMetricEnable()) {
            return;
        }
        if (autoPublish) {
            if (autoPublishCount.sum() >= batch) {
                publish();
            }
            autoPublishCount.increment();
        }
        makePoint();
    }

    /**
     * Serializes all non-empty requests and pushes them to the metrics endpoint in one body.
     * Best-effort: any failure is logged at WARN and the batch is dropped (lineCache is
     * cleared in finally either way). No-op when metrics are disabled.
     *
     * NOTE(review): each request is cleared at serialization time, before the HTTP push is
     * known to have succeeded — points are lost if the push ultimately fails; confirm this
     * lossiness is acceptable.
     */
    public synchronized void publish() {
        if (!globalConfiguration.getMetricEnable()) {
            return;
        }
        try {
            requests.stream()
                    .filter(request -> !request.isEmpty())
                    .map(request -> {
                        try {
                            String data = MAPPER.writeValueAsString(request);
                            request.clear();
                            return data;
                        } catch (JsonProcessingException e) {
                            logger.warn("Parse metrics failure: " + request, e);
                        }
                        // Unserializable requests are skipped (filtered out below).
                        return null;
                    })
                    .filter(Objects::nonNull)
                    .forEach(lineCache::add);
            if (lineCache.isEmpty()) {
                return;
            }
            String lines = String.join("\n", lineCache);
            logger.debug("Push metrics: \n{}", lines);
            HttpResponse response = Failsafe.with(PUBLISH_RETRY)
                    .get(() -> HttpUtil.createPost(globalConfiguration.getMetricPublishUrl())
                            .body(lines)
                            .basicAuth(Constants.VICTORIA_USERNAME, Constants.VICTORIA_PASSWORD)
                            .timeout(globalConfiguration.getMetricPublishTimeout())
                            .execute());
            if (response.isOk()) {
                logger.debug("Metrics push success");
            }
        } catch (Throwable throwable) {
            logger.warn("Push metrics failure, url: " + globalConfiguration.getMetricPublishUrl(), throwable);
        } finally {
            // Drop the batch and reset the auto-publish counter regardless of outcome.
            lineCache.clear();
            autoPublishCount.reset();
        }
    }
}
|
||||
@@ -0,0 +1,67 @@
|
||||
package com.lanyuanxiaoyao.service.sync.metrics;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import cn.hutool.core.map.MapUtil;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
|
||||
/**
|
||||
* 基础类
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-13
|
||||
*/
|
||||
public class CountMetric extends AbstractMetric {
|
||||
private final LongAdder count = new LongAdder();
|
||||
|
||||
private final HttpMetricsRequest countMetrics;
|
||||
|
||||
public CountMetric(GlobalConfiguration globalConfiguration, String name) {
|
||||
this(globalConfiguration, name, MapUtil.empty());
|
||||
}
|
||||
|
||||
public CountMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta) {
|
||||
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta)).build());
|
||||
}
|
||||
|
||||
public CountMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, String extraTagKey, String extraTagValue) {
|
||||
this(globalConfiguration, name, job, meta, MapUtil.of(extraTagKey, extraTagValue));
|
||||
}
|
||||
|
||||
public CountMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, Map<String, String> tags) {
|
||||
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta))
|
||||
.putAll(tags)
|
||||
.build());
|
||||
}
|
||||
|
||||
public CountMetric(GlobalConfiguration globalConfiguration, String name, Map<String, String> tags) {
|
||||
super(globalConfiguration);
|
||||
countMetrics = new HttpMetricsRequest(
|
||||
name + "_count",
|
||||
MapUtil.<String, String>builder().putAll(tags).build()
|
||||
);
|
||||
setRequests(countMetrics);
|
||||
}
|
||||
|
||||
public void increment() {
|
||||
count.increment();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void makePoint() {
|
||||
double count = this.count.doubleValue();
|
||||
if (count != 0) {
|
||||
countMetrics.addMetric(count);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HttpMetricsRequest> getMetrics() {
|
||||
return ListUtil.toList(countMetrics);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
package com.lanyuanxiaoyao.service.sync.metrics;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import cn.hutool.core.map.MapUtil;
|
||||
import java.beans.Transient;
|
||||
import java.io.Serializable;
|
||||
import java.time.Instant;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
/**
|
||||
* 指标实体类
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-13
|
||||
*/
|
||||
public class HttpMetricsRequest implements Serializable {
|
||||
private final Map<String, String> metric;
|
||||
private final List<Double> values;
|
||||
private final List<Long> timestamps;
|
||||
|
||||
private final Lock lock = new ReentrantLock();
|
||||
|
||||
public HttpMetricsRequest(String name, Map<String, String> metrics) {
|
||||
this.metric = MapUtil.<String, String>builder()
|
||||
.put("__name__", name)
|
||||
.build();
|
||||
this.metric.putAll(metrics);
|
||||
this.values = Collections.synchronizedList(ListUtil.list(true));
|
||||
this.timestamps = Collections.synchronizedList(ListUtil.list(true));
|
||||
}
|
||||
|
||||
public void addTag(String key, String value) {
|
||||
this.metric.put(key, value);
|
||||
}
|
||||
|
||||
public void addMetric(Double value) {
|
||||
addMetric(value, Instant.now().toEpochMilli());
|
||||
}
|
||||
|
||||
public void addMetric(Double value, Long timestamp) {
|
||||
synchronized (this) {
|
||||
values.add(value);
|
||||
timestamps.add(timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
synchronized (this) {
|
||||
this.values.clear();
|
||||
this.timestamps.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@Transient
|
||||
public boolean isEmpty() {
|
||||
return this.values.isEmpty() && this.timestamps.isEmpty();
|
||||
}
|
||||
|
||||
@Transient
|
||||
public boolean isNonEmpty() {
|
||||
return !isEmpty();
|
||||
}
|
||||
|
||||
public Map<String, String> getMetric() {
|
||||
return metric;
|
||||
}
|
||||
|
||||
public List<Double> getValues() {
|
||||
return values;
|
||||
}
|
||||
|
||||
public List<Long> getTimestamps() {
|
||||
return timestamps;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "MetricsItem{" +
|
||||
"metrics=" + metric +
|
||||
", values=" + values +
|
||||
", timestamps=" + timestamps +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,77 @@
|
||||
package com.lanyuanxiaoyao.service.sync.metrics;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import cn.hutool.core.map.MapUtil;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.DoubleAdder;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
|
||||
/**
|
||||
* 基础类
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-13
|
||||
*/
|
||||
public class MessageSizeSizeMetric extends AbstractMetric {
|
||||
private final LongAdder count = new LongAdder();
|
||||
private final DoubleAdder size = new DoubleAdder();
|
||||
|
||||
private final HttpMetricsRequest sizeMetrics;
|
||||
private final HttpMetricsRequest perMessageSizeMetrics;
|
||||
|
||||
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name) {
|
||||
this(globalConfiguration, name, MapUtil.empty());
|
||||
}
|
||||
|
||||
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta) {
|
||||
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta)).build());
|
||||
}
|
||||
|
||||
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, String extraTagKey, String extraTagValue) {
|
||||
this(globalConfiguration, name, job, meta, MapUtil.of(extraTagKey, extraTagValue));
|
||||
}
|
||||
|
||||
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, Map<String, String> tags) {
|
||||
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta))
|
||||
.putAll(tags)
|
||||
.build());
|
||||
}
|
||||
|
||||
public MessageSizeSizeMetric(GlobalConfiguration globalConfiguration, String name, Map<String, String> tags) {
|
||||
super(globalConfiguration);
|
||||
sizeMetrics = new HttpMetricsRequest(
|
||||
name + "_total",
|
||||
MapUtil.<String, String>builder().putAll(tags).build()
|
||||
);
|
||||
perMessageSizeMetrics = new HttpMetricsRequest(
|
||||
name + "_per_message",
|
||||
MapUtil.<String, String>builder().putAll(tags).build()
|
||||
);
|
||||
setRequests(sizeMetrics, perMessageSizeMetrics);
|
||||
}
|
||||
|
||||
public void increment(long size) {
|
||||
this.count.increment();
|
||||
this.size.add(size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void makePoint() {
|
||||
double count = this.count.doubleValue();
|
||||
double size = this.size.doubleValue();
|
||||
if (size != 0 && count != 0) {
|
||||
sizeMetrics.addMetric(size);
|
||||
perMessageSizeMetrics.addMetric(size / count);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HttpMetricsRequest> getMetrics() {
|
||||
return ListUtil.toList(sizeMetrics);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.lanyuanxiaoyao.service.sync.metrics;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 指标类定义
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-13
|
||||
*/
|
||||
public interface Metric extends Serializable {
    /** Adds a label to every underlying request of this metric. */
    void addTag(String key, String value);

    /**
     * Records a data point; when {@code autoPublish} is true the implementation
     * may also push buffered points once {@code batch} points have accumulated
     * (see AbstractMetric#makePoint(boolean, int)).
     */
    void makePoint(boolean autoPublish, int batch);

    /** Records a single data point from the current counters. */
    void makePoint();

    /** Returns the underlying request objects backing this metric. */
    List<HttpMetricsRequest> getMetrics();
}
|
||||
@@ -0,0 +1,87 @@
|
||||
package com.lanyuanxiaoyao.service.sync.metrics;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import cn.hutool.core.map.MapUtil;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.utils.MetricsUtils;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
|
||||
/**
|
||||
* 基础类
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-13
|
||||
*/
|
||||
public class RateMetric extends AbstractMetric {
|
||||
private final LongAdder count = new LongAdder();
|
||||
|
||||
private final HttpMetricsRequest countMetrics;
|
||||
private final HttpMetricsRequest millisecondMetrics;
|
||||
private final HttpMetricsRequest perSecondMetrics;
|
||||
|
||||
private final Instant startInstant;
|
||||
|
||||
public RateMetric(GlobalConfiguration globalConfiguration, String name) {
|
||||
this(globalConfiguration, name, MapUtil.empty());
|
||||
}
|
||||
|
||||
public RateMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta) {
|
||||
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta)).build());
|
||||
}
|
||||
|
||||
public RateMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, String extraTagKey, String extraTagValue) {
|
||||
this(globalConfiguration, name, job, meta, MapUtil.of(extraTagKey, extraTagValue));
|
||||
}
|
||||
|
||||
public RateMetric(GlobalConfiguration globalConfiguration, String name, FlinkJob job, TableMeta meta, Map<String, String> tags) {
|
||||
this(globalConfiguration, name, MapUtil.builder(MetricsUtils.commonTags(job, meta))
|
||||
.putAll(tags)
|
||||
.build());
|
||||
}
|
||||
|
||||
public RateMetric(GlobalConfiguration globalConfiguration, String name, Map<String, String> tags) {
|
||||
super(globalConfiguration);
|
||||
|
||||
startInstant = Instant.now();
|
||||
|
||||
countMetrics = new HttpMetricsRequest(
|
||||
name + "_count",
|
||||
MapUtil.<String, String>builder().putAll(tags).build()
|
||||
);
|
||||
millisecondMetrics = new HttpMetricsRequest(
|
||||
name + "_millisecond",
|
||||
MapUtil.<String, String>builder().putAll(tags).build()
|
||||
);
|
||||
perSecondMetrics = new HttpMetricsRequest(
|
||||
name + "_per_millisecond",
|
||||
MapUtil.<String, String>builder().putAll(tags).build()
|
||||
);
|
||||
setRequests(countMetrics, millisecondMetrics, perSecondMetrics);
|
||||
}
|
||||
|
||||
public void increment() {
|
||||
count.increment();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void makePoint() {
|
||||
double count = this.count.doubleValue();
|
||||
if (count != 0) {
|
||||
long millis = Duration.between(startInstant, Instant.now()).toMillis();
|
||||
countMetrics.addMetric(count);
|
||||
millisecondMetrics.addMetric((double) millis);
|
||||
perSecondMetrics.addMetric(count / millis);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HttpMetricsRequest> getMetrics() {
|
||||
return ListUtil.toList(countMetrics, millisecondMetrics, perSecondMetrics);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
package com.lanyuanxiaoyao.service.sync.utils;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.exception.MissingArgumentException;
|
||||
import java.util.List;
|
||||
import org.apache.flink.api.java.utils.ParameterTool;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.type.TypeReference;
|
||||
|
||||
/**
|
||||
* 入参解析相关内容
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-03-10
|
||||
*/
|
||||
public class ArgumentsUtils {
|
||||
public static long getJobId(String[] args) throws MissingArgumentException {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.JOB_ID)) {
|
||||
throw new MissingArgumentException(Constants.JOB_ID);
|
||||
}
|
||||
return argsTool.getLong(Constants.JOB_ID);
|
||||
}
|
||||
|
||||
public static String getTable(String[] args) throws MissingArgumentException {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.TABLE_NAME)) {
|
||||
throw new MissingArgumentException(Constants.TABLE_NAME);
|
||||
}
|
||||
return argsTool.get(Constants.TABLE_NAME);
|
||||
}
|
||||
|
||||
public static Boolean getServiceMode(String[] args) throws Exception {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.SERVICE_MODE)) {
|
||||
return false;
|
||||
}
|
||||
return argsTool.getBoolean(Constants.SERVICE_MODE);
|
||||
}
|
||||
|
||||
public static String getMessageId(String[] args) throws MissingArgumentException {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.MESSAGE_ID)) {
|
||||
throw new MissingArgumentException(Constants.MESSAGE_ID);
|
||||
}
|
||||
return argsTool.get(Constants.MESSAGE_ID);
|
||||
}
|
||||
|
||||
public static TableMeta getTableMeta(String[] args) throws Exception {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.TABLE_META)) {
|
||||
throw new MissingArgumentException(Constants.TABLE_META);
|
||||
}
|
||||
return JacksonUtils.getMapper().readValue(argsTool.get(Constants.TABLE_META), TableMeta.class);
|
||||
}
|
||||
|
||||
public static List<TableMeta> getTableMetaList(String[] args) throws Exception {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.TABLE_META_LIST)) {
|
||||
throw new MissingArgumentException(Constants.TABLE_META_LIST);
|
||||
}
|
||||
return JacksonUtils.getMapper().readValue(argsTool.get(Constants.TABLE_META_LIST), new TypeReference<List<TableMeta>>() {});
|
||||
}
|
||||
|
||||
public static FlinkJob getFlinkJob(String[] args) throws MissingArgumentException, JsonProcessingException {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.FLINK_JOB)) {
|
||||
throw new MissingArgumentException(Constants.FLINK_JOB);
|
||||
}
|
||||
return JacksonUtils.getMapper().readValue(argsTool.get(Constants.FLINK_JOB), FlinkJob.class);
|
||||
}
|
||||
|
||||
public static String getInstants(String[] args) throws JsonProcessingException {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.INSTANTS)) {
|
||||
return "";
|
||||
}
|
||||
return argsTool.get(Constants.INSTANTS);
|
||||
}
|
||||
|
||||
public static Boolean getBetaMode(String[] args) {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.BETA)) {
|
||||
return false;
|
||||
}
|
||||
return argsTool.getBoolean(Constants.BETA);
|
||||
}
|
||||
|
||||
public static String getCluster(String[] args) {
|
||||
ParameterTool argsTool = ParameterTool.fromArgs(args);
|
||||
if (!argsTool.has(Constants.CLUSTER)) {
|
||||
return "";
|
||||
}
|
||||
return argsTool.get(Constants.CLUSTER);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.lanyuanxiaoyao.service.sync.utils;
|
||||
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
|
||||
/**
|
||||
* 环境变量相关参数
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-21
|
||||
*/
|
||||
/**
 * Sets process-level system properties that tag emitted metrics with the
 * run type of the current process.
 */
public class EnvUtils {
    // Currently a no-op; kept as a placeholder for run types without a label.
    public static void setEnv() {
    }

    /** Marks this process as a compaction run for metric labelling. */
    public static void setCompactionEnv() {
        System.setProperty(Constants.METRICS_LABEL_RUN_TYPE, Constants.METRICS_RUN_TYPE_COMPACTION);
    }

    /** Marks this process as a sync run for metric labelling. */
    public static void setSyncEnv() {
        System.setProperty(Constants.METRICS_LABEL_RUN_TYPE, Constants.METRICS_RUN_TYPE_SYNC);
    }
}
|
||||
@@ -0,0 +1,31 @@
|
||||
package com.lanyuanxiaoyao.service.sync.utils;
|
||||
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import java.io.Serializable;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.MapperFeature;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Json 解析相关工具
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-12
|
||||
*/
|
||||
public class JacksonUtils implements Serializable {
|
||||
private static final Logger logger = LoggerFactory.getLogger(JacksonUtils.class);
|
||||
|
||||
private static ObjectMapper INSTANCE = null;
|
||||
|
||||
public static ObjectMapper getMapper() {
|
||||
if (ObjectUtil.isNull(INSTANCE)) {
|
||||
INSTANCE = new ObjectMapper();
|
||||
INSTANCE.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true);
|
||||
INSTANCE.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
INSTANCE.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false);
|
||||
}
|
||||
return INSTANCE;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
package com.lanyuanxiaoyao.service.sync.utils;
|
||||
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import cn.hutool.core.util.RandomUtil;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* 动态平衡获取指标 url
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-07-05
|
||||
*/
|
||||
public class LoadBalance {
|
||||
private static final Logger logger = LoggerFactory.getLogger(LoadBalance.class);
|
||||
private static String[] urls = null;
|
||||
private static int length = 0;
|
||||
|
||||
public static String getCustomPublishUrl(GlobalConfiguration globalConfiguration) {
|
||||
if (ObjectUtil.isNull(urls)) {
|
||||
urls = globalConfiguration.getMetricsPublishCustomUrl().split(",");
|
||||
length = urls.length;
|
||||
}
|
||||
String url = urls[RandomUtil.randomInt(length)];
|
||||
logger.info("Random url: {}", url);
|
||||
return url;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package com.lanyuanxiaoyao.service.sync.utils;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import cn.hutool.core.map.MapUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.metrics.AbstractMetric;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Timer;
|
||||
import java.util.TimerTask;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* 指标工具
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-06-12
|
||||
*/
|
||||
public class MetricsUtils implements Serializable {
|
||||
private static final Logger logger = LoggerFactory.getLogger(MetricsUtils.class);
|
||||
|
||||
public static Map<String, String> commonTags(FlinkJob job, TableMeta meta) {
|
||||
return MapUtil.<String, String>builder()
|
||||
.put(Constants.METRICS_LABEL_FLINK_JOB_ID, job.getId().toString())
|
||||
.put(Constants.METRICS_LABEL_FLINK_JOB_NAME, job.getName())
|
||||
.put(Constants.METRICS_LABEL_SCHEMA, meta.getSchema())
|
||||
.put(Constants.METRICS_LABEL_TABLE, meta.getTable())
|
||||
.put(Constants.METRICS_LABEL_ALIAS, meta.getAlias())
|
||||
.build();
|
||||
}
|
||||
|
||||
@SafeVarargs
|
||||
public static <T extends AbstractMetric> void createMakePointTimer(GlobalConfiguration globalConfiguration, T... metrics) {
|
||||
createMakePointTimer(globalConfiguration, ListUtil.toList(metrics));
|
||||
}
|
||||
|
||||
public static <T extends AbstractMetric> void createMakePointTimer(GlobalConfiguration globalConfiguration, List<T> metrics) {
|
||||
logger.info("Create timer: {}", metrics);
|
||||
new Timer().schedule(new TimerTask() {
|
||||
@Override
|
||||
public void run() {
|
||||
for (AbstractMetric metric : metrics) {
|
||||
metric.makePoint(true, globalConfiguration.getMetricPublishBatch());
|
||||
}
|
||||
}
|
||||
}, globalConfiguration.getMetricPublishDelay(), globalConfiguration.getMetricPublishPeriod());
|
||||
}
|
||||
|
||||
@SafeVarargs
|
||||
public static <T extends AbstractMetric> void publishAllMetrics(T... metrics) {
|
||||
publishAllMetrics(ListUtil.toList(metrics));
|
||||
}
|
||||
|
||||
public static <T extends AbstractMetric> void publishAllMetrics(List<T> metrics) {
|
||||
for (AbstractMetric metric : metrics) {
|
||||
metric.publish();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,271 @@
|
||||
package com.lanyuanxiaoyao.service.sync.utils;
|
||||
|
||||
import cn.hutool.core.util.EnumUtil;
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.http.HttpUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.RetryPolicyProvider;
|
||||
import dev.failsafe.Failsafe;
|
||||
import java.time.Instant;
|
||||
import java.util.Map;
|
||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* 状态输出
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2022-07-05
|
||||
*/
|
||||
// Pushes job lifecycle/status events (sync start, checkpoint, compaction
// phases) to the status endpoint chosen by LoadBalance. All calls are
// best effort with retries.
public class StatusUtils {
    private static final Logger logger = LoggerFactory.getLogger(StatusUtils.class);

    // Shared mapper for serializing request bodies.
    private static final ObjectMapper MAPPER = JacksonUtils.getMapper();
    // One-minute timeout applied to every status HTTP call.
    private static final int HTTP_TIMEOUT = (int) Constants.MINUTE;
|
||||
|
||||
public static void syncStart(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
|
||||
logger.info("Enter method: syncStart[configuration, flinkJob, tableMeta]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta);
|
||||
try {
|
||||
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
|
||||
.run(() ->
|
||||
HttpUtil.createGet(
|
||||
StrUtil.format(
|
||||
"{}/api/sync_start?flink_job_id={}&alias={}&database={}&schema={}&table={}&cluster={}&application_id={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias(),
|
||||
tableMeta.getSource(),
|
||||
tableMeta.getSchema(),
|
||||
tableMeta.getTable(),
|
||||
configuration.getCluster(),
|
||||
configuration.getApplicationId()
|
||||
)
|
||||
)
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute()
|
||||
);
|
||||
} catch (Exception e) {
|
||||
logger.warn("sync start metrics submit failure");
|
||||
}
|
||||
}
|
||||
|
||||
public static void syncCheckpoint(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String messageId, Long publishTime) {
|
||||
logger.info("Enter method: syncCheckpoint[configuration, flinkJob, tableMeta, messageId, publishTime]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "messageId:" + messageId + "," + "publishTime:" + publishTime);
|
||||
try {
|
||||
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
|
||||
.run(() -> HttpUtil.createGet(
|
||||
StrUtil.format(
|
||||
"{}/api/sync_checkpoint_state?flink_job_id={}&alias={}&message_id={}&publish_time={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias(),
|
||||
messageId,
|
||||
publishTime
|
||||
)
|
||||
)
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute()
|
||||
);
|
||||
} catch (Exception e) {
|
||||
logger.warn("sync checkpoint metrics submit failure");
|
||||
}
|
||||
}
|
||||
|
||||
    /** Reports a sync operation without an explicit operation time. */
    public static void syncOperation(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
        syncOperation(configuration, flinkJob, tableMeta, null);
    }
|
||||
|
||||
public static void syncOperation(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, Long operationTime) {
|
||||
logger.info("Enter method: syncOperation[configuration, flinkJob, tableMeta, operationTime]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "operationTime:" + operationTime);
|
||||
try {
|
||||
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
|
||||
.run(() -> {
|
||||
if (ObjectUtil.isNull(operationTime)) {
|
||||
HttpUtil.createGet(
|
||||
StrUtil.format(
|
||||
"{}/api/sync_operation_state?flink_job_id={}&alias={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias()
|
||||
)
|
||||
)
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute();
|
||||
} else {
|
||||
HttpUtil.createGet(
|
||||
StrUtil.format(
|
||||
"{}/api/sync_operation_state?flink_job_id={}&alias={}&operation_time={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias(),
|
||||
operationTime
|
||||
)
|
||||
)
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute();
|
||||
}
|
||||
});
|
||||
} catch (Exception e) {
|
||||
logger.warn("sync operation metrics submit failure");
|
||||
}
|
||||
}
|
||||
|
||||
public static void compactionStart(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta) {
|
||||
logger.info("Enter method: compactionStart[configuration, flinkJob, tableMeta]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta);
|
||||
try {
|
||||
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
|
||||
.run(() ->
|
||||
HttpUtil.createGet(
|
||||
StrUtil.format(
|
||||
"{}/api/compaction_start?flink_job_id={}&alias={}&type={}&cluster={}&application_id={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias(),
|
||||
EnumUtil.toString(tableMeta.getSourceType()),
|
||||
configuration.getCluster(),
|
||||
configuration.getApplicationId()
|
||||
)
|
||||
)
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute()
|
||||
);
|
||||
} catch (Exception e) {
|
||||
logger.warn("compaction start metrics submit failure");
|
||||
}
|
||||
}
|
||||
|
||||
public static void compactionPreCommit(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String instant, Map<String, Long> metadata) {
|
||||
logger.info("Enter method: compactionPreCommit[configuration, flinkJob, tableMeta, instant, metadata]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "instant:" + instant + "," + "metadata:" + metadata);
|
||||
try {
|
||||
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
|
||||
.run(() -> HttpUtil.createPost(
|
||||
StrUtil.format(
|
||||
"{}/api/compaction_pre_commit?flink_job_id={}&alias={}&instant={}&cluster={}&application_id={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias(),
|
||||
instant,
|
||||
configuration.getCluster(),
|
||||
configuration.getApplicationId()
|
||||
)
|
||||
)
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.body(MAPPER.writeValueAsString(metadata))
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute()
|
||||
);
|
||||
} catch (Exception e) {
|
||||
logger.warn("compaction pre commit metrics submit failure");
|
||||
}
|
||||
}
|
||||
|
||||
public static void compactionCommit(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String instant, HoodieCommitMetadata metadata) {
|
||||
logger.info("Enter method: compactionCommit[configuration, flinkJob, tableMeta, instant, metadata]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "instant:" + instant + "," + "metadata:" + metadata);
|
||||
try {
|
||||
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
|
||||
.run(() -> HttpUtil.createPost(
|
||||
StrUtil.format(
|
||||
"{}/api/compaction_commit?flink_job_id={}&alias={}&instant={}&cluster={}&application_id={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias(),
|
||||
instant,
|
||||
configuration.getCluster(),
|
||||
configuration.getApplicationId()
|
||||
)
|
||||
)
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.body(MAPPER.writeValueAsString(metadata))
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute()
|
||||
);
|
||||
} catch (Exception e) {
|
||||
logger.warn("compaction commit metrics submit failure");
|
||||
}
|
||||
}
|
||||
|
||||
public static void compactionFinish(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String message, Exception exception) {
|
||||
logger.info("Enter method: compactionFinish[configuration, flinkJob, tableMeta, message, exception]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "message:" + message + "," + "exception:" + exception);
|
||||
try {
|
||||
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
|
||||
.run(() -> {
|
||||
boolean success = (exception == null);
|
||||
HttpUtil.createPost(StrUtil.format(
|
||||
"{}/api/compaction_finish?flink_job_id={}&alias={}&time={}&state={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias(),
|
||||
Instant.now().toEpochMilli(),
|
||||
success
|
||||
))
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.body(success ? message == null ? "" : message : exception.toString(), "text/plain")
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute()
|
||||
.close();
|
||||
});
|
||||
} catch (Exception e) {
|
||||
logger.warn("compaction finish metrics submit failure");
|
||||
}
|
||||
}
|
||||
|
||||
public static void versionUpdate(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, String version, String opts) {
|
||||
logger.info("Enter method: versionUpdate[configuration, flinkJob, tableMeta, version, opts]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "version:" + version + "," + "opts:" + opts);
|
||||
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
|
||||
.run(() ->
|
||||
HttpUtil.createGet(
|
||||
StrUtil.format(
|
||||
"{}/api/version_update?flink_job_id={}&alias={}&version={}&opts={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias(),
|
||||
version,
|
||||
opts
|
||||
)
|
||||
)
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute()
|
||||
);
|
||||
}
|
||||
|
||||
public static void compactionLatestOpTs(GlobalConfiguration configuration, FlinkJob flinkJob, TableMeta tableMeta, Long latestOpTs) {
|
||||
logger.info("Enter method: compactionLatestOpTs[configuration, flinkJob, tableMeta, latestOpTs]. " + "configuration:" + configuration + "," + "flinkJob:" + flinkJob + "," + "tableMeta:" + tableMeta + "," + "latestOpTs:" + latestOpTs);
|
||||
Failsafe.with(RetryPolicyProvider.HTTP_RETRY)
|
||||
.run(() ->
|
||||
HttpUtil.createGet(
|
||||
StrUtil.format(
|
||||
"{}/api/compaction_latest_operation_time?flink_job_id={}&alias={}&latest_op_ts={}",
|
||||
LoadBalance.getCustomPublishUrl(configuration),
|
||||
flinkJob.getId(),
|
||||
tableMeta.getAlias(),
|
||||
latestOpTs
|
||||
)
|
||||
)
|
||||
.header(Constants.API_HEADER_NAME, Constants.API_VERSION)
|
||||
.basicAuth(Constants.SPRING_SECURITY_USERNAME, Constants.SPRING_SECURITY_PASSWORD_PLAIN)
|
||||
.timeout(HTTP_TIMEOUT)
|
||||
.execute()
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,257 @@
|
||||
package com.lanyuanxiaoyao.service.sync.utils;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import cn.hutool.core.util.EnumUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.Record;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.utils.TableMetaHelper;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.DefaultPartitionNameKeyGenerator;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.GlobalConfiguration;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.TraceOverwriteWithLatestAvroPayload;
|
||||
import com.lanyuanxiaoyao.service.sync.configuration.TraceWriteStatus;
|
||||
import com.lanyuanxiaoyao.service.sync.functions.OperationTypeFilter;
|
||||
import com.lanyuanxiaoyao.service.sync.functions.Record2RowDataFunction;
|
||||
import com.lanyuanxiaoyao.service.sync.functions.type.TypeConverter;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.flink.api.common.typeinfo.TypeHint;
|
||||
import org.apache.flink.api.common.typeinfo.TypeInformation;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.streaming.api.datastream.DataStream;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
import org.apache.flink.table.api.EnvironmentSettings;
|
||||
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.apache.flink.table.types.logical.RowType;
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieStorageConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
|
||||
import org.apache.hudi.config.metrics.HoodieMetricsVictoriaConfig;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.configuration.OptionsResolver;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.metrics.MetricsReporterType;
|
||||
import org.apache.hudi.org.apache.avro.Schema;
|
||||
import org.apache.hudi.sink.utils.Pipelines;
|
||||
import org.apache.hudi.table.action.compact.strategy.UnBoundedCompactionStrategy;
|
||||
import org.apache.hudi.util.AvroSchemaConverter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static com.lanyuanxiaoyao.service.common.Constants.HOUR;
|
||||
|
||||
/**
|
||||
* Flink 相关的工具
|
||||
*
|
||||
* @author lanyuanxiaoyao
|
||||
* @version 0.0.2
|
||||
* @date 2022-04-20
|
||||
*/
|
||||
@SuppressWarnings("UnusedAssignment")
|
||||
public class SyncUtils {
|
||||
private static final Logger logger = LoggerFactory.getLogger(SyncUtils.class);
|
||||
|
||||
private static final long K = 1024;
|
||||
private static final long M = 1024 * K;
|
||||
private static final long G = 1024 * M;
|
||||
|
||||
public static Schema avroSchemaWithExtraFields(TableMeta meta) {
|
||||
return TypeConverter.getInstance(meta).convertToSchema(meta);
|
||||
}
|
||||
|
||||
public static Configuration getSyncFlinkConfiguration(GlobalConfiguration globalConfiguration, Configuration inputConfiguration, FlinkJob flinkJob, TableMeta tableMeta, Schema schema, Integer defaultParallelism) {
|
||||
Configuration configuration = new Configuration();
|
||||
if (inputConfiguration != null) {
|
||||
configuration = inputConfiguration;
|
||||
}
|
||||
|
||||
configuration.setBoolean(HoodieMetricsConfig.TURN_METRICS_ON.key(), false);
|
||||
configuration.setString(HoodieMetricsConfig.METRICS_REPORTER_TYPE_VALUE.key(), MetricsReporterType.VICTORIA.name());
|
||||
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_ENDPOINT.key(), globalConfiguration.getMetricPublishPrometheusUrl());
|
||||
configuration.setInteger(HoodieMetricsVictoriaConfig.VICTORIA_TIMEOUT.key(), 60000);
|
||||
configuration.setBoolean(HoodieMetricsVictoriaConfig.VICTORIA_BASIC_AUTH_ENABLE.key(), true);
|
||||
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_BASIC_AUTH_USERNAME.key(), Constants.VICTORIA_USERNAME);
|
||||
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_BASIC_AUTH_PASSWORD.key(), Constants.VICTORIA_PASSWORD);
|
||||
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_TAGS.key(), ListUtil.toList(
|
||||
Pair.of(Constants.METRICS_LABEL_RUN_TYPE, Constants.METRICS_RUN_TYPE_SYNC),
|
||||
Pair.of(Constants.METRICS_LABEL_FLINK_JOB_ID, flinkJob.getId()),
|
||||
Pair.of(Constants.METRICS_LABEL_FLINK_JOB_NAME, flinkJob.getName().replaceAll("\\s", "_")),
|
||||
Pair.of(Constants.METRICS_LABEL_SCHEMA, tableMeta.getSchema()),
|
||||
Pair.of(Constants.METRICS_LABEL_TABLE, tableMeta.getTable()),
|
||||
Pair.of(Constants.METRICS_LABEL_ALIAS, tableMeta.getAlias())
|
||||
).stream().map(pair -> StrUtil.format("{}={}", pair.getLeft(), pair.getRight())).collect(Collectors.joining(";")));
|
||||
|
||||
return getFlinkConfiguration(configuration, tableMeta, schema, defaultParallelism);
|
||||
}
|
||||
|
||||
public static Configuration getCompactionFlinkConfiguration(GlobalConfiguration globalConfiguration, Configuration inputConfiguration, FlinkJob flinkJob, TableMeta tableMeta, Schema schema, Integer defaultParallelism) {
|
||||
Configuration configuration = new Configuration();
|
||||
if (inputConfiguration != null) {
|
||||
configuration = inputConfiguration;
|
||||
}
|
||||
|
||||
configuration.setBoolean(HoodieMetricsConfig.TURN_METRICS_ON.key(), false);
|
||||
configuration.setString(HoodieMetricsConfig.METRICS_REPORTER_TYPE_VALUE.key(), MetricsReporterType.VICTORIA.name());
|
||||
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_ENDPOINT.key(), globalConfiguration.getMetricPublishPrometheusUrl());
|
||||
configuration.setInteger(HoodieMetricsVictoriaConfig.VICTORIA_TIMEOUT.key(), 60000);
|
||||
configuration.setBoolean(HoodieMetricsVictoriaConfig.VICTORIA_BASIC_AUTH_ENABLE.key(), true);
|
||||
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_BASIC_AUTH_USERNAME.key(), Constants.VICTORIA_USERNAME);
|
||||
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_BASIC_AUTH_PASSWORD.key(), Constants.VICTORIA_PASSWORD);
|
||||
configuration.setString(HoodieMetricsVictoriaConfig.VICTORIA_TAGS.key(), ListUtil.toList(
|
||||
Pair.of(Constants.METRICS_LABEL_RUN_TYPE, Constants.METRICS_RUN_TYPE_COMPACTION),
|
||||
Pair.of(Constants.METRICS_LABEL_FLINK_JOB_ID, flinkJob.getId()),
|
||||
Pair.of(Constants.METRICS_LABEL_FLINK_JOB_NAME, flinkJob.getName().replaceAll("\\s", "_")),
|
||||
Pair.of(Constants.METRICS_LABEL_SCHEMA, tableMeta.getSchema()),
|
||||
Pair.of(Constants.METRICS_LABEL_TABLE, tableMeta.getTable()),
|
||||
Pair.of(Constants.METRICS_LABEL_ALIAS, tableMeta.getAlias())
|
||||
).stream().map(pair -> StrUtil.format("{}={}", pair.getLeft(), pair.getRight())).collect(Collectors.joining(";")));
|
||||
|
||||
return getFlinkConfiguration(configuration, tableMeta, schema, defaultParallelism);
|
||||
}
|
||||
|
||||
public static Configuration getFlinkConfiguration(Configuration inputConfiguration, TableMeta tableMeta, Schema schema, Integer defaultParallelism) {
|
||||
Configuration configuration = new Configuration();
|
||||
if (inputConfiguration != null) {
|
||||
configuration = inputConfiguration;
|
||||
}
|
||||
String tableType = tableMeta.getHudi().getTargetTableType();
|
||||
logger.info("Hudi table type: {}", tableMeta.getHudi().getTargetTableType());
|
||||
// 基本信息
|
||||
configuration.setString(FlinkOptions.TABLE_NAME, tableMeta.getHudi().getTargetTable());
|
||||
configuration.setString(FlinkOptions.TABLE_TYPE, tableType);
|
||||
configuration.setString(FlinkOptions.PATH, tableMeta.getHudi().getTargetHdfsPath());
|
||||
configuration.setString(FlinkOptions.RECORD_KEY_FIELD, Constants.UNION_KEY_NAME);
|
||||
configuration.setBoolean(FlinkOptions.PRE_COMBINE, false);
|
||||
if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_PRE_COMBINE)) {
|
||||
configuration.setBoolean(FlinkOptions.PRE_COMBINE, true);
|
||||
}
|
||||
configuration.setString(FlinkOptions.PRECOMBINE_FIELD, Constants.UPDATE_TIMESTAMP_KEY_NAME);
|
||||
configuration.setString(FlinkOptions.SOURCE_AVRO_SCHEMA, schema.toString());
|
||||
|
||||
if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_NO_IGNORE_FAILED)) {
|
||||
configuration.setBoolean(FlinkOptions.IGNORE_FAILED, false);
|
||||
}
|
||||
|
||||
configuration.setString(FlinkOptions.PARTITION_DEFAULT_NAME, "default");
|
||||
configuration.setString(FlinkOptions.KEYGEN_CLASS_NAME, DefaultPartitionNameKeyGenerator.class.getName());
|
||||
|
||||
Optional<String> partitionPath = TableMetaHelper.getPartitionField(tableMeta);
|
||||
logger.info("Partition field: {}", partitionPath.orElse(""));
|
||||
if (partitionPath.isPresent()) {
|
||||
configuration.setString(FlinkOptions.PARTITION_PATH_FIELD, partitionPath.get());
|
||||
}
|
||||
|
||||
if (TableMetaHelper.existsTag(tableMeta, Constants.TAGS_TRACE_LATEST_OP_TS)) {
|
||||
logger.info("Enable trace latest op ts");
|
||||
configuration.setString(FlinkOptions.PAYLOAD_CLASS_NAME, TraceOverwriteWithLatestAvroPayload.class.getName());
|
||||
configuration.setString(HoodieWriteConfig.WRITE_STATUS_CLASS_NAME.key(), TraceWriteStatus.class.getName());
|
||||
}
|
||||
|
||||
configuration.setBoolean(FlinkOptions.METADATA_ENABLED, false);
|
||||
configuration.setInteger(HoodieStorageConfig.LOGFILE_DATA_BLOCK_MAX_SIZE.key(), Integer.MAX_VALUE);
|
||||
configuration.setString(FileSystemViewStorageConfig.SECONDARY_VIEW_TYPE.key(), FileSystemViewStorageType.SPILLABLE_DISK.name());
|
||||
|
||||
// Write
|
||||
configuration.setInteger(FlinkOptions.WRITE_TASKS, tableMeta.getHudi().getWriteTasks() == 0 ? defaultParallelism : tableMeta.getHudi().getWriteTasks());
|
||||
configuration.setInteger(FlinkOptions.WRITE_MERGE_MAX_MEMORY, 0);
|
||||
configuration.setDouble(FlinkOptions.WRITE_TASK_MAX_SIZE, tableMeta.getHudi().getWriteTaskMaxMemory() == 0 ? FlinkOptions.WRITE_TASK_MAX_SIZE.defaultValue() : tableMeta.getHudi().getWriteTaskMaxMemory());
|
||||
configuration.setDouble(FlinkOptions.WRITE_BATCH_SIZE, tableMeta.getHudi().getWriteBatchSize() == 0 ? FlinkOptions.WRITE_BATCH_SIZE.defaultValue() : tableMeta.getHudi().getWriteBatchSize());
|
||||
configuration.setLong(FlinkOptions.WRITE_RATE_LIMIT, tableMeta.getHudi().getWriteRateLimit());
|
||||
configuration.setLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT, HOUR);
|
||||
|
||||
// 索引
|
||||
configuration.setString(FlinkOptions.INDEX_TYPE, HoodieIndex.IndexType.BUCKET.name());
|
||||
configuration.setInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS, tableMeta.getHudi().getBucketIndexNumber() == 0 ? 50 : tableMeta.getHudi().getBucketIndexNumber());
|
||||
configuration.setString(FlinkOptions.INDEX_KEY_FIELD, Constants.UNION_KEY_NAME);
|
||||
|
||||
configuration.setBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED, false);
|
||||
configuration.setBoolean(FlinkOptions.INDEX_GLOBAL_ENABLED, false);
|
||||
configuration.setDouble(FlinkOptions.INDEX_STATE_TTL, -1);
|
||||
|
||||
// 增大 就 OOM
|
||||
// configuration.setDouble(HoodieMemoryConfig.MAX_DFS_STREAM_BUFFER_SIZE.key(), 64 * M);
|
||||
// 增大 就 OOM
|
||||
// configuration.setDouble(HoodieStorageConfig.LOGFILE_DATA_BLOCK_MAX_SIZE.key(), 128 * M);
|
||||
|
||||
// Compaction
|
||||
configuration.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false);
|
||||
if (EnumUtil.equals(HoodieTableType.COPY_ON_WRITE, tableType)) {
|
||||
configuration.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, true);
|
||||
}
|
||||
configuration.setBoolean(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, true);
|
||||
configuration.setInteger(FlinkOptions.COMPACTION_TASKS, tableMeta.getHudi().getCompactionTasks());
|
||||
configuration.setString(FlinkOptions.COMPACTION_TRIGGER_STRATEGY, StrUtil.isBlank(tableMeta.getHudi().getCompactionStrategy()) ? FlinkOptions.NUM_OR_TIME : tableMeta.getHudi().getCompactionStrategy());
|
||||
configuration.setInteger(FlinkOptions.COMPACTION_MAX_MEMORY, 1024);
|
||||
configuration.setInteger(FlinkOptions.COMPACTION_DELTA_SECONDS, tableMeta.getHudi().getCompactionDeltaSeconds() == 0 ? 15 * 60 : tableMeta.getHudi().getCompactionDeltaSeconds());
|
||||
configuration.setInteger(FlinkOptions.COMPACTION_DELTA_COMMITS, tableMeta.getHudi().getCompactionDeltaCommits() == 0 ? 5 : tableMeta.getHudi().getCompactionDeltaCommits());
|
||||
|
||||
configuration.setString(HoodieCompactionConfig.COMPACTION_STRATEGY.key(), UnBoundedCompactionStrategy.class.getName());
|
||||
// configuration.setString(HoodieCompactionConfig.COMPACTION_STRATEGY.key(), CombineAllCompactionStrategy.class.getName());
|
||||
// configuration.setBoolean(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, true);
|
||||
// configuration.setInteger(FlinkOptions.COMPACTION_TASKS, tableMeta.getHudi().getCompactionTasks() == 0 ? defaultParallelism : tableMeta.getHudi().getCompactionTasks());
|
||||
// configuration.setInteger(FlinkOptions.COMPACTION_MAX_MEMORY, tableMeta.getHudi().getCompactionMaxMemory());
|
||||
// configuration.setString(FlinkOptions.COMPACTION_TRIGGER_STRATEGY, tableMeta.getHudi().getCompactionStrategy());
|
||||
// configuration.setInteger(FlinkOptions.COMPACTION_DELTA_COMMITS, tableMeta.getHudi().getCompactionDeltaCommits());
|
||||
// configuration.setInteger(FlinkOptions.COMPACTION_DELTA_SECONDS, tableMeta.getHudi().getCompactionDeltaSeconds());
|
||||
|
||||
// 时间线保留个数
|
||||
configuration.setInteger(FlinkOptions.CLEAN_RETAIN_COMMITS, tableMeta.getHudi().getKeepCommitVersion());
|
||||
// 时间线归档最小保留个数,要比上一个参数大
|
||||
configuration.setInteger(FlinkOptions.ARCHIVE_MIN_COMMITS, tableMeta.getHudi().getKeepCommitVersion() + 50);
|
||||
// 时间线归档最大保留个数,要比上一个参数大
|
||||
configuration.setInteger(FlinkOptions.ARCHIVE_MAX_COMMITS, tableMeta.getHudi().getKeepCommitVersion() + 100);
|
||||
// log文件和data文件保留版本数
|
||||
configuration.setString(FlinkOptions.CLEAN_POLICY, HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS.name());
|
||||
configuration.setInteger(FlinkOptions.CLEAN_RETAIN_FILE_VERSIONS, tableMeta.getHudi().getKeepFileVersion());
|
||||
|
||||
// 关闭一个内置的 http 服务
|
||||
// configuration.setBoolean(HoodieWriteConfig.EMBEDDED_TIMELINE_SERVER_ENABLE.key(), false);
|
||||
|
||||
return configuration;
|
||||
}
|
||||
|
||||
public static void sinkToHoodieByTable(GlobalConfiguration globalConfiguration, FlinkJob flinkJob, TableMeta tableMeta, StreamExecutionEnvironment environment, DataStream<Record> inputDataStream) {
|
||||
Schema schema = avroSchemaWithExtraFields(tableMeta);
|
||||
DataStream<RowData> dataStream = inputDataStream
|
||||
.filter(new OperationTypeFilter(globalConfiguration, flinkJob, tableMeta))
|
||||
.name("Count operation type")
|
||||
.map(new Record2RowDataFunction(globalConfiguration, flinkJob, tableMeta), TypeInformation.of(new TypeHint<List<RowData>>() {
|
||||
}))
|
||||
.name("Covert Row ( " + tableMeta.getSchema() + "-" + tableMeta.getTable() + " )")
|
||||
.flatMap((list, collector) -> list.forEach(collector::collect), TypeInformation.of(RowData.class))
|
||||
.filter(Objects::nonNull)
|
||||
.name("Filter not null");
|
||||
|
||||
RowType rowType = (RowType) AvroSchemaConverter.convertToDataType(schema).getLogicalType();
|
||||
StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(
|
||||
environment,
|
||||
EnvironmentSettings.newInstance()
|
||||
.inStreamingMode()
|
||||
.useBlinkPlanner()
|
||||
.build()
|
||||
);
|
||||
Configuration configuration = tableEnvironment.getConfig().getConfiguration();
|
||||
int parallelism = configuration.getInteger("parallelism", 1);
|
||||
configuration = getSyncFlinkConfiguration(globalConfiguration, configuration, flinkJob, tableMeta, schema, parallelism);
|
||||
|
||||
DataStream<HoodieRecord> hoodieRecordDataStream = Pipelines.bootstrap(configuration, rowType, parallelism, dataStream);
|
||||
DataStream<Object> pipeline = Pipelines.hoodieStreamWrite(configuration, parallelism, hoodieRecordDataStream);
|
||||
if (OptionsResolver.needsAsyncCompaction(configuration)) {
|
||||
Pipelines.compact(configuration, pipeline);
|
||||
} else {
|
||||
Pipelines.clean(configuration, pipeline);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
package com.lanyuanxiaoyao.service.sync.utils;
|
||||
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
|
||||
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.common.utils.NameHelper;
|
||||
import org.apache.flink.shaded.curator4.org.apache.curator.framework.CuratorFramework;
|
||||
import org.apache.flink.shaded.curator4.org.apache.curator.framework.CuratorFrameworkFactory;
|
||||
import org.apache.flink.shaded.curator4.org.apache.curator.framework.imps.CuratorFrameworkState;
|
||||
import org.apache.flink.shaded.curator4.org.apache.curator.retry.ExponentialBackoffRetry;
|
||||
import org.apache.flink.shaded.curator4.org.apache.curator.utils.CloseableUtils;
|
||||
import org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.CreateMode;
|
||||
import org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException;
|
||||
import org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.client.ZooKeeperSaslClient;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Zk 操作
|
||||
*
|
||||
* @author ZhangJiacheng
|
||||
* @date 2023-05-10
|
||||
*/
|
||||
public class ZkUtils {
|
||||
private static final Logger logger = LoggerFactory.getLogger(ZkUtils.class);
|
||||
private static CuratorFramework client;
|
||||
|
||||
private static void createClient(String url) {
|
||||
System.setProperty(ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY, "false");
|
||||
if (ObjectUtil.isNull(client)) {
|
||||
client = CuratorFrameworkFactory.builder()
|
||||
.connectString(url)
|
||||
.retryPolicy(new ExponentialBackoffRetry((int) (5 * Constants.SECOND), 5))
|
||||
.sessionTimeoutMs((int) (10 * Constants.SECOND))
|
||||
.connectionTimeoutMs((int) (10 * Constants.SECOND))
|
||||
.build();
|
||||
}
|
||||
if (!CuratorFrameworkState.STARTED.equals(client.getState())) {
|
||||
client.start();
|
||||
}
|
||||
}
|
||||
|
||||
public static void closeClient() {
|
||||
if (ObjectUtil.isNotNull(client)) {
|
||||
CloseableUtils.closeQuietly(client);
|
||||
}
|
||||
}
|
||||
|
||||
public static void createSynchronizerLock(FlinkJob job, String zookeeperUrl, String runMeta) {
|
||||
createLock(zookeeperUrl, runMeta, NameHelper.syncRunningLockPath(job.getId()));
|
||||
}
|
||||
|
||||
public static void createSynchronizerLock(FlinkJob job, TableMeta meta, String zookeeperUrl, String runMeta) {
|
||||
createLock(zookeeperUrl, runMeta, NameHelper.syncRunningLockPath(job.getId(), meta.getAlias()));
|
||||
}
|
||||
|
||||
public static void releaseSynchronizerLock(FlinkJob job, TableMeta meta) {
|
||||
releaseLock(NameHelper.syncRunningLockPath(job.getId(), meta.getAlias()));
|
||||
}
|
||||
|
||||
public static void createCompactionLock(FlinkJob job, TableMeta meta, String zookeeperUrl, String runMeta) {
|
||||
createLock(zookeeperUrl, runMeta, NameHelper.compactionRunningLockPath(job.getId(), meta.getAlias()));
|
||||
}
|
||||
|
||||
public static void releaseCompactionLock(FlinkJob job, TableMeta meta) {
|
||||
releaseLock(NameHelper.compactionRunningLockPath(job.getId(), meta.getAlias()));
|
||||
}
|
||||
|
||||
private static void createLock(String zookeeperUrl, String runMeta, String lockPath) {
|
||||
try {
|
||||
createClient(zookeeperUrl);
|
||||
client.create()
|
||||
.creatingParentsIfNeeded()
|
||||
.withMode(CreateMode.EPHEMERAL)
|
||||
.forPath(lockPath, runMeta.getBytes());
|
||||
} catch (KeeperException.NodeExistsException e) {
|
||||
logger.error("Lock exists for " + lockPath, e);
|
||||
throw new RuntimeException(e);
|
||||
} catch (Exception e) {
|
||||
logger.error("Unknown error", e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static void releaseLock(String lockPath) {
|
||||
try {
|
||||
if (ObjectUtil.isNotNull(client)) {
|
||||
if (ObjectUtil.isNotNull(client.checkExists().forPath(lockPath))) {
|
||||
client.delete().forPath(lockPath);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("Unknown error", e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
87
utils/sync/src/main/resources/logback.xml
Normal file
87
utils/sync/src/main/resources/logback.xml
Normal file
@@ -0,0 +1,87 @@
|
||||
<configuration>
|
||||
<appender name="Loki" class="pl.tkowalcz.tjahzi.logback.LokiAppender">
|
||||
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
|
||||
<level>INFO</level>
|
||||
</filter>
|
||||
<url>${loki_push_url:- }</url>
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %p [${HOSTNAME}] [%t] %logger #@# %m%n%ex{full}</pattern>
|
||||
</encoder>
|
||||
<label>
|
||||
<name>app</name>
|
||||
<value>hudi-${run_type:- }</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>host</name>
|
||||
<value>${HOSTNAME}</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>run_type</name>
|
||||
<value>${run_type:- }</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>flink_job_id</name>
|
||||
<value>${flink_job_id:- }</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>flink_job_name</name>
|
||||
<value>${flink_job_name:- }</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>schema</name>
|
||||
<value>${schema:- }</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>table</name>
|
||||
<value>${table:- }</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>batch_id</name>
|
||||
<value>${batch_id:- }</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>alias</name>
|
||||
<value>${alias:- }</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>app_id</name>
|
||||
<value>${_APP_ID:- }</value>
|
||||
</label>
|
||||
<label>
|
||||
<name>container_id</name>
|
||||
<value>${CONTAINER_ID:- }</value>
|
||||
</label>
|
||||
<logLevelLabel>level</logLevelLabel>
|
||||
</appender>
|
||||
|
||||
<appender name="File" class="ch.qos.logback.core.FileAppender">
|
||||
<file>run.log</file>
|
||||
<append>false</append>
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %p [${HOSTNAME}] [%t] %logger #@# %m%n%ex{full}</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %p [${HOSTNAME}] [%t] %logger #@# %m%n%ex{full}</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<root level="INFO">
|
||||
<appender-ref ref="Loki"/>
|
||||
<appender-ref ref="File"/>
|
||||
<appender-ref ref="Console"/>
|
||||
</root>
|
||||
|
||||
<logger name="org.apache.hadoop.conf.Configuration" level="ERROR"/>
|
||||
<logger name="org.apache.hadoop.util.NativeCodeLoader" level="ERROR"/>
|
||||
<logger name="org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory" level="ERROR"/>
|
||||
<logger name="akka" level="ERROR"/>
|
||||
<logger name="org.apache.flink.runtime" level="ERROR"/>
|
||||
<logger name="org.apache.flink.runtime.taskexecutor.TaskExecutor" level="WARN"/>
|
||||
<logger name="org.apache.flink.core.plugin.PluginConfig" level="ERROR"/>
|
||||
|
||||
<logger name="org.apache.hudi" level="INFO"/>
|
||||
<logger name="com.eshore.odcp.hudi.connector.sync" level="INFO"/>
|
||||
</configuration>
|
||||
Reference in New Issue
Block a user