feat(executor): 完成任务提交执行的验证
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
mvn -pl service-dependencies,service-configuration,service-forest,service-executor,service-executor/service-executor-core clean deploy -D skipTests -P local -s ~/.m2/settings-development.xml
|
mvn -pl service-dependencies,service-configuration,service-forest,service-executor,service-executor/service-executor-core clean deploy -D skipTests -P local -s ~/.m2/settings-development.xml
|
||||||
mvn -pl service-executor/service-executor-manager clean package -D skipTests -s ~/.m2/settings-development.xml -P b2e1
|
mvn -pl service-executor/service-executor-manager clean package -D skipTests -s ~/.m2/settings-development.xml -P b2b12
|
||||||
ytp-transfer2 /Users/lanyuanxiaoyao/Project/IdeaProjects/hudi-service/service-executor/service-executor-manager/target/service-executor-manager-1.0.0-SNAPSHOT.jar
|
ytp-transfer2 /Users/lanyuanxiaoyao/Project/IdeaProjects/hudi-service/service-executor/service-executor-manager/target/service-executor-manager-1.0.0-SNAPSHOT.jar
|
||||||
1
pom.xml
1
pom.xml
@@ -39,6 +39,7 @@
|
|||||||
<scala.major.version>2.11</scala.major.version>
|
<scala.major.version>2.11</scala.major.version>
|
||||||
<hudi.version>0.12.0-eshore-SNAPSHOT</hudi.version>
|
<hudi.version>0.12.0-eshore-SNAPSHOT</hudi.version>
|
||||||
<flink.version>1.13.3</flink.version>
|
<flink.version>1.13.3</flink.version>
|
||||||
|
<hadoop.version>1.13.3</hadoop.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<profiles>
|
<profiles>
|
||||||
|
|||||||
@@ -14,6 +14,11 @@ deploy:
|
|||||||
app-hdfs-path: hdfs://b2/apps/datalake/jars/app-b12
|
app-hdfs-path: hdfs://b2/apps/datalake/jars/app-b12
|
||||||
archive-hdfs-path: hdfs://b2/apps/datalake/flink/completed-jobs-hudi
|
archive-hdfs-path: hdfs://b2/apps/datalake/flink/completed-jobs-hudi
|
||||||
victoria-push-url: http://132.126.207.125:35710/api/v1/import/prometheus
|
victoria-push-url: http://132.126.207.125:35710/api/v1/import/prometheus
|
||||||
|
executor:
|
||||||
|
staging-path: hdfs://b2/apps/datalake/yarn
|
||||||
|
archive-hdfs-path: hdfs://b2/apps/flink/completed-jobs/
|
||||||
|
task-jar-path: hdfs://b2/apps/datalake/jars/service/service-executor-task-1.0.0-SNAPSHOT.jar
|
||||||
|
task-result-path: hdfs://b2/apps/datalake/task-results
|
||||||
security:
|
security:
|
||||||
authority: ENC(GXKnbq1LS11U2HaONspvH+D/TkIx13aWTaokdkzaF7HSvq6Z0Rv1+JUWFnYopVXu)
|
authority: ENC(GXKnbq1LS11U2HaONspvH+D/TkIx13aWTaokdkzaF7HSvq6Z0Rv1+JUWFnYopVXu)
|
||||||
username: ENC(moIO5mO39V1Z+RDwROK9JXY4GfM8ZjDgM6Si7wRZ1MPVjbhTpmLz3lz28rAiw7c2LeCmizfJzHkEXIwGlB280g==)
|
username: ENC(moIO5mO39V1Z+RDwROK9JXY4GfM8ZjDgM6Si7wRZ1MPVjbhTpmLz3lz28rAiw7c2LeCmizfJzHkEXIwGlB280g==)
|
||||||
|
|||||||
@@ -14,6 +14,11 @@ deploy:
|
|||||||
app-hdfs-path: hdfs://b2/apps/datalake/jars/app
|
app-hdfs-path: hdfs://b2/apps/datalake/jars/app
|
||||||
archive-hdfs-path: hdfs://b2/apps/datalake/flink/completed-jobs-hudi
|
archive-hdfs-path: hdfs://b2/apps/datalake/flink/completed-jobs-hudi
|
||||||
victoria-push-url: http://132.122.116.142:35710/api/v1/import/prometheus
|
victoria-push-url: http://132.122.116.142:35710/api/v1/import/prometheus
|
||||||
|
executor:
|
||||||
|
staging-path: hdfs://b2/apps/datalake/yarn
|
||||||
|
archive-hdfs-path: hdfs://b2/apps/flink/completed-jobs/
|
||||||
|
task-jar-path: hdfs://b2/apps/datalake/jars/service/service-executor-task-1.0.0-SNAPSHOT.jar
|
||||||
|
task-result-path: hdfs://b2/apps/datalake/task-results
|
||||||
security:
|
security:
|
||||||
authority: ENC(GXKnbq1LS11U2HaONspvH+D/TkIx13aWTaokdkzaF7HSvq6Z0Rv1+JUWFnYopVXu)
|
authority: ENC(GXKnbq1LS11U2HaONspvH+D/TkIx13aWTaokdkzaF7HSvq6Z0Rv1+JUWFnYopVXu)
|
||||||
username: ENC(moIO5mO39V1Z+RDwROK9JXY4GfM8ZjDgM6Si7wRZ1MPVjbhTpmLz3lz28rAiw7c2LeCmizfJzHkEXIwGlB280g==)
|
username: ENC(moIO5mO39V1Z+RDwROK9JXY4GfM8ZjDgM6Si7wRZ1MPVjbhTpmLz3lz28rAiw7c2LeCmizfJzHkEXIwGlB280g==)
|
||||||
|
|||||||
@@ -14,6 +14,11 @@ deploy:
|
|||||||
app-hdfs-path: jfs://ns1/apps/datalake/hudi/jars
|
app-hdfs-path: jfs://ns1/apps/datalake/hudi/jars
|
||||||
archive-hdfs-path: jfs://ns1/apps/datalake/hudi/archive
|
archive-hdfs-path: jfs://ns1/apps/datalake/hudi/archive
|
||||||
victoria-push-url: http://132.121.126.84:35710/api/v1/import/prometheus
|
victoria-push-url: http://132.121.126.84:35710/api/v1/import/prometheus
|
||||||
|
executor:
|
||||||
|
staging-path: hdfs://b2/apps/datalake/yarn
|
||||||
|
archive-hdfs-path: hdfs://b2/apps/flink/completed-jobs/
|
||||||
|
task-jar-path: hdfs://b2/apps/datalake/jars/service/service-executor-task-1.0.0-SNAPSHOT.jar
|
||||||
|
task-result-path: hdfs://b2/apps/datalake/task-results
|
||||||
security:
|
security:
|
||||||
authority: ENC(GXKnbq1LS11U2HaONspvH+D/TkIx13aWTaokdkzaF7HSvq6Z0Rv1+JUWFnYopVXu)
|
authority: ENC(GXKnbq1LS11U2HaONspvH+D/TkIx13aWTaokdkzaF7HSvq6Z0Rv1+JUWFnYopVXu)
|
||||||
username: ENC(moIO5mO39V1Z+RDwROK9JXY4GfM8ZjDgM6Si7wRZ1MPVjbhTpmLz3lz28rAiw7c2LeCmizfJzHkEXIwGlB280g==)
|
username: ENC(moIO5mO39V1Z+RDwROK9JXY4GfM8ZjDgM6Si7wRZ1MPVjbhTpmLz3lz28rAiw7c2LeCmizfJzHkEXIwGlB280g==)
|
||||||
|
|||||||
@@ -135,3 +135,11 @@ deploy:
|
|||||||
- name: service-executor-manager
|
- name: service-executor-manager
|
||||||
source-jar: service-executor-manager-1.0.0-SNAPSHOT.jar
|
source-jar: service-executor-manager-1.0.0-SNAPSHOT.jar
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
environments:
|
||||||
|
connector_hadoop_kerberos-principal: ${deploy.runtime.user}/$\{hostname}.hdp.dc@ECLD.COM
|
||||||
|
connector_hadoop_kerberos-keytab-path: ${deploy.runtime.kerberos-keytab-path}
|
||||||
|
arguments:
|
||||||
|
executor_staging-directory: ${deploy.runtime.executor.staging-path}
|
||||||
|
executor_history-server-archive-dir: ${deploy.runtime.executor.archive-hdfs-path}
|
||||||
|
executor_task-jar-path: ${deploy.runtime.executor.task-jar-path}
|
||||||
|
executor_task-result-path: ${deploy.runtime.executor.task-result-path}
|
||||||
|
|||||||
@@ -7,6 +7,10 @@ package com.lanyuanxiaoyao.service.executor.core;
|
|||||||
* @date 2023-12-04
|
* @date 2023-12-04
|
||||||
*/
|
*/
|
||||||
public interface TaskConstants {
|
public interface TaskConstants {
|
||||||
|
String TASK_ID = "task-id";
|
||||||
|
String TASK_ID_OPTION = "-" + TASK_ID;
|
||||||
|
String TASK_RESULT_PATH = "task-result-path";
|
||||||
|
String TASK_RESULT_PATH_OPTION = "-" + TASK_RESULT_PATH;
|
||||||
String TASK_CONTEXT = "task-context";
|
String TASK_CONTEXT = "task-context";
|
||||||
String TASK_CONTEXT_OPTION = "-" + TASK_CONTEXT;
|
String TASK_CONTEXT_OPTION = "-" + TASK_CONTEXT;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package com.lanyuanxiaoyao.service.executor.core;
|
package com.lanyuanxiaoyao.service.executor.core;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -10,15 +11,31 @@ import java.util.Map;
|
|||||||
* @date 2023-12-04
|
* @date 2023-12-04
|
||||||
*/
|
*/
|
||||||
public class TaskContext implements Serializable {
|
public class TaskContext implements Serializable {
|
||||||
|
private String taskId;
|
||||||
|
private String resultPath;
|
||||||
private Map<String, Object> metadata;
|
private Map<String, Object> metadata;
|
||||||
|
|
||||||
public TaskContext() {
|
public TaskContext() {
|
||||||
}
|
}
|
||||||
|
|
||||||
public TaskContext(Map<String, Object> metadata) {
|
public TaskContext(String taskId, String resultPath) {
|
||||||
|
this(taskId, resultPath, new HashMap<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
public TaskContext(String taskId, String resultPath, Map<String, Object> metadata) {
|
||||||
|
this.taskId = taskId;
|
||||||
|
this.resultPath = resultPath;
|
||||||
this.metadata = metadata;
|
this.metadata = metadata;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getTaskId() {
|
||||||
|
return taskId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getResultPath() {
|
||||||
|
return resultPath;
|
||||||
|
}
|
||||||
|
|
||||||
public Map<String, Object> getMetadata() {
|
public Map<String, Object> getMetadata() {
|
||||||
return metadata;
|
return metadata;
|
||||||
}
|
}
|
||||||
@@ -26,7 +43,9 @@ public class TaskContext implements Serializable {
|
|||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "TaskContext{" +
|
return "TaskContext{" +
|
||||||
"metadata=" + metadata +
|
"taskId='" + taskId + '\'' +
|
||||||
'}';
|
", resultPath='" + resultPath + '\'' +
|
||||||
|
", metadata=" + metadata +
|
||||||
|
'}';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,12 @@
|
|||||||
package com.lanyuanxiaoyao.service.executor.manager;
|
package com.lanyuanxiaoyao.service.executor.manager;
|
||||||
|
|
||||||
|
import cn.hutool.core.util.IdUtil;
|
||||||
import com.eshore.odcp.hudi.connector.utils.executor.Runner;
|
import com.eshore.odcp.hudi.connector.utils.executor.Runner;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.lanyuanxiaoyao.service.executor.core.TaskConstants;
|
import com.lanyuanxiaoyao.service.executor.core.TaskConstants;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.manager.configuration.ExecutorConfiguration;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.manager.configuration.HadoopConfiguration;
|
||||||
import com.ulisesbocchio.jasyptspringboot.annotation.EnableEncryptableProperties;
|
import com.ulisesbocchio.jasyptspringboot.annotation.EnableEncryptableProperties;
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@@ -11,6 +16,7 @@ import org.apache.flink.yarn.configuration.YarnConfigOptions;
|
|||||||
import org.apache.flink.yarn.configuration.YarnDeploymentTarget;
|
import org.apache.flink.yarn.configuration.YarnDeploymentTarget;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.eclipse.collections.api.factory.Maps;
|
||||||
import org.springframework.boot.ApplicationArguments;
|
import org.springframework.boot.ApplicationArguments;
|
||||||
import org.springframework.boot.ApplicationRunner;
|
import org.springframework.boot.ApplicationRunner;
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
@@ -19,6 +25,7 @@ import org.springframework.boot.autoconfigure.gson.GsonAutoConfiguration;
|
|||||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
|
import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
|
||||||
|
import org.springframework.http.converter.json.Jackson2ObjectMapperBuilder;
|
||||||
import org.springframework.retry.annotation.EnableRetry;
|
import org.springframework.retry.annotation.EnableRetry;
|
||||||
|
|
||||||
import static com.eshore.odcp.hudi.connector.Constants.HALF_HOUR;
|
import static com.eshore.odcp.hudi.connector.Constants.HALF_HOUR;
|
||||||
@@ -40,12 +47,24 @@ import static com.eshore.odcp.hudi.connector.Constants.MINUTE;
|
|||||||
@EnableEncryptableProperties
|
@EnableEncryptableProperties
|
||||||
@EnableRetry
|
@EnableRetry
|
||||||
public class ExecutorManagerApplication implements ApplicationRunner {
|
public class ExecutorManagerApplication implements ApplicationRunner {
|
||||||
|
private final HadoopConfiguration hadoopConfiguration;
|
||||||
|
private final ExecutorConfiguration executorConfiguration;
|
||||||
|
private final ObjectMapper mapper;
|
||||||
|
|
||||||
|
public ExecutorManagerApplication(HadoopConfiguration hadoopConfiguration, ExecutorConfiguration executorConfiguration, Jackson2ObjectMapperBuilder builder) {
|
||||||
|
this.hadoopConfiguration = hadoopConfiguration;
|
||||||
|
this.executorConfiguration = executorConfiguration;
|
||||||
|
this.mapper = builder.build();
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
SpringApplication.run(ExecutorManagerApplication.class, args);
|
SpringApplication.run(ExecutorManagerApplication.class, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run(ApplicationArguments args) throws Exception {
|
public void run(ApplicationArguments args) throws Exception {
|
||||||
|
String taskId = IdUtil.nanoId(8);
|
||||||
|
|
||||||
Configuration configuration = new Configuration();
|
Configuration configuration = new Configuration();
|
||||||
configuration.setBoolean(JobManagerOptions.JVM_DIRECT_MEMORY_LIMIT_ENABLED, true);
|
configuration.setBoolean(JobManagerOptions.JVM_DIRECT_MEMORY_LIMIT_ENABLED, true);
|
||||||
configuration.setString(AkkaOptions.ASK_TIMEOUT, "10 min");
|
configuration.setString(AkkaOptions.ASK_TIMEOUT, "10 min");
|
||||||
@@ -54,15 +73,15 @@ public class ExecutorManagerApplication implements ApplicationRunner {
|
|||||||
configuration.set(ClientOptions.CLIENT_TIMEOUT, Duration.ofMinutes(30));
|
configuration.set(ClientOptions.CLIENT_TIMEOUT, Duration.ofMinutes(30));
|
||||||
// Kerberos认证
|
// Kerberos认证
|
||||||
configuration.setBoolean(SecurityOptions.KERBEROS_LOGIN_USETICKETCACHE, true);
|
configuration.setBoolean(SecurityOptions.KERBEROS_LOGIN_USETICKETCACHE, true);
|
||||||
configuration.setString(SecurityOptions.KERBEROS_LOGIN_KEYTAB, "/etc/security/keytabs/datalake.app.keytab");
|
configuration.setString(SecurityOptions.KERBEROS_LOGIN_KEYTAB, hadoopConfiguration.getKerberosKeytabPath());
|
||||||
configuration.setString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL, "datalake/b5s119.hdp.dc@ECLD.COM");
|
configuration.setString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL, hadoopConfiguration.getKerberosPrincipal());
|
||||||
configuration.setLong(HeartbeatManagerOptions.HEARTBEAT_INTERVAL, MINUTE);
|
configuration.setLong(HeartbeatManagerOptions.HEARTBEAT_INTERVAL, MINUTE);
|
||||||
configuration.setLong(HeartbeatManagerOptions.HEARTBEAT_TIMEOUT, HALF_HOUR);
|
configuration.setLong(HeartbeatManagerOptions.HEARTBEAT_TIMEOUT, HALF_HOUR);
|
||||||
configuration.setString(AkkaOptions.ASK_TIMEOUT, "1 min");
|
configuration.setString(AkkaOptions.ASK_TIMEOUT, "1 min");
|
||||||
configuration.setString(AkkaOptions.TCP_TIMEOUT, "2 min");
|
configuration.setString(AkkaOptions.TCP_TIMEOUT, "2 min");
|
||||||
configuration.setBoolean(CoreOptions.CHECK_LEAKED_CLASSLOADER, false);
|
configuration.setBoolean(CoreOptions.CHECK_LEAKED_CLASSLOADER, false);
|
||||||
configuration.setString(YarnConfigOptions.APPLICATION_ATTEMPTS, "4");
|
configuration.setString(YarnConfigOptions.APPLICATION_ATTEMPTS, "4");
|
||||||
configuration.setString(YarnConfigOptions.STAGING_DIRECTORY, "hdfs://b2/apps/datalake/yarn");
|
configuration.setString(YarnConfigOptions.STAGING_DIRECTORY, executorConfiguration.getStagingDirectory());
|
||||||
configuration.setString(ResourceManagerOptions.CONTAINERIZED_MASTER_ENV_PREFIX + "MALLOC_ARENA_MAX", "1");
|
configuration.setString(ResourceManagerOptions.CONTAINERIZED_MASTER_ENV_PREFIX + "MALLOC_ARENA_MAX", "1");
|
||||||
configuration.setString(ResourceManagerOptions.CONTAINERIZED_TASK_MANAGER_ENV_PREFIX + "MALLOC_ARENA_MAX", "1");
|
configuration.setString(ResourceManagerOptions.CONTAINERIZED_TASK_MANAGER_ENV_PREFIX + "MALLOC_ARENA_MAX", "1");
|
||||||
configuration.setInteger(RestOptions.PORT, 8081);
|
configuration.setInteger(RestOptions.PORT, 8081);
|
||||||
@@ -74,22 +93,28 @@ public class ExecutorManagerApplication implements ApplicationRunner {
|
|||||||
configuration.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("0m"));
|
configuration.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("0m"));
|
||||||
configuration.set(TaskManagerOptions.JVM_METASPACE, MemorySize.parse("128m"));
|
configuration.set(TaskManagerOptions.JVM_METASPACE, MemorySize.parse("128m"));
|
||||||
configuration.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 10);
|
configuration.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 10);
|
||||||
configuration.setString(YarnConfigOptions.APPLICATION_NAME, "HudiService_faee2e95-660d-4b1c-9cec-13473b3cd5b7");
|
configuration.setString(YarnConfigOptions.APPLICATION_NAME, "Service_Task_" + taskId);
|
||||||
configuration.setString(HistoryServerOptions.HISTORY_SERVER_ARCHIVE_DIRS, "hdfs://b2/apps/flink/completed-jobs/");
|
configuration.setString(HistoryServerOptions.HISTORY_SERVER_ARCHIVE_DIRS, executorConfiguration.getHistoryServerArchiveDir());
|
||||||
configuration.setLong(HistoryServerOptions.HISTORY_SERVER_ARCHIVE_REFRESH_INTERVAL, 10000);
|
configuration.setLong(HistoryServerOptions.HISTORY_SERVER_ARCHIVE_REFRESH_INTERVAL, 10000);
|
||||||
|
|
||||||
configuration.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
|
configuration.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
|
||||||
configuration.setString(YarnConfiguration.LOG_AGGREGATION_REMOTE_APP_LOG_DIR_FMT, "/app-logs");
|
configuration.setString(YarnConfiguration.LOG_AGGREGATION_REMOTE_APP_LOG_DIR_FMT, "/app-logs");
|
||||||
|
|
||||||
configuration.set(PipelineOptions.JARS, new ArrayList<String>() {{
|
configuration.set(PipelineOptions.JARS, new ArrayList<String>() {{
|
||||||
add("hdfs://b2/apps/datalake/jars/service/service-executor-task.jar");
|
add(executorConfiguration.getTaskJarPath());
|
||||||
}});
|
}});
|
||||||
ApplicationId applicationId = Runner.run(
|
ApplicationId applicationId = Runner.run(
|
||||||
configuration,
|
configuration,
|
||||||
"com.lanyuanxiaoyao.service.executor.task.Hello",
|
"com.lanyuanxiaoyao.service.executor.task.AvroScanner",
|
||||||
new String[]{
|
new String[]{
|
||||||
TaskConstants.TASK_CONTEXT_OPTION,
|
TaskConstants.TASK_CONTEXT_OPTION,
|
||||||
"{\"metadata\": {\"id\":\"faee2e95-660d-4b1c-9cec-13473b3cd5b7\"}}"
|
mapper.writeValueAsString(
|
||||||
|
new TaskContext(
|
||||||
|
taskId,
|
||||||
|
executorConfiguration.getTaskResultPath(),
|
||||||
|
Maps.mutable.of("key", "123456", "hdfs", "hdfs://b2/apps/datalake/hive/dws_test/external_table_hudi/dws_ord_prod_inst_attr")
|
||||||
|
)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
System.out.println(applicationId);
|
System.out.println(applicationId);
|
||||||
|
|||||||
@@ -0,0 +1,66 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.executor.manager.configuration;
|
||||||
|
|
||||||
|
import javax.annotation.PostConstruct;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yarn 配置
|
||||||
|
*
|
||||||
|
* @author ZhangJiacheng
|
||||||
|
* @date 2022-03-30
|
||||||
|
*/
|
||||||
|
@ConfigurationProperties("executor")
|
||||||
|
@Component
|
||||||
|
public class ExecutorConfiguration {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(ExecutorConfiguration.class);
|
||||||
|
|
||||||
|
private String stagingDirectory;
|
||||||
|
private String historyServerArchiveDir;
|
||||||
|
private String taskJarPath;
|
||||||
|
private String taskResultPath;
|
||||||
|
|
||||||
|
public String getStagingDirectory() {
|
||||||
|
return stagingDirectory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStagingDirectory(String stagingDirectory) {
|
||||||
|
this.stagingDirectory = stagingDirectory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getHistoryServerArchiveDir() {
|
||||||
|
return historyServerArchiveDir;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHistoryServerArchiveDir(String historyServerArchiveDir) {
|
||||||
|
this.historyServerArchiveDir = historyServerArchiveDir;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTaskJarPath() {
|
||||||
|
return taskJarPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTaskJarPath(String taskJarPath) {
|
||||||
|
this.taskJarPath = taskJarPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTaskResultPath() {
|
||||||
|
return taskResultPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTaskResultPath(String taskResultPath) {
|
||||||
|
this.taskResultPath = taskResultPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "ExecutorConfiguration{" +
|
||||||
|
"stagingDirectory='" + stagingDirectory + '\'' +
|
||||||
|
", historyServerArchiveDir='" + historyServerArchiveDir + '\'' +
|
||||||
|
", taskJarPath='" + taskJarPath + '\'' +
|
||||||
|
", taskResultPath='" + taskResultPath + '\'' +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.executor.manager.configuration;
|
||||||
|
|
||||||
|
import javax.annotation.PostConstruct;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yarn 配置
|
||||||
|
*
|
||||||
|
* @author ZhangJiacheng
|
||||||
|
* @date 2022-03-30
|
||||||
|
*/
|
||||||
|
@ConfigurationProperties("connector.hadoop")
|
||||||
|
@Component
|
||||||
|
public class HadoopConfiguration {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(HadoopConfiguration.class);
|
||||||
|
|
||||||
|
private String kerberosPrincipal;
|
||||||
|
private String kerberosKeytabPath;
|
||||||
|
|
||||||
|
@PostConstruct
|
||||||
|
private void init() {
|
||||||
|
logger.info("Configuration initial: {}", this);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getKerberosPrincipal() {
|
||||||
|
return kerberosPrincipal;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setKerberosPrincipal(String kerberosPrincipal) {
|
||||||
|
this.kerberosPrincipal = kerberosPrincipal;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getKerberosKeytabPath() {
|
||||||
|
return kerberosKeytabPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setKerberosKeytabPath(String kerberosKeytabPath) {
|
||||||
|
this.kerberosKeytabPath = kerberosKeytabPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "HadoopConfiguration{" +
|
||||||
|
"kerberosPrincipal='" + kerberosPrincipal + '\'' +
|
||||||
|
", kerberosKeytabPath='" + kerberosKeytabPath + '\'' +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,4 +2,9 @@ spring:
|
|||||||
application:
|
application:
|
||||||
name: service-executor-manager
|
name: service-executor-manager
|
||||||
profiles:
|
profiles:
|
||||||
include: random-port,common,discovery,metrics
|
include: random-port,common,discovery,metrics
|
||||||
|
executor:
|
||||||
|
staging-directory: hdfs://b2/apps/datalake/yarn
|
||||||
|
history-server-archive-dir: hdfs://b2/apps/flink/completed-jobs/
|
||||||
|
task-jar-path: hdfs://b2/apps/datalake/jars/service/service-executor-task-1.0.0-SNAPSHOT.jar
|
||||||
|
task-result-path: hdfs://b2/apps/datalake/task-results
|
||||||
@@ -65,6 +65,17 @@
|
|||||||
<version>30.1.1-jre-15.0</version>
|
<version>30.1.1-jre-15.0</version>
|
||||||
<scope>provided</scope>
|
<scope>provided</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-client</artifactId>
|
||||||
|
<version>3.1.2</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.eclipse.collections</groupId>
|
||||||
|
<artifactId>eclipse-collections-api</artifactId>
|
||||||
|
<version>10.4.0</version>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|||||||
@@ -0,0 +1,56 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.executor.task;
|
||||||
|
|
||||||
|
import cn.hutool.core.util.StrUtil;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.entity.RecordView;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.functions.ReadLogFile;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.helper.ArgumentsHelper;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.helper.FlinkHelper;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.eclipse.collections.api.factory.Lists;
|
||||||
|
import org.eclipse.collections.api.list.ImmutableList;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* avro 日志扫描
|
||||||
|
*
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
* @date 2024-01-08
|
||||||
|
*/
|
||||||
|
public class AvroScanner {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(AvroScanner.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
TaskContext taskContext = ArgumentsHelper.getContext(args);
|
||||||
|
logger.info("Context: {}", taskContext);
|
||||||
|
|
||||||
|
Map<String, Object> metadata = taskContext.getMetadata();
|
||||||
|
ArgumentsHelper.checkMetadata(taskContext, "hdfs");
|
||||||
|
String hdfs = (String) metadata.get("hdfs");
|
||||||
|
ArgumentsHelper.checkMetadata(taskContext, "key");
|
||||||
|
String key = (String) metadata.get("key");
|
||||||
|
|
||||||
|
Configuration configuration = new Configuration();
|
||||||
|
FileSystem fileSystem = FileSystem.get(configuration);
|
||||||
|
if (!fileSystem.exists(new Path(hdfs))) {
|
||||||
|
throw new RuntimeException(StrUtil.format("HDFS {} is not exists", hdfs));
|
||||||
|
}
|
||||||
|
|
||||||
|
ImmutableList<String> paths = Lists.immutable.of(fileSystem.listStatus(new Path(hdfs)))
|
||||||
|
.reject(status -> StrUtil.equals(".hoodie", status.getPath().getName()))
|
||||||
|
.collect(status -> status.getPath().toString());
|
||||||
|
|
||||||
|
StreamExecutionEnvironment environment = FlinkHelper.getBatchEnvironment();
|
||||||
|
environment.setParallelism(20);
|
||||||
|
FlinkHelper.getAllLogFilePaths(environment.fromCollection(paths.toList()))
|
||||||
|
.flatMap(new ReadLogFile())
|
||||||
|
.map(RecordView::toString)
|
||||||
|
.sinkTo(FlinkHelper.createFileSink(taskContext));
|
||||||
|
environment.execute(StrUtil.format("Search {} in {}", key, hdfs));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,50 +0,0 @@
|
|||||||
package com.lanyuanxiaoyao.service.executor.task;
|
|
||||||
|
|
||||||
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
|
|
||||||
import com.lanyuanxiaoyao.service.executor.task.helper.ArgumentsHelper;
|
|
||||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
|
||||||
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
|
|
||||||
import org.apache.flink.streaming.api.functions.source.SourceFunction;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Hello world
|
|
||||||
*
|
|
||||||
* @author lanyuanxiaoyao
|
|
||||||
* @date 2023-12-04
|
|
||||||
*/
|
|
||||||
public class Hello {
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(Hello.class);
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
|
||||||
TaskContext taskContext = ArgumentsHelper.getContext(args);
|
|
||||||
logger.info("Context: {}", taskContext);
|
|
||||||
|
|
||||||
StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
|
|
||||||
|
|
||||||
environment
|
|
||||||
.addSource(new SourceFunction<Integer>() {
|
|
||||||
@Override
|
|
||||||
public void run(SourceContext<Integer> context) {
|
|
||||||
for (int index = 0; index < 10; index++) {
|
|
||||||
context.collect(index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void cancel() {
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.map(value -> "Index: " + value)
|
|
||||||
.addSink(new SinkFunction<String>() {
|
|
||||||
@Override
|
|
||||||
public void invoke(String value, Context context) throws Exception {
|
|
||||||
logger.info("Value: {}", value);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
environment.execute("Service task: Hello");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.executor.task.entity;
|
||||||
|
|
||||||
|
import cn.hutool.core.util.StrUtil;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 记录
|
||||||
|
*
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
* @date 2024-01-09
|
||||||
|
*/
|
||||||
|
public class RecordView implements Serializable, Comparable<RecordView> {
|
||||||
|
private final Operation operation;
|
||||||
|
private final String data;
|
||||||
|
private final String timestamp;
|
||||||
|
private final String file;
|
||||||
|
private final Map<String, Object> attributes;
|
||||||
|
|
||||||
|
public RecordView(Operation operation, String data, String timestamp, String file) {
|
||||||
|
this.operation = operation;
|
||||||
|
this.data = data;
|
||||||
|
this.timestamp = timestamp;
|
||||||
|
this.file = file;
|
||||||
|
this.attributes = new HashMap<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Operation getOperation() {
|
||||||
|
return operation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getData() {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTimestamp() {
|
||||||
|
return timestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFile() {
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, Object> getAttributes() {
|
||||||
|
return attributes;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return StrUtil.format("\n{} {} {}\n{}", operation, timestamp, file, data);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(RecordView o) {
|
||||||
|
if (o != null) {
|
||||||
|
return this.timestamp.compareTo(o.timestamp);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public enum Operation {
|
||||||
|
DELETE, UPSERT, ROLLBACK, RESULT, SOURCE
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,112 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.executor.task.functions;
|
||||||
|
|
||||||
|
import cn.hutool.core.util.ObjectUtil;
|
||||||
|
import com.eshore.odcp.hudi.connector.Constants;
|
||||||
|
import com.lanyuanxiaoyao.service.executor.task.entity.RecordView;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import org.apache.flink.api.common.functions.FlatMapFunction;
|
||||||
|
import org.apache.flink.util.Collector;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hudi.common.model.HoodieLogFile;
|
||||||
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
|
import org.apache.hudi.common.table.TableSchemaResolver;
|
||||||
|
import org.apache.hudi.common.table.log.HoodieLogFormat;
|
||||||
|
import org.apache.hudi.common.table.log.block.*;
|
||||||
|
import org.apache.hudi.common.util.ClosableIterator;
|
||||||
|
import org.apache.hudi.org.apache.avro.Schema;
|
||||||
|
import org.apache.hudi.org.apache.avro.generic.IndexedRecord;
|
||||||
|
import org.apache.hudi.org.apache.avro.util.Utf8;
|
||||||
|
import org.apache.parquet.avro.AvroSchemaConverter;
|
||||||
|
import org.apache.parquet.schema.MessageType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 读取log文件
|
||||||
|
*
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
* @date 2024-01-09
|
||||||
|
*/
|
||||||
|
public class ReadLogFile implements FlatMapFunction<String, RecordView> {
|
||||||
|
private RecordView parseData(String source, IndexedRecord record) {
|
||||||
|
Schema schema = record.getSchema();
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
for (Schema.Field field : schema.getFields()) {
|
||||||
|
builder.append(field.name())
|
||||||
|
.append("=")
|
||||||
|
.append(record.get(field.pos()))
|
||||||
|
.append(" ");
|
||||||
|
}
|
||||||
|
String timestamp = null;
|
||||||
|
Schema.Field commitTimeField = schema.getField(HoodieRecord.COMMIT_TIME_METADATA_FIELD);
|
||||||
|
if (ObjectUtil.isNotNull(commitTimeField)) {
|
||||||
|
timestamp = ((Utf8) record.get(commitTimeField.pos())).toString();
|
||||||
|
}
|
||||||
|
String latestOpTs = null;
|
||||||
|
Schema.Field latestOpTsField = schema.getField(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME);
|
||||||
|
if (ObjectUtil.isNotNull(latestOpTsField)) {
|
||||||
|
latestOpTs = ((Utf8) record.get(latestOpTsField.pos())).toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
String data = builder.toString();
|
||||||
|
RecordView recordView = new RecordView(RecordView.Operation.UPSERT, data, timestamp, source);
|
||||||
|
recordView.getAttributes().put(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, latestOpTs);
|
||||||
|
|
||||||
|
return recordView;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void flatMap(String logFilePath, Collector<RecordView> out) throws IOException {
|
||||||
|
Configuration readerConfiguration = new Configuration();
|
||||||
|
FileSystem readerFilesystem = FileSystem.get(readerConfiguration);
|
||||||
|
MessageType messageType = TableSchemaResolver.readSchemaFromLogFile(readerFilesystem, new Path(logFilePath));
|
||||||
|
Schema schema = new AvroSchemaConverter().convert(Objects.requireNonNull(messageType));
|
||||||
|
try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(readerFilesystem, new HoodieLogFile(new Path(logFilePath)), schema)) {
|
||||||
|
while (reader.hasNext()) {
|
||||||
|
HoodieLogBlock block = reader.next();
|
||||||
|
Map<HoodieLogBlock.HeaderMetadataType, String> logBlockHeader = block.getLogBlockHeader();
|
||||||
|
String instant = logBlockHeader.getOrDefault(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, null);
|
||||||
|
switch (block.getBlockType()) {
|
||||||
|
case AVRO_DATA_BLOCK:
|
||||||
|
HoodieAvroDataBlock avroDataBlock = (HoodieAvroDataBlock) block;
|
||||||
|
try (ClosableIterator<IndexedRecord> avroDataBlockRecordIterator = avroDataBlock.getRecordIterator()) {
|
||||||
|
while (avroDataBlockRecordIterator.hasNext()) {
|
||||||
|
RecordView recordView = parseData(logFilePath, avroDataBlockRecordIterator.next());
|
||||||
|
out.collect(recordView);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case PARQUET_DATA_BLOCK:
|
||||||
|
HoodieParquetDataBlock parquetDataBlock = (HoodieParquetDataBlock) block;
|
||||||
|
try (ClosableIterator<IndexedRecord> parquetDataBlockRecordIterator = parquetDataBlock.getRecordIterator()) {
|
||||||
|
while (parquetDataBlockRecordIterator.hasNext()) {
|
||||||
|
RecordView recordView = parseData(logFilePath, parquetDataBlockRecordIterator.next());
|
||||||
|
out.collect(recordView);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CORRUPT_BLOCK:
|
||||||
|
break;
|
||||||
|
case DELETE_BLOCK:
|
||||||
|
HoodieDeleteBlock deleteBlock = (HoodieDeleteBlock) block;
|
||||||
|
String keys = Arrays.stream(deleteBlock.getRecordsToDelete())
|
||||||
|
.map(deleteRecord -> deleteRecord.getHoodieKey().toString())
|
||||||
|
.collect(Collectors.joining(" "));
|
||||||
|
out.collect(new RecordView(RecordView.Operation.DELETE, keys, instant, logFilePath));
|
||||||
|
break;
|
||||||
|
case COMMAND_BLOCK:
|
||||||
|
HoodieCommandBlock commandBlock = (HoodieCommandBlock) block;
|
||||||
|
Map<HoodieLogBlock.HeaderMetadataType, String> header = commandBlock.getLogBlockHeader();
|
||||||
|
out.collect(new RecordView(RecordView.Operation.ROLLBACK, header.get(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME), instant, logFilePath));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,8 +1,10 @@
|
|||||||
package com.lanyuanxiaoyao.service.executor.task.helper;
|
package com.lanyuanxiaoyao.service.executor.task.helper;
|
||||||
|
|
||||||
|
import cn.hutool.core.util.ObjectUtil;
|
||||||
import cn.hutool.core.util.StrUtil;
|
import cn.hutool.core.util.StrUtil;
|
||||||
import com.lanyuanxiaoyao.service.executor.core.TaskConstants;
|
import com.lanyuanxiaoyao.service.executor.core.TaskConstants;
|
||||||
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
|
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
|
||||||
|
import java.util.Map;
|
||||||
import org.apache.flink.api.java.utils.ParameterTool;
|
import org.apache.flink.api.java.utils.ParameterTool;
|
||||||
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
|
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
|
||||||
@@ -20,4 +22,14 @@ public class ArgumentsHelper {
|
|||||||
}
|
}
|
||||||
return JacksonHelper.getMapper().readValue(argsTool.get(TaskConstants.TASK_CONTEXT), TaskContext.class);
|
return JacksonHelper.getMapper().readValue(argsTool.get(TaskConstants.TASK_CONTEXT), TaskContext.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void checkMetadata(TaskContext context, String key) {
|
||||||
|
Map<String, Object> metadata = context.getMetadata();
|
||||||
|
if (ObjectUtil.isEmpty(metadata)) {
|
||||||
|
throw new RuntimeException("Metadata is empty");
|
||||||
|
}
|
||||||
|
if (!metadata.containsKey(key)) {
|
||||||
|
throw new RuntimeException(key + " argument is not found");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,73 @@
|
|||||||
|
package com.lanyuanxiaoyao.service.executor.task.helper;
|
||||||
|
|
||||||
|
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import org.apache.flink.api.common.RuntimeExecutionMode;
|
||||||
|
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
|
||||||
|
import org.apache.flink.api.java.functions.FlatMapIterator;
|
||||||
|
import org.apache.flink.connector.file.sink.FileSink;
|
||||||
|
import org.apache.flink.core.fs.Path;
|
||||||
|
import org.apache.flink.streaming.api.datastream.DataStream;
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
|
||||||
|
import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author lanyuanxiaoyao
|
||||||
|
* @date 2024-01-08
|
||||||
|
*/
|
||||||
|
public class FlinkHelper {
|
||||||
|
public static StreamExecutionEnvironment getSteamEnvironment() {
|
||||||
|
return StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static StreamExecutionEnvironment getBatchEnvironment() {
|
||||||
|
StreamExecutionEnvironment environment = getSteamEnvironment();
|
||||||
|
environment.setRuntimeMode(RuntimeExecutionMode.BATCH);
|
||||||
|
return environment;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static FileSink<String> createFileSink(TaskContext context) {
|
||||||
|
return createFileSink(context.getTaskId(), context.getResultPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static FileSink<String> createFileSink(String taskId, String resultPath) {
|
||||||
|
return FileSink
|
||||||
|
.<String>forRowFormat(new Path(resultPath + "/" + taskId), new SimpleStringEncoder<>("UTF-8"))
|
||||||
|
.withBucketAssigner(new BasePathBucketAssigner<>())
|
||||||
|
.withOutputFileConfig(new OutputFileConfig("task", ""))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DataStream<String> getAllFilePaths(DataStream<String> source) {
|
||||||
|
return source
|
||||||
|
.map(path -> {
|
||||||
|
Configuration configuration = new Configuration();
|
||||||
|
FileSystem fileSystem = FileSystem.get(configuration);
|
||||||
|
FileStatus[] statuses = fileSystem.listStatus(new org.apache.hadoop.fs.Path(path));
|
||||||
|
String[] results = new String[statuses.length];
|
||||||
|
for (int index = 0; index < statuses.length; index++) {
|
||||||
|
results[index] = statuses[index].getPath().toString();
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
})
|
||||||
|
.name("Read files")
|
||||||
|
.flatMap(new FlatMapIterator<String[], String>() {
|
||||||
|
@Override
|
||||||
|
public Iterator<String> flatMap(String[] strings) {
|
||||||
|
return Arrays.asList(strings).iterator();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DataStream<String> getAllLogFilePaths(DataStream<String> source) {
|
||||||
|
return getAllFilePaths(source)
|
||||||
|
.filter(FSUtils::isLogFile)
|
||||||
|
.name("Filter log files");
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user