feat(executor-task): 优化task的配置信息传递
This commit is contained in:
@@ -51,7 +51,6 @@ public class ToolCommand {
|
|||||||
logger.warn("{} not found", hdfs);
|
logger.warn("{} not found", hdfs);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
logger.info("Result:\n{}", results.makeString("\n"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ShellMethod("降级hoodie.properties")
|
@ShellMethod("降级hoodie.properties")
|
||||||
@@ -75,6 +74,5 @@ public class ToolCommand {
|
|||||||
logger.warn("{} not found", hdfs);
|
logger.warn("{} not found", hdfs);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
logger.info("Result:\n{}", results.makeString("\n"));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
package com.lanyuanxiaoyao.service.executor.task;
|
package com.lanyuanxiaoyao.service.executor.task;
|
||||||
|
|
||||||
|
import cn.hutool.core.collection.IterUtil;
|
||||||
import cn.hutool.core.util.StrUtil;
|
import cn.hutool.core.util.StrUtil;
|
||||||
import de.vandermeer.asciitable.AsciiTable;
|
import de.vandermeer.asciitable.AsciiTable;
|
||||||
import de.vandermeer.asciitable.CWC_LongestLine;
|
import de.vandermeer.asciitable.CWC_LongestLine;
|
||||||
@@ -38,10 +39,6 @@ public class SQLExecutor {
|
|||||||
private static final Logger logger = LoggerFactory.getLogger(SQLExecutor.class);
|
private static final Logger logger = LoggerFactory.getLogger(SQLExecutor.class);
|
||||||
|
|
||||||
protected static CloseableIterator<Row> executeStream(HoodieTableMetaClient metaClient, Function<Table, TableResult> SQL) throws Exception {
|
protected static CloseableIterator<Row> executeStream(HoodieTableMetaClient metaClient, Function<Table, TableResult> SQL) throws Exception {
|
||||||
return executeStream(metaClient, 10, SQL);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static CloseableIterator<Row> executeStream(HoodieTableMetaClient metaClient, Integer parallelism, Function<Table, TableResult> SQL) throws Exception {
|
|
||||||
HoodieTableConfig tableConfig = metaClient.getTableConfig();
|
HoodieTableConfig tableConfig = metaClient.getTableConfig();
|
||||||
Schema schema = new TableSchemaResolver(metaClient).getTableAvroSchema();
|
Schema schema = new TableSchemaResolver(metaClient).getTableAvroSchema();
|
||||||
|
|
||||||
@@ -55,18 +52,13 @@ public class SQLExecutor {
|
|||||||
tableConfig.getPartitionFields().ifPresent(builder::partition);
|
tableConfig.getPartitionFields().ifPresent(builder::partition);
|
||||||
tableConfig.getRecordKeyFields().ifPresent(builder::pk);
|
tableConfig.getRecordKeyFields().ifPresent(builder::pk);
|
||||||
builder.option("connector", "hudi");
|
builder.option("connector", "hudi");
|
||||||
builder.option(FlinkOptions.PATH, metaClient.getBasePathV2());
|
builder.options(IterUtil.toMap(metaClient.getHadoopConf()));
|
||||||
builder.option(FlinkOptions.READ_TASKS, parallelism);
|
|
||||||
|
|
||||||
Table table = tableEnvironment.fromDataStream(builder.source(executionEnvironment));
|
Table table = tableEnvironment.fromDataStream(builder.source(executionEnvironment));
|
||||||
return SQL.apply(table).collect();
|
return SQL.apply(table).collect();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static CloseableIterator<Row> executeBatch(HoodieTableMetaClient metaClient, Function<String, String> SQL) throws Exception {
|
protected static CloseableIterator<Row> executeBatch(HoodieTableMetaClient metaClient, Function<String, String> SQL) throws Exception {
|
||||||
return executeBatch(metaClient, 10, SQL);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static CloseableIterator<Row> executeBatch(HoodieTableMetaClient metaClient, Integer parallelism, Function<String, String> SQL) throws Exception {
|
|
||||||
HoodieTableConfig tableConfig = metaClient.getTableConfig();
|
HoodieTableConfig tableConfig = metaClient.getTableConfig();
|
||||||
Schema schema = new TableSchemaResolver(metaClient).getTableAvroSchema();
|
Schema schema = new TableSchemaResolver(metaClient).getTableAvroSchema();
|
||||||
|
|
||||||
@@ -85,12 +77,11 @@ public class SQLExecutor {
|
|||||||
}),
|
}),
|
||||||
tableConfig.getPartitionFields().map(Lists.immutable::of).orElse(Lists.immutable.empty()),
|
tableConfig.getPartitionFields().map(Lists.immutable::of).orElse(Lists.immutable.empty()),
|
||||||
tableConfig.getRecordKeyFields().map(Lists.immutable::of).orElse(Lists.immutable.empty()),
|
tableConfig.getRecordKeyFields().map(Lists.immutable::of).orElse(Lists.immutable.empty()),
|
||||||
Maps.immutable.of(
|
Maps.immutable.ofAll(IterUtil.toMap(metaClient.getHadoopConf()))
|
||||||
FlinkOptions.PATH.key(), metaClient.getBasePathV2().toString(),
|
|
||||||
FlinkOptions.READ_TASKS.key(), parallelism.toString()
|
|
||||||
)
|
|
||||||
));
|
));
|
||||||
return tableEnvironment.executeSql(SQL.apply(tableConfig.getTableName())).collect();
|
String sql = SQL.apply(tableConfig.getTableName());
|
||||||
|
logger.info("SQL: {}", sql);
|
||||||
|
return tableEnvironment.executeSql(sql).collect();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static String generateResult(CloseableIterator<Row> iterator, ImmutableList<String> fields) {
|
protected static String generateResult(CloseableIterator<Row> iterator, ImmutableList<String> fields) {
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import org.apache.flink.util.CloseableIterator;
|
|||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
import org.eclipse.collections.api.factory.Lists;
|
import org.eclipse.collections.api.factory.Lists;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@@ -30,15 +31,17 @@ public class TableSummary extends SQLExecutor {
|
|||||||
ArgumentsHelper.checkMetadata(taskContext, "hdfs");
|
ArgumentsHelper.checkMetadata(taskContext, "hdfs");
|
||||||
String hdfs = (String) taskContext.getMetadata().get("hdfs");
|
String hdfs = (String) taskContext.getMetadata().get("hdfs");
|
||||||
|
|
||||||
|
Configuration configuration = new Configuration();
|
||||||
|
configuration.setStrings(FlinkOptions.PATH.key(), hdfs);
|
||||||
|
configuration.setInt(FlinkOptions.READ_TASKS.key(), 50);
|
||||||
|
|
||||||
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
|
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
|
||||||
.setConf(new Configuration())
|
.setConf(configuration)
|
||||||
.setBasePath(hdfs)
|
.setBasePath(hdfs)
|
||||||
.build();
|
.build();
|
||||||
int parallelism = HdfsHelper.baseScanParallelismPredict(HdfsHelper.basePaths(HdfsHelper.hdfsPaths(metaClient.getRawFs(), hdfs)));
|
|
||||||
try (CloseableIterator<Row> iterator = executeBatch(
|
try (CloseableIterator<Row> iterator = executeBatch(
|
||||||
metaClient,
|
metaClient,
|
||||||
parallelism,
|
tableName -> StrUtil.format("select count({}) as `count`, max({}) as latest_op_ts from `{}`", Constants.UNION_KEY_NAME, Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, tableName)
|
||||||
tableName -> StrUtil.format("select count({}) as `count`, max({}) as latest_op_ts from {}", Constants.UNION_KEY_NAME, Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, tableName)
|
|
||||||
)) {
|
)) {
|
||||||
HdfsHelper.createResult(FileSystem.get(metaClient.getHadoopConf()), taskContext, generateResult(iterator, Lists.immutable.of("count", "latest_op_ts")));
|
HdfsHelper.createResult(FileSystem.get(metaClient.getHadoopConf()), taskContext, generateResult(iterator, Lists.immutable.of("count", "latest_op_ts")));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user