feat(executor-manager): 增加表数据查询10条采样任务
This commit is contained in:
@@ -0,0 +1,57 @@
|
||||
package com.lanyuanxiaoyao.service.executor.task;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.lanyuanxiaoyao.service.common.Constants;
|
||||
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
|
||||
import com.lanyuanxiaoyao.service.executor.task.helper.ArgumentsHelper;
|
||||
import com.lanyuanxiaoyao.service.executor.task.helper.HdfsHelper;
|
||||
import org.apache.flink.types.Row;
|
||||
import org.apache.flink.util.CloseableIterator;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hudi.common.table.HoodieTableConfig;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.org.apache.avro.Schema;
|
||||
import org.eclipse.collections.api.factory.Lists;
|
||||
import org.eclipse.collections.api.list.ImmutableList;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static com.lanyuanxiaoyao.service.executor.task.SQLExecutor.executeBatch;
|
||||
import static com.lanyuanxiaoyao.service.executor.task.SQLExecutor.generateResult;
|
||||
|
||||
/**
|
||||
* 表采样
|
||||
*
|
||||
* @author lanyuanxiaoyao
|
||||
*/
|
||||
public class TableSampling {
|
||||
private static final Logger logger = LoggerFactory.getLogger(TableSampling.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
TaskContext taskContext = ArgumentsHelper.getContext(args);
|
||||
logger.info("Context: {}", taskContext);
|
||||
|
||||
ArgumentsHelper.checkMetadata(taskContext, "hdfs");
|
||||
String hdfs = (String) taskContext.getMetadata().get("hdfs");
|
||||
|
||||
Configuration configuration = new Configuration();
|
||||
configuration.setStrings(FlinkOptions.PATH.key(), hdfs);
|
||||
configuration.setInt(FlinkOptions.READ_TASKS.key(), 50);
|
||||
|
||||
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
|
||||
.setConf(configuration)
|
||||
.setBasePath(hdfs)
|
||||
.build();
|
||||
HoodieTableConfig tableConfig = metaClient.getTableConfig();
|
||||
Schema schema = tableConfig.getTableCreateSchema().orElseThrow(() -> new Exception("Cannot parse schema from " + hdfs));
|
||||
ImmutableList<String> fields = Lists.immutable.ofAll(schema.getFields()).collect(Schema.Field::name);
|
||||
try (CloseableIterator<Row> iterator = executeBatch(
|
||||
metaClient,
|
||||
tableName -> StrUtil.format("select {} from `{}` order by {} desc limit 10", fields.makeString(", "), tableName, Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME)
|
||||
)) {
|
||||
HdfsHelper.createResult(FileSystem.get(metaClient.getHadoopConf()), taskContext, generateResult(iterator, fields));
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user