feat(executor-manager): 增加表数据查询10条采样任务

This commit is contained in:
v-zhangjc9
2024-06-03 16:17:41 +08:00
parent 8aba2475be
commit 6f2fce4359
6 changed files with 121 additions and 4 deletions

View File

@@ -0,0 +1,57 @@
package com.lanyuanxiaoyao.service.executor.task;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
import com.lanyuanxiaoyao.service.executor.task.helper.ArgumentsHelper;
import com.lanyuanxiaoyao.service.executor.task.helper.HdfsHelper;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.org.apache.avro.Schema;
import org.eclipse.collections.api.factory.Lists;
import org.eclipse.collections.api.list.ImmutableList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.lanyuanxiaoyao.service.executor.task.SQLExecutor.executeBatch;
import static com.lanyuanxiaoyao.service.executor.task.SQLExecutor.generateResult;
/**
 * Table sampling task: reads a Hudi table's create-schema from an HDFS base path
 * and runs a batch query returning the 10 most recent rows (ordered descending by
 * the latest-operation-timestamp column), writing the result via {@link HdfsHelper}.
 *
 * @author lanyuanxiaoyao
 */
public class TableSampling {

    private static final Logger logger = LoggerFactory.getLogger(TableSampling.class);

    /** Parallelism for the Flink batch read of the Hudi table. */
    private static final int READ_TASKS = 50;

    /** Utility entry-point class; no instances. */
    private TableSampling() {
    }

    /**
     * Entry point. Expects task arguments parseable by {@link ArgumentsHelper}
     * whose metadata contains an {@code "hdfs"} key holding the Hudi table base path.
     *
     * @param args serialized task arguments
     * @throws Exception if required metadata is missing, the table schema cannot
     *                   be resolved, or query execution / result writing fails
     */
    public static void main(String[] args) throws Exception {
        TaskContext taskContext = ArgumentsHelper.getContext(args);
        logger.info("Context: {}", taskContext);
        // Fail fast when the required "hdfs" metadata entry is absent.
        ArgumentsHelper.checkMetadata(taskContext, "hdfs");
        String hdfs = (String) taskContext.getMetadata().get("hdfs");

        Configuration configuration = new Configuration();
        configuration.setStrings(FlinkOptions.PATH.key(), hdfs);
        configuration.setInt(FlinkOptions.READ_TASKS.key(), READ_TASKS);

        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
                .setConf(configuration)
                .setBasePath(hdfs)
                .build();
        HoodieTableConfig tableConfig = metaClient.getTableConfig();
        // Fix: throw a specific unchecked exception instead of raw Exception.
        Schema schema = tableConfig.getTableCreateSchema()
                .orElseThrow(() -> new IllegalStateException("Cannot parse schema from " + hdfs));
        ImmutableList<String> fields = Lists.immutable.ofAll(schema.getFields()).collect(Schema.Field::name);

        // Sample the 10 newest rows, ordered by the latest-operation-timestamp column.
        try (CloseableIterator<Row> iterator = executeBatch(
                metaClient,
                tableName -> StrUtil.format("select {} from `{}` order by {} desc limit 10",
                        fields.makeString(", "), tableName, Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME)
        )) {
            HdfsHelper.createResult(FileSystem.get(metaClient.getHadoopConf()), taskContext, generateResult(iterator, fields));
        }
    }
}