feat(executor-task): parquet文件读取增加选择列
指定列名可以提高检索速度，默认选择所有列
This commit is contained in:
@@ -36,9 +36,10 @@ public class ExecutorTaskController {
|
||||
@RequestParam(value = "scan_queue", defaultValue = "false") Boolean scanQueue,
|
||||
@RequestParam(value = "scan_log", defaultValue = "false") Boolean scanLog,
|
||||
@RequestParam(value = "scan_base", defaultValue = "false") Boolean scanBase,
|
||||
@RequestParam(value = "scan_target", defaultValue = "false") Boolean scanTarget
|
||||
@RequestParam(value = "scan_target", defaultValue = "false") Boolean scanTarget,
|
||||
@RequestParam(value = "filter_fields", required = false) String filterFields
|
||||
) throws Exception {
|
||||
logger.info("Enter method: scan[key, hdfs, pulsar, pulsarTopic, scanSource, scanQueue, scanLog, scanBase, scanTarget]. " + "key:" + key + "," + "hdfs:" + hdfs + "," + "pulsar:" + pulsar + "," + "pulsarTopic:" + pulsarTopic + "," + "scanSource:" + scanSource + "," + "scanQueue:" + scanQueue + "," + "scanLog:" + scanLog + "," + "scanBase:" + scanBase + "," + "scanTarget:" + scanTarget);
|
||||
logger.info("Enter method: scan[key, hdfs, pulsar, pulsarTopic, scanSource, scanQueue, scanLog, scanBase, scanTarget, filter_fields]. " + "key:" + key + "," + "hdfs:" + hdfs + "," + "pulsar:" + pulsar + "," + "pulsarTopic:" + pulsarTopic + "," + "scanSource:" + scanSource + "," + "scanQueue:" + scanQueue + "," + "scanLog:" + scanLog + "," + "scanBase:" + scanBase + "," + "scanTarget:" + scanTarget + "," + "filter_fields:" + filterFields);
|
||||
if (!scanSource && !scanQueue && !scanLog && !scanBase && !scanTarget) {
|
||||
throw new RuntimeException("Must choose one mode");
|
||||
}
|
||||
@@ -48,7 +49,7 @@ public class ExecutorTaskController {
|
||||
if ((scanLog || scanBase) && StrUtil.isBlank(hdfs)) {
|
||||
throw new RuntimeException("Hdfs path cannot be empty");
|
||||
}
|
||||
return executorTaskService.scanAvro(key, hdfs, pulsar, pulsarTopic, scanSource, scanQueue, scanLog, scanBase, scanTarget);
|
||||
return executorTaskService.scanAvro(key, hdfs, pulsar, pulsarTopic, scanSource, scanQueue, scanLog, scanBase, scanTarget, filterFields);
|
||||
}
|
||||
|
||||
@GetMapping("latest_op_ts")
|
||||
|
||||
@@ -21,7 +21,18 @@ import java.util.Optional;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.apache.flink.client.cli.ClientOptions;
|
||||
import org.apache.flink.configuration.*;
|
||||
import org.apache.flink.configuration.AkkaOptions;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.configuration.CoreOptions;
|
||||
import org.apache.flink.configuration.DeploymentOptions;
|
||||
import org.apache.flink.configuration.HeartbeatManagerOptions;
|
||||
import org.apache.flink.configuration.JobManagerOptions;
|
||||
import org.apache.flink.configuration.MemorySize;
|
||||
import org.apache.flink.configuration.PipelineOptions;
|
||||
import org.apache.flink.configuration.ResourceManagerOptions;
|
||||
import org.apache.flink.configuration.RestOptions;
|
||||
import org.apache.flink.configuration.SecurityOptions;
|
||||
import org.apache.flink.configuration.TaskManagerOptions;
|
||||
import org.apache.flink.yarn.configuration.YarnConfigOptions;
|
||||
import org.apache.flink.yarn.configuration.YarnDeploymentTarget;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
@@ -141,10 +152,23 @@ public class ExecutorTaskService {
|
||||
Boolean scanQueue,
|
||||
Boolean scanLog,
|
||||
Boolean scanBase,
|
||||
Boolean scanTarget
|
||||
Boolean scanTarget,
|
||||
String filterFields
|
||||
) throws Exception {
|
||||
String taskId = taskId();
|
||||
Configuration configuration = generateConfiguration(taskId, "scan " + key);
|
||||
|
||||
MutableList<String> types = Lists.mutable.empty();
|
||||
if (scanSource)
|
||||
types.add("source");
|
||||
if (scanQueue)
|
||||
types.add("queue");
|
||||
if (scanLog)
|
||||
types.add("log");
|
||||
if (scanBase)
|
||||
types.add("base");
|
||||
if (scanTarget)
|
||||
types.add("target");
|
||||
Configuration configuration = generateConfiguration(taskId, StrUtil.format("scan {} {}", types.makeString(","), key));
|
||||
MapBuilder<String, Object> builder = MapUtil.builder();
|
||||
|
||||
setEnvironment(configuration, "key", key);
|
||||
@@ -164,6 +188,10 @@ public class ExecutorTaskService {
|
||||
builder.put("pulsar", pulsar);
|
||||
builder.put("pulsar_topic", pulsarTopic);
|
||||
}
|
||||
|
||||
if (StrUtil.isNotBlank(filterFields)) {
|
||||
builder.put("filter_fields", filterFields);
|
||||
}
|
||||
ApplicationId applicationId = Runner.run(
|
||||
configuration,
|
||||
"com.lanyuanxiaoyao.service.executor.task.DataScanner",
|
||||
|
||||
Reference in New Issue
Block a user