refactor(executor-task): 优化pulsar扫描配置
This commit is contained in:
@@ -171,6 +171,6 @@ public class DataScanner {
|
||||
.sinkTo(FlinkHelper.createFileSink(taskContext))
|
||||
.setParallelism(10)
|
||||
.name("Output results");
|
||||
environment.execute();
|
||||
environment.execute(StrUtil.format("Search {}", key));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,6 +82,6 @@ public class RecordView implements Serializable, Comparable<RecordView> {
|
||||
}
|
||||
|
||||
public enum Operation {
|
||||
DELETE, UPSERT, ROLLBACK, RESULT, SOURCE
|
||||
DELETE, UPSERT, ROLLBACK, RESULT, SOURCE, QUEUE, TARGET
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,13 +9,15 @@ import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.apache.flink.api.common.typeinfo.TypeInformation;
|
||||
import org.apache.flink.api.connector.source.*;
|
||||
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
|
||||
import org.apache.flink.core.io.SimpleVersionedSerializer;
|
||||
import org.apache.pulsar.client.api.*;
|
||||
import org.eclipse.collections.api.factory.Lists;
|
||||
import org.eclipse.collections.api.list.MutableList;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@@ -25,7 +27,7 @@ import org.slf4j.LoggerFactory;
|
||||
*/
|
||||
public class ReadPulsarSource implements Source<RecordView, ReadPulsarSplit, Collection<ReadPulsarSplit>>, ResultTypeQueryable<RecordView>, Serializable {
|
||||
private static final Logger logger = LoggerFactory.getLogger(ReadPulsarSource.class);
|
||||
private static final Long TASK_GAP = 6 * 60 * 60 * 1000L;
|
||||
private static final Long TASK_GAP = TimeUnit.MINUTES.toMillis(30);
|
||||
private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS")
|
||||
.withLocale(Locale.CHINA)
|
||||
.withZone(ZoneId.systemDefault());
|
||||
@@ -47,9 +49,7 @@ public class ReadPulsarSource implements Source<RecordView, ReadPulsarSplit, Col
|
||||
Message<byte[]> message = consumer.receive();
|
||||
long startTimestamp = message.getPublishTime();
|
||||
long endTimestamp = Instant.now().toEpochMilli();
|
||||
long gap = Math.max((endTimestamp - startTimestamp) / (parallelism - 1), 1000 * 60 * 60);
|
||||
logger.info("Gap: {}, Parallelism: {}", gap, parallelism);
|
||||
List<ReadPulsarSplit> tasks = new ArrayList<>();
|
||||
MutableList<ReadPulsarSplit> tasks = Lists.mutable.empty();
|
||||
while (startTimestamp < endTimestamp) {
|
||||
tasks.add(new ReadPulsarSplit(
|
||||
taskContext.getTaskId(),
|
||||
@@ -57,14 +57,15 @@ public class ReadPulsarSource implements Source<RecordView, ReadPulsarSplit, Col
|
||||
pulsarTopic,
|
||||
latestMessageId.toString(),
|
||||
startTimestamp,
|
||||
startTimestamp + gap
|
||||
Math.min(endTimestamp, startTimestamp + TASK_GAP)
|
||||
));
|
||||
startTimestamp += gap;
|
||||
startTimestamp += TASK_GAP;
|
||||
}
|
||||
splits = tasks;
|
||||
for (ReadPulsarSplit split : splits) {
|
||||
logger.info("Gap: {}, Parallelism: {}, Splits: {}", TASK_GAP, parallelism, tasks.size());
|
||||
for (ReadPulsarSplit split : tasks) {
|
||||
logger.info("Read split: {} -> {}", covertTimestamp(split.getStartTimestamp()), covertTimestamp(split.getEndTimestamp()));
|
||||
}
|
||||
splits = new ArrayList<>(tasks.shuffleThis());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,10 +35,10 @@ public class ReadPulsarSourceEnumerator implements SplitEnumerator<ReadPulsarSpl
|
||||
public void handleSplitRequest(int subtaskId, @Nullable String requesterHostname) {
|
||||
final ReadPulsarSplit split = readQueue.poll();
|
||||
if (ObjectUtil.isNotNull(split)) {
|
||||
logger.info("Assign split for {}, split: {}", subtaskId, split);
|
||||
logger.info("t{} Assign split for {}, Queue rest: {}", subtaskId, subtaskId, readQueue.size());
|
||||
context.assignSplit(split, subtaskId);
|
||||
} else {
|
||||
logger.info("No more split for {}", subtaskId);
|
||||
logger.info("t{} No more split for {}", subtaskId, subtaskId);
|
||||
context.signalNoMoreSplits(subtaskId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import java.util.Locale;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.flink.api.connector.source.ReaderOutput;
|
||||
import org.apache.flink.api.connector.source.SourceReader;
|
||||
import org.apache.flink.api.connector.source.SourceReaderContext;
|
||||
@@ -56,7 +57,7 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
|
||||
|
||||
private RecordView parsePulsarMessage(Message<byte[]> message) {
|
||||
return new RecordView(
|
||||
RecordView.Operation.SOURCE,
|
||||
RecordView.Operation.QUEUE,
|
||||
new String(message.getValue()),
|
||||
FORMATTER.format(Instant.ofEpochMilli(message.getPublishTime())),
|
||||
message.getMessageId().toString()
|
||||
@@ -66,7 +67,7 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
|
||||
@Override
|
||||
public InputStatus pollNext(ReaderOutput<RecordView> output) throws Exception {
|
||||
if (ObjectUtil.isNotNull(currentSplit)) {
|
||||
logger.info("Read split: {}", currentSplit);
|
||||
logger.info("t{} Read split: {}", readerContext.getIndexOfSubtask(), currentSplit.getStartTimestamp());
|
||||
try (PulsarClient client = PulsarClient.builder()
|
||||
.serviceUrl(currentSplit.getPulsarUrl())
|
||||
.build()) {
|
||||
@@ -83,7 +84,13 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
|
||||
.subscriptionInitialPosition(SubscriptionInitialPosition.Earliest)
|
||||
.subscriptionMode(SubscriptionMode.NonDurable)
|
||||
.subscriptionType(SubscriptionType.Exclusive)
|
||||
.subscriptionName(StrUtil.format("Task_Reader_{}_{}", currentSplit.getTaskId(), readerContext.getIndexOfSubtask()))
|
||||
.subscriptionName(StrUtil.format(
|
||||
"Task_Reader_{}_{}_{}_{}",
|
||||
currentSplit.getTaskId(),
|
||||
readerContext.getIndexOfSubtask(),
|
||||
currentSplit.getStartTimestamp(),
|
||||
currentSplit.getEndTimestamp()
|
||||
))
|
||||
.startMessageIdInclusive()
|
||||
.subscribe()) {
|
||||
consumer.seek(currentSplit.getStartTimestamp());
|
||||
@@ -92,24 +99,29 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
|
||||
long currentTimestamp = 0;
|
||||
for (Message<byte[]> message : messages) {
|
||||
currentTimestamp = message.getPublishTime();
|
||||
}
|
||||
if (currentTimestamp > currentSplit.getEndTimestamp()) {
|
||||
logger.info("t{} Break for {} -> {}, Queue rest: {}", readerContext.getIndexOfSubtask(), currentTimestamp, currentSplit.getEndTimestamp(), readQueue.size());
|
||||
break;
|
||||
}
|
||||
for (Message<byte[]> message : messages) {
|
||||
output.collect(parsePulsarMessage(message));
|
||||
}
|
||||
consumer.acknowledge(messages);
|
||||
if (currentTimestamp > currentSplit.getEndTimestamp()) {
|
||||
logger.info("Break for {} -> {}", currentTimestamp, currentSplit.getEndTimestamp());
|
||||
break;
|
||||
}
|
||||
messages = consumer.batchReceive();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return tryMoveToNextSplit();
|
||||
}
|
||||
|
||||
private InputStatus tryMoveToNextSplit() {
|
||||
currentSplit = null;
|
||||
if (ObjectUtil.isEmpty(readQueue) && !noMoreSplits) {
|
||||
readerContext.sendSplitRequest();
|
||||
logger.info("t{} Request new split", readerContext.getIndexOfSubtask());
|
||||
}
|
||||
|
||||
currentSplit = readQueue.poll();
|
||||
logger.info("Current split: {}", currentSplit);
|
||||
logger.info("t{} Queue rest: {}, Current split: {}", readerContext.getIndexOfSubtask(), readQueue.size(), currentSplit);
|
||||
if (ObjectUtil.isNotNull(currentSplit)) {
|
||||
return InputStatus.MORE_AVAILABLE;
|
||||
} else if (noMoreSplits) {
|
||||
@@ -134,14 +146,14 @@ public class ReadPulsarSourceReader implements SourceReader<RecordView, ReadPuls
|
||||
|
||||
@Override
|
||||
public void addSplits(List<ReadPulsarSplit> splits) {
|
||||
logger.info("Add splits: {}", splits);
|
||||
logger.info("t{} Add splits: {}", readerContext.getIndexOfSubtask(), splits.stream().map(ReadPulsarSplit::getStartTimestamp).collect(Collectors.toList()));
|
||||
readQueue.addAll(splits);
|
||||
availability.complete(null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void notifyNoMoreSplits() {
|
||||
logger.info("No more splits for {}", readerContext.getIndexOfSubtask());
|
||||
logger.info("t{} No more splits for {}", readerContext.getIndexOfSubtask(), readerContext.getIndexOfSubtask());
|
||||
noMoreSplits = true;
|
||||
availability.complete(null);
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
|
||||
<level>INFO</level>
|
||||
</filter>
|
||||
<url>http://132.126.207.125:33100/loki/api/v1/push</url>
|
||||
<url>http://132.126.207.126:33100/loki/api/v1/push</url>
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [${HOSTNAME}] %-5level ${PID:- } --- [%t] %-40.40logger{39} #@# : %m%n</pattern>
|
||||
</encoder>
|
||||
|
||||
Reference in New Issue
Block a user