[HUDI-4044] When reading data from flink-hudi to external storage, the … (#5516)
Co-authored-by: aliceyyan <aliceyyan@tencent.com>
This commit is contained in:
@@ -226,7 +226,7 @@ public class IncrementalInputSplits implements Serializable {
|
|||||||
String basePath = fileSlice.getBaseFile().map(BaseFile::getPath).orElse(null);
|
String basePath = fileSlice.getBaseFile().map(BaseFile::getPath).orElse(null);
|
||||||
return new MergeOnReadInputSplit(cnt.getAndAdd(1),
|
return new MergeOnReadInputSplit(cnt.getAndAdd(1),
|
||||||
basePath, logPaths, endInstant,
|
basePath, logPaths, endInstant,
|
||||||
metaClient.getBasePath(), maxCompactionMemoryInBytes, mergeType, instantRange);
|
metaClient.getBasePath(), maxCompactionMemoryInBytes, mergeType, instantRange, fileSlice.getFileId());
|
||||||
}).collect(Collectors.toList()))
|
}).collect(Collectors.toList()))
|
||||||
.flatMap(Collection::stream)
|
.flatMap(Collection::stream)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|||||||
@@ -181,6 +181,7 @@ public class HoodieTableSource implements
|
|||||||
OneInputStreamOperatorFactory<MergeOnReadInputSplit, RowData> factory = StreamReadOperator.factory((MergeOnReadInputFormat) inputFormat);
|
OneInputStreamOperatorFactory<MergeOnReadInputSplit, RowData> factory = StreamReadOperator.factory((MergeOnReadInputFormat) inputFormat);
|
||||||
SingleOutputStreamOperator<RowData> source = execEnv.addSource(monitoringFunction, getSourceOperatorName("split_monitor"))
|
SingleOutputStreamOperator<RowData> source = execEnv.addSource(monitoringFunction, getSourceOperatorName("split_monitor"))
|
||||||
.setParallelism(1)
|
.setParallelism(1)
|
||||||
|
.keyBy(inputSplit -> inputSplit.getFileId())
|
||||||
.transform("split_reader", typeInfo, factory)
|
.transform("split_reader", typeInfo, factory)
|
||||||
.setParallelism(conf.getInteger(FlinkOptions.READ_TASKS));
|
.setParallelism(conf.getInteger(FlinkOptions.READ_TASKS));
|
||||||
return new DataStreamSource<>(source);
|
return new DataStreamSource<>(source);
|
||||||
@@ -316,7 +317,7 @@ public class HoodieTableSource implements
|
|||||||
.map(logFile -> logFile.getPath().toString())
|
.map(logFile -> logFile.getPath().toString())
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
return new MergeOnReadInputSplit(cnt.getAndAdd(1), basePath, logPaths, latestCommit,
|
return new MergeOnReadInputSplit(cnt.getAndAdd(1), basePath, logPaths, latestCommit,
|
||||||
metaClient.getBasePath(), maxCompactionMemoryInBytes, mergeType, null);
|
metaClient.getBasePath(), maxCompactionMemoryInBytes, mergeType, null, fileSlice.getFileId());
|
||||||
}).collect(Collectors.toList()))
|
}).collect(Collectors.toList()))
|
||||||
.flatMap(Collection::stream)
|
.flatMap(Collection::stream)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ public class MergeOnReadInputSplit implements InputSplit {
|
|||||||
private final long maxCompactionMemoryInBytes;
|
private final long maxCompactionMemoryInBytes;
|
||||||
private final String mergeType;
|
private final String mergeType;
|
||||||
private final Option<InstantRange> instantRange;
|
private final Option<InstantRange> instantRange;
|
||||||
|
private String fileId;
|
||||||
|
|
||||||
// for streaming reader to record the consumed offset,
|
// for streaming reader to record the consumed offset,
|
||||||
// which is the start of next round reading.
|
// which is the start of next round reading.
|
||||||
@@ -56,7 +57,8 @@ public class MergeOnReadInputSplit implements InputSplit {
|
|||||||
String tablePath,
|
String tablePath,
|
||||||
long maxCompactionMemoryInBytes,
|
long maxCompactionMemoryInBytes,
|
||||||
String mergeType,
|
String mergeType,
|
||||||
@Nullable InstantRange instantRange) {
|
@Nullable InstantRange instantRange,
|
||||||
|
String fileId) {
|
||||||
this.splitNum = splitNum;
|
this.splitNum = splitNum;
|
||||||
this.basePath = Option.ofNullable(basePath);
|
this.basePath = Option.ofNullable(basePath);
|
||||||
this.logPaths = logPaths;
|
this.logPaths = logPaths;
|
||||||
@@ -65,6 +67,15 @@ public class MergeOnReadInputSplit implements InputSplit {
|
|||||||
this.maxCompactionMemoryInBytes = maxCompactionMemoryInBytes;
|
this.maxCompactionMemoryInBytes = maxCompactionMemoryInBytes;
|
||||||
this.mergeType = mergeType;
|
this.mergeType = mergeType;
|
||||||
this.instantRange = Option.ofNullable(instantRange);
|
this.instantRange = Option.ofNullable(instantRange);
|
||||||
|
this.fileId = fileId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFileId() {
|
||||||
|
return fileId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFileId(String fileId) {
|
||||||
|
this.fileId = fileId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Option<String> getBasePath() {
|
public Option<String> getBasePath() {
|
||||||
|
|||||||
Reference in New Issue
Block a user