1
0

[HUDI-3446] Supports batch reader in BootstrapOperator#loadRecords (#4837)

* [HUDI-3446] Supports batch reader in BootstrapOperator#loadRecords
This commit is contained in:
Bo Cui
2022-02-19 21:21:48 +08:00
committed by GitHub
parent f15125c0cd
commit 83279971a1
8 changed files with 197 additions and 126 deletions

View File

@@ -30,6 +30,7 @@ import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.BaseFileUtils;
import org.apache.hudi.common.util.ClosableIterator;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -210,17 +211,10 @@ public class BootstrapOperator<I, O extends HoodieRecord<?>>
if (!isValidFile(baseFile.getFileStatus())) {
return;
}
final List<HoodieKey> hoodieKeys;
try {
hoodieKeys =
fileUtils.fetchRecordKeyPartitionPath(this.hadoopConf, new Path(baseFile.getPath()));
} catch (Exception e) {
throw new HoodieException(String.format("Error when loading record keys from file: %s", baseFile), e);
}
for (HoodieKey hoodieKey : hoodieKeys) {
output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(hoodieKey, fileSlice))));
try (ClosableIterator<HoodieKey> iterator = fileUtils.getHoodieKeyIterator(this.hadoopConf, new Path(baseFile.getPath()))) {
iterator.forEachRemaining(hoodieKey -> {
output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(hoodieKey, fileSlice))));
});
}
});