[HUDI-1445] Refactor AbstractHoodieLogRecordScanner to use Builder (#2313)
This commit is contained in:
@@ -460,9 +460,20 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
writer.close();
|
||||
|
||||
// scan all log blocks (across multiple log files)
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath,
-logFiles.stream().map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()), schema, "100",
-10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(
+logFiles.stream()
+.map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()))
+.withReaderSchema(schema)
+.withLatestInstantTime("100")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
|
||||
List<IndexedRecord> scannedRecords = new ArrayList<>();
|
||||
for (HoodieRecord record : scanner) {
|
||||
@@ -601,8 +612,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "100",
-10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("100")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
assertEquals(200, scanner.getTotalLogRecords());
|
||||
Set<String> readKeys = new HashSet<>(200);
|
||||
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
|
||||
@@ -663,8 +684,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "102",
-10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("102")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
assertEquals(200, scanner.getTotalLogRecords(), "We read 200 records from 2 write batches");
|
||||
Set<String> readKeys = new HashSet<>(200);
|
||||
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
|
||||
@@ -742,8 +773,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "103",
-10240L, true, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("103")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(true)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
assertEquals(200, scanner.getTotalLogRecords(), "We would read 200 records");
|
||||
Set<String> readKeys = new HashSet<>(200);
|
||||
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
|
||||
@@ -802,8 +843,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "102",
-10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("102")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
assertEquals(200, scanner.getTotalLogRecords(), "We still would read 200 records");
|
||||
final List<String> readKeys = new ArrayList<>(200);
|
||||
final List<Boolean> emptyPayloads = new ArrayList<>();
|
||||
@@ -833,8 +884,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
writer.appendBlock(commandBlock);
|
||||
|
||||
readKeys.clear();
|
||||
-scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101", 10240L, readBlocksLazily,
-false, bufferSize, BASE_OUTPUT_PATH);
+scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("101")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
|
||||
assertEquals(200, readKeys.size(), "Stream collect should return all 200 records after rollback of delete");
|
||||
}
|
||||
@@ -898,8 +959,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
// all data must be rolled back before merge
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "100",
-10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("100")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
assertEquals(0, scanner.getTotalLogRecords(), "We would have scanned 0 records because of rollback");
|
||||
|
||||
final List<String> readKeys = new ArrayList<>();
|
||||
@@ -949,8 +1020,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "100",
-10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("100")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
||||
}
|
||||
|
||||
@@ -983,8 +1064,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "100",
-10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("100")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
assertEquals(100, scanner.getTotalLogRecords(), "We still would read 100 records");
|
||||
final List<String> readKeys = new ArrayList<>(100);
|
||||
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
|
||||
@@ -1036,8 +1127,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101",
-10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("101")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
||||
}
|
||||
|
||||
@@ -1126,8 +1227,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101",
-10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("101")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
||||
}
|
||||
|
||||
@@ -1183,8 +1294,18 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1",
|
||||
HoodieLogFile.DELTA_EXTENSION, "100").map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
-HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
-"100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
+HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
+.withFileSystem(fs)
+.withBasePath(basePath)
+.withLogFilePaths(allLogFiles)
+.withReaderSchema(schema)
+.withLatestInstantTime("100")
+.withMaxMemorySizeInBytes(10240L)
+.withReadBlocksLazily(readBlocksLazily)
+.withReverseReader(false)
+.withBufferSize(bufferSize)
+.withSpillableMapBasePath(BASE_OUTPUT_PATH)
+.build();
|
||||
|
||||
assertEquals(Math.max(numRecordsInLog1, numRecordsInLog2), scanner.getNumMergedRecordsInLog(),
|
||||
"We would read 100 records");
|
||||
|
||||
Reference in New Issue
Block a user