[HUDI-3454] Fix partition name in all code paths for LogRecordScanner (#5252)
* Depend on FSUtils#getRelativePartitionPath(basePath, logFilePath.getParent) to get the partition. * If the list of log file paths in the split is empty, then fallback to usual behaviour.
This commit is contained in:
@@ -67,6 +67,9 @@ import java.util.stream.Collectors;
|
|||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
import scala.Tuple3;
|
import scala.Tuple3;
|
||||||
|
|
||||||
|
import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath;
|
||||||
|
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CLI command to display log file options.
|
* CLI command to display log file options.
|
||||||
*/
|
*/
|
||||||
@@ -185,7 +188,7 @@ public class HoodieLogFileCommand implements CommandMarker {
|
|||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
// logFilePaths size must > 1
|
// logFilePaths size must > 1
|
||||||
assert logFilePaths.size() > 0 : "There is no log file";
|
checkArgument(logFilePaths.size() > 0, "There is no log file");
|
||||||
|
|
||||||
// TODO : readerSchema can change across blocks/log files, fix this inside Scanner
|
// TODO : readerSchema can change across blocks/log files, fix this inside Scanner
|
||||||
AvroSchemaConverter converter = new AvroSchemaConverter();
|
AvroSchemaConverter converter = new AvroSchemaConverter();
|
||||||
@@ -218,6 +221,7 @@ public class HoodieLogFileCommand implements CommandMarker {
|
|||||||
.withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH.defaultValue())
|
.withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH.defaultValue())
|
||||||
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
|
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
|
||||||
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
|
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(client.getBasePath()), new Path(logFilePaths.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : scanner) {
|
for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : scanner) {
|
||||||
Option<IndexedRecord> record = hoodieRecord.getData().getInsertValue(readerSchema);
|
Option<IndexedRecord> record = hoodieRecord.getData().getInsertValue(readerSchema);
|
||||||
|
|||||||
@@ -65,6 +65,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath;
|
||||||
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
|
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
@@ -203,6 +204,7 @@ public class TestHoodieLogFileCommand extends CLIFunctionalTestHarness {
|
|||||||
// get expected result of 10 records.
|
// get expected result of 10 records.
|
||||||
List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(partitionPath + "/*")))
|
List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(partitionPath + "/*")))
|
||||||
.map(status -> status.getPath().toString()).collect(Collectors.toList());
|
.map(status -> status.getPath().toString()).collect(Collectors.toList());
|
||||||
|
assertTrue(logFilePaths.size() > 0);
|
||||||
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
||||||
.withFileSystem(fs)
|
.withFileSystem(fs)
|
||||||
.withBasePath(tablePath)
|
.withBasePath(tablePath)
|
||||||
@@ -221,6 +223,7 @@ public class TestHoodieLogFileCommand extends CLIFunctionalTestHarness {
|
|||||||
.withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH.defaultValue())
|
.withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH.defaultValue())
|
||||||
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
|
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
|
||||||
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
|
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(tablePath), new Path(logFilePaths.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
Iterator<HoodieRecord<? extends HoodieRecordPayload>> records = scanner.iterator();
|
Iterator<HoodieRecord<? extends HoodieRecordPayload>> records = scanner.iterator();
|
||||||
|
|||||||
@@ -230,7 +230,7 @@ public class FSUtils {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Obtain all the partition paths, that are present in this table, denoted by presence of
|
* Obtain all the partition paths, that are present in this table, denoted by presence of
|
||||||
* {@link HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}.
|
* {@link HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE_PREFIX}.
|
||||||
*
|
*
|
||||||
* If the basePathStr is a subdirectory of .hoodie folder then we assume that the partitions of an internal
|
* If the basePathStr is a subdirectory of .hoodie folder then we assume that the partitions of an internal
|
||||||
* table (a hoodie table within the .hoodie directory) are to be obtained.
|
* table (a hoodie table within the .hoodie directory) are to be obtained.
|
||||||
|
|||||||
@@ -89,6 +89,7 @@ import java.util.Set;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath;
|
||||||
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
|
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
@@ -574,12 +575,13 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
writer.close();
|
writer.close();
|
||||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||||
// scan all log blocks (across multiple log files)
|
// scan all log blocks (across multiple log files)
|
||||||
|
List<String> logFilePaths = logFiles.stream()
|
||||||
|
.map(logFile -> logFile.getPath().toString()).collect(Collectors.toList());
|
||||||
|
assertTrue(logFilePaths.size() > 0);
|
||||||
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
||||||
.withFileSystem(fs)
|
.withFileSystem(fs)
|
||||||
.withBasePath(basePath)
|
.withBasePath(basePath)
|
||||||
.withLogFilePaths(
|
.withLogFilePaths(logFilePaths)
|
||||||
logFiles.stream()
|
|
||||||
.map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()))
|
|
||||||
.withReaderSchema(schema)
|
.withReaderSchema(schema)
|
||||||
.withLatestInstantTime("100")
|
.withLatestInstantTime("100")
|
||||||
.withMaxMemorySizeInBytes(10240L)
|
.withMaxMemorySizeInBytes(10240L)
|
||||||
@@ -589,6 +591,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(logFilePaths.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
List<IndexedRecord> scannedRecords = new ArrayList<>();
|
List<IndexedRecord> scannedRecords = new ArrayList<>();
|
||||||
@@ -803,6 +806,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
assertEquals(200, scanner.getTotalLogRecords());
|
assertEquals(200, scanner.getTotalLogRecords());
|
||||||
Set<String> readKeys = new HashSet<>(200);
|
Set<String> readKeys = new HashSet<>(200);
|
||||||
@@ -881,6 +885,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
assertEquals(200, scanner.getTotalLogRecords(), "We read 200 records from 2 write batches");
|
assertEquals(200, scanner.getTotalLogRecords(), "We read 200 records from 2 write batches");
|
||||||
Set<String> readKeys = new HashSet<>(200);
|
Set<String> readKeys = new HashSet<>(200);
|
||||||
@@ -968,6 +973,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
assertEquals(200, scanner.getTotalLogRecords(), "We would read 200 records");
|
assertEquals(200, scanner.getTotalLogRecords(), "We would read 200 records");
|
||||||
Set<String> readKeys = new HashSet<>(200);
|
Set<String> readKeys = new HashSet<>(200);
|
||||||
@@ -1046,6 +1052,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
assertEquals(200, scanner.getTotalLogRecords(), "We still would read 200 records");
|
assertEquals(200, scanner.getTotalLogRecords(), "We still would read 200 records");
|
||||||
@@ -1092,6 +1099,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
|
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
|
||||||
assertEquals(200, readKeys.size(), "Stream collect should return all 200 records after rollback of delete");
|
assertEquals(200, readKeys.size(), "Stream collect should return all 200 records after rollback of delete");
|
||||||
@@ -1187,6 +1195,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
assertEquals(200, scanner.getTotalLogRecords(), "We still would read 200 records");
|
assertEquals(200, scanner.getTotalLogRecords(), "We still would read 200 records");
|
||||||
@@ -1290,6 +1299,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
assertEquals(0, scanner.getTotalLogRecords(), "We would have scanned 0 records because of rollback");
|
assertEquals(0, scanner.getTotalLogRecords(), "We would have scanned 0 records because of rollback");
|
||||||
|
|
||||||
@@ -1358,6 +1368,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
||||||
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
|
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
|
||||||
@@ -1409,6 +1420,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
assertEquals(100, scanner.getTotalLogRecords(), "We still would read 100 records");
|
assertEquals(100, scanner.getTotalLogRecords(), "We still would read 100 records");
|
||||||
final List<String> readKeys = new ArrayList<>(100);
|
final List<String> readKeys = new ArrayList<>(100);
|
||||||
@@ -1479,6 +1491,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
||||||
}
|
}
|
||||||
@@ -1585,6 +1598,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
||||||
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
|
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
|
||||||
@@ -1659,6 +1673,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
|||||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||||
.withDiskMapType(diskMapType)
|
.withDiskMapType(diskMapType)
|
||||||
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
.withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent()))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
assertEquals(Math.max(numRecordsInLog1, numRecordsInLog2), scanner.getNumMergedRecordsInLog(),
|
assertEquals(Math.max(numRecordsInLog1, numRecordsInLog2), scanner.getNumMergedRecordsInLog(),
|
||||||
|
|||||||
@@ -57,6 +57,8 @@ import java.util.stream.Collectors;
|
|||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
|
|
||||||
import static junit.framework.TestCase.assertEquals;
|
import static junit.framework.TestCase.assertEquals;
|
||||||
|
import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath;
|
||||||
|
import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty;
|
||||||
import static org.hamcrest.CoreMatchers.is;
|
import static org.hamcrest.CoreMatchers.is;
|
||||||
import static org.hamcrest.MatcherAssert.assertThat;
|
import static org.hamcrest.MatcherAssert.assertThat;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
@@ -346,7 +348,7 @@ public class TestQuickstartData {
|
|||||||
List<String> logPaths,
|
List<String> logPaths,
|
||||||
Schema readSchema,
|
Schema readSchema,
|
||||||
String instant) {
|
String instant) {
|
||||||
return HoodieMergedLogRecordScanner.newBuilder()
|
HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder()
|
||||||
.withFileSystem(fs)
|
.withFileSystem(fs)
|
||||||
.withBasePath(basePath)
|
.withBasePath(basePath)
|
||||||
.withLogFilePaths(logPaths)
|
.withLogFilePaths(logPaths)
|
||||||
@@ -358,8 +360,12 @@ public class TestQuickstartData {
|
|||||||
.withMaxMemorySizeInBytes(1024 * 1024L)
|
.withMaxMemorySizeInBytes(1024 * 1024L)
|
||||||
.withSpillableMapBasePath("/tmp/")
|
.withSpillableMapBasePath("/tmp/")
|
||||||
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
|
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
|
||||||
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
|
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue());
|
||||||
.build();
|
if (!isNullOrEmpty(logPaths)) {
|
||||||
|
logRecordScannerBuilder
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(logPaths.get(0)).getParent()));
|
||||||
|
}
|
||||||
|
return logRecordScannerBuilder.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -18,7 +18,6 @@
|
|||||||
|
|
||||||
package org.apache.hudi.table.format;
|
package org.apache.hudi.table.format;
|
||||||
|
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
|
||||||
import org.apache.hudi.common.model.HoodieOperation;
|
import org.apache.hudi.common.model.HoodieOperation;
|
||||||
import org.apache.hudi.common.model.HoodieRecord;
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
|
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
|
||||||
@@ -43,6 +42,7 @@ import org.apache.flink.table.data.RowData;
|
|||||||
import org.apache.flink.types.RowKind;
|
import org.apache.flink.types.RowKind;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
@@ -52,6 +52,10 @@ import java.util.Locale;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
import static org.apache.hudi.common.fs.FSUtils.getFs;
|
||||||
|
import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath;
|
||||||
|
import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utilities for format.
|
* Utilities for format.
|
||||||
*/
|
*/
|
||||||
@@ -124,11 +128,13 @@ public class FormatUtils {
|
|||||||
Schema logSchema,
|
Schema logSchema,
|
||||||
Configuration config,
|
Configuration config,
|
||||||
boolean withOperationField) {
|
boolean withOperationField) {
|
||||||
FileSystem fs = FSUtils.getFs(split.getTablePath(), config);
|
String basePath = split.getTablePath();
|
||||||
return HoodieMergedLogRecordScanner.newBuilder()
|
List<String> logPaths = split.getLogPaths().get();
|
||||||
|
FileSystem fs = getFs(basePath, config);
|
||||||
|
HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder()
|
||||||
.withFileSystem(fs)
|
.withFileSystem(fs)
|
||||||
.withBasePath(split.getTablePath())
|
.withBasePath(basePath)
|
||||||
.withLogFilePaths(split.getLogPaths().get())
|
.withLogFilePaths(logPaths)
|
||||||
.withReaderSchema(logSchema)
|
.withReaderSchema(logSchema)
|
||||||
.withLatestInstantTime(split.getLatestCommit())
|
.withLatestInstantTime(split.getLatestCommit())
|
||||||
.withReadBlocksLazily(
|
.withReadBlocksLazily(
|
||||||
@@ -144,8 +150,12 @@ public class FormatUtils {
|
|||||||
config.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP,
|
config.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP,
|
||||||
HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH))
|
HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH))
|
||||||
.withInstantRange(split.getInstantRange())
|
.withInstantRange(split.getInstantRange())
|
||||||
.withOperationField(withOperationField)
|
.withOperationField(withOperationField);
|
||||||
.build();
|
if (!isNullOrEmpty(logPaths)) {
|
||||||
|
logRecordScannerBuilder
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(logPaths.get(0)).getParent()));
|
||||||
|
}
|
||||||
|
return logRecordScannerBuilder.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static HoodieUnMergedLogRecordScanner unMergedLogScanner(
|
private static HoodieUnMergedLogRecordScanner unMergedLogScanner(
|
||||||
@@ -153,7 +163,7 @@ public class FormatUtils {
|
|||||||
Schema logSchema,
|
Schema logSchema,
|
||||||
Configuration config,
|
Configuration config,
|
||||||
HoodieUnMergedLogRecordScanner.LogRecordScannerCallback callback) {
|
HoodieUnMergedLogRecordScanner.LogRecordScannerCallback callback) {
|
||||||
FileSystem fs = FSUtils.getFs(split.getTablePath(), config);
|
FileSystem fs = getFs(split.getTablePath(), config);
|
||||||
return HoodieUnMergedLogRecordScanner.newBuilder()
|
return HoodieUnMergedLogRecordScanner.newBuilder()
|
||||||
.withFileSystem(fs)
|
.withFileSystem(fs)
|
||||||
.withBasePath(split.getTablePath())
|
.withBasePath(split.getTablePath())
|
||||||
@@ -234,8 +244,8 @@ public class FormatUtils {
|
|||||||
HoodieWriteConfig writeConfig,
|
HoodieWriteConfig writeConfig,
|
||||||
Configuration hadoopConf) {
|
Configuration hadoopConf) {
|
||||||
String basePath = writeConfig.getBasePath();
|
String basePath = writeConfig.getBasePath();
|
||||||
return HoodieMergedLogRecordScanner.newBuilder()
|
HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder()
|
||||||
.withFileSystem(FSUtils.getFs(basePath, hadoopConf))
|
.withFileSystem(getFs(basePath, hadoopConf))
|
||||||
.withBasePath(basePath)
|
.withBasePath(basePath)
|
||||||
.withLogFilePaths(logPaths)
|
.withLogFilePaths(logPaths)
|
||||||
.withReaderSchema(logSchema)
|
.withReaderSchema(logSchema)
|
||||||
@@ -246,8 +256,12 @@ public class FormatUtils {
|
|||||||
.withMaxMemorySizeInBytes(writeConfig.getMaxMemoryPerPartitionMerge())
|
.withMaxMemorySizeInBytes(writeConfig.getMaxMemoryPerPartitionMerge())
|
||||||
.withSpillableMapBasePath(writeConfig.getSpillableMapBasePath())
|
.withSpillableMapBasePath(writeConfig.getSpillableMapBasePath())
|
||||||
.withDiskMapType(writeConfig.getCommonConfig().getSpillableDiskMapType())
|
.withDiskMapType(writeConfig.getCommonConfig().getSpillableDiskMapType())
|
||||||
.withBitCaskDiskMapCompressionEnabled(writeConfig.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
|
.withBitCaskDiskMapCompressionEnabled(writeConfig.getCommonConfig().isBitCaskDiskMapCompressionEnabled());
|
||||||
.build();
|
if (!isNullOrEmpty(logPaths)) {
|
||||||
|
logRecordScannerBuilder
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(logPaths.get(0)).getParent()));
|
||||||
|
}
|
||||||
|
return logRecordScannerBuilder.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Boolean string2Boolean(String s) {
|
private static Boolean string2Boolean(String s) {
|
||||||
|
|||||||
@@ -67,6 +67,8 @@ import java.util.stream.Collectors;
|
|||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
|
|
||||||
import static junit.framework.TestCase.assertEquals;
|
import static junit.framework.TestCase.assertEquals;
|
||||||
|
import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath;
|
||||||
|
import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty;
|
||||||
import static org.hamcrest.CoreMatchers.is;
|
import static org.hamcrest.CoreMatchers.is;
|
||||||
import static org.hamcrest.MatcherAssert.assertThat;
|
import static org.hamcrest.MatcherAssert.assertThat;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
@@ -661,7 +663,7 @@ public class TestData {
|
|||||||
List<String> logPaths,
|
List<String> logPaths,
|
||||||
Schema readSchema,
|
Schema readSchema,
|
||||||
String instant) {
|
String instant) {
|
||||||
return HoodieMergedLogRecordScanner.newBuilder()
|
HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder()
|
||||||
.withFileSystem(fs)
|
.withFileSystem(fs)
|
||||||
.withBasePath(basePath)
|
.withBasePath(basePath)
|
||||||
.withLogFilePaths(logPaths)
|
.withLogFilePaths(logPaths)
|
||||||
@@ -673,8 +675,12 @@ public class TestData {
|
|||||||
.withMaxMemorySizeInBytes(1024 * 1024L)
|
.withMaxMemorySizeInBytes(1024 * 1024L)
|
||||||
.withSpillableMapBasePath("/tmp/")
|
.withSpillableMapBasePath("/tmp/")
|
||||||
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
|
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
|
||||||
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
|
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue());
|
||||||
.build();
|
if (!isNullOrEmpty(logPaths)) {
|
||||||
|
logRecordScannerBuilder
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(logPaths.get(0)).getParent()));
|
||||||
|
}
|
||||||
|
return logRecordScannerBuilder.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -18,12 +18,6 @@
|
|||||||
|
|
||||||
package org.apache.hudi.hadoop.realtime;
|
package org.apache.hudi.hadoop.realtime;
|
||||||
|
|
||||||
import org.apache.avro.generic.GenericRecord;
|
|
||||||
import org.apache.hadoop.io.ArrayWritable;
|
|
||||||
import org.apache.hadoop.io.NullWritable;
|
|
||||||
import org.apache.hadoop.io.Writable;
|
|
||||||
import org.apache.hadoop.mapred.JobConf;
|
|
||||||
import org.apache.hadoop.mapred.RecordReader;
|
|
||||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||||
import org.apache.hudi.common.config.HoodieCommonConfig;
|
import org.apache.hudi.common.config.HoodieCommonConfig;
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
@@ -35,15 +29,27 @@ import org.apache.hudi.common.util.Option;
|
|||||||
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
|
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
|
||||||
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;
|
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;
|
||||||
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils;
|
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils;
|
||||||
|
|
||||||
|
import org.apache.avro.generic.GenericRecord;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.io.ArrayWritable;
|
||||||
|
import org.apache.hadoop.io.NullWritable;
|
||||||
|
import org.apache.hadoop.io.Writable;
|
||||||
|
import org.apache.hadoop.mapred.JobConf;
|
||||||
|
import org.apache.hadoop.mapred.RecordReader;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath;
|
||||||
|
import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty;
|
||||||
|
|
||||||
class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader
|
class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader
|
||||||
implements RecordReader<NullWritable, ArrayWritable> {
|
implements RecordReader<NullWritable, ArrayWritable> {
|
||||||
|
|
||||||
@@ -77,10 +83,11 @@ class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader
|
|||||||
// NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit
|
// NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit
|
||||||
// but can return records for completed commits > the commit we are trying to read (if using
|
// but can return records for completed commits > the commit we are trying to read (if using
|
||||||
// readCommit() API)
|
// readCommit() API)
|
||||||
return HoodieMergedLogRecordScanner.newBuilder()
|
List<String> logPaths = split.getDeltaLogPaths();
|
||||||
|
HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder()
|
||||||
.withFileSystem(FSUtils.getFs(split.getPath().toString(), jobConf))
|
.withFileSystem(FSUtils.getFs(split.getPath().toString(), jobConf))
|
||||||
.withBasePath(split.getBasePath())
|
.withBasePath(split.getBasePath())
|
||||||
.withLogFilePaths(split.getDeltaLogPaths())
|
.withLogFilePaths(logPaths)
|
||||||
.withReaderSchema(usesCustomPayload ? getWriterSchema() : getReaderSchema())
|
.withReaderSchema(usesCustomPayload ? getWriterSchema() : getReaderSchema())
|
||||||
.withLatestInstantTime(split.getMaxCommitTime())
|
.withLatestInstantTime(split.getMaxCommitTime())
|
||||||
.withMaxMemorySizeInBytes(HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes(jobConf))
|
.withMaxMemorySizeInBytes(HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes(jobConf))
|
||||||
@@ -90,8 +97,12 @@ class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader
|
|||||||
.withSpillableMapBasePath(jobConf.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH))
|
.withSpillableMapBasePath(jobConf.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH))
|
||||||
.withDiskMapType(jobConf.getEnum(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.key(), HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue()))
|
.withDiskMapType(jobConf.getEnum(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.key(), HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue()))
|
||||||
.withBitCaskDiskMapCompressionEnabled(jobConf.getBoolean(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.key(),
|
.withBitCaskDiskMapCompressionEnabled(jobConf.getBoolean(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.key(),
|
||||||
HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue()))
|
HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue()));
|
||||||
.build();
|
if (!isNullOrEmpty(logPaths)) {
|
||||||
|
logRecordScannerBuilder
|
||||||
|
.withPartition(getRelativePartitionPath(new Path(split.getBasePath()), new Path(logPaths.get(0)).getParent()));
|
||||||
|
}
|
||||||
|
return logRecordScannerBuilder.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
private Option<GenericRecord> buildGenericRecordwithCustomPayload(HoodieRecord record) throws IOException {
|
private Option<GenericRecord> buildGenericRecordwithCustomPayload(HoodieRecord record) throws IOException {
|
||||||
|
|||||||
Reference in New Issue
Block a user