General enhancements
This commit is contained in:
committed by
vinoth chandar
parent
30c5f8b7bd
commit
6946dd7557
@@ -21,10 +21,7 @@ package com.uber.hoodie.common.model;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||
import java.io.Serializable;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.TreeMap;
|
||||
import java.util.*;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
@@ -33,10 +30,7 @@ import java.util.stream.Stream;
|
||||
public class HoodieFileGroup implements Serializable {
|
||||
|
||||
public static Comparator<String> getReverseCommitTimeComparator() {
|
||||
return (o1, o2) -> {
|
||||
// reverse the order
|
||||
return o2.compareTo(o1);
|
||||
};
|
||||
return Comparator.reverseOrder();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -127,7 +121,7 @@ public class HoodieFileGroup implements Serializable {
|
||||
* Get all the file slices, including in-flight ones, as seen in the underlying file-system
|
||||
*/
|
||||
public Stream<FileSlice> getAllFileSlicesIncludingInflight() {
|
||||
return fileSlices.entrySet().stream().map(sliceEntry -> sliceEntry.getValue());
|
||||
return fileSlices.entrySet().stream().map(Map.Entry::getValue);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -143,8 +137,8 @@ public class HoodieFileGroup implements Serializable {
|
||||
public Stream<FileSlice> getAllFileSlices() {
|
||||
if (!timeline.empty()) {
|
||||
return fileSlices.entrySet().stream()
|
||||
.map(sliceEntry -> sliceEntry.getValue())
|
||||
.filter(slice -> isFileSliceCommitted(slice));
|
||||
.map(Map.Entry::getValue)
|
||||
.filter(this::isFileSliceCommitted);
|
||||
}
|
||||
return Stream.empty();
|
||||
}
|
||||
|
||||
@@ -103,7 +103,7 @@ public class HoodieLogFile implements Serializable {
|
||||
// Descending order by log version (o2 is deliberately passed first).
// Integer.compare yields the same result as Integer#compareTo without allocating
// a boxed Integer through the deprecated constructor.
return Integer.compare(o2.getLogVersion(), o1.getLogVersion());
|
||||
}
|
||||
// reverse the order by base-commits
|
||||
return new Integer(baseInstantTime2.compareTo(baseInstantTime1));
|
||||
return baseInstantTime2.compareTo(baseInstantTime1);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -53,14 +53,13 @@ public enum StorageSchemes {
|
||||
}
|
||||
|
||||
public static boolean isSchemeSupported(String scheme) {
|
||||
return Arrays.stream(values()).filter(s -> s.getScheme().equals(scheme)).count() > 0;
|
||||
return Arrays.stream(values()).anyMatch(s -> s.getScheme().equals(scheme));
|
||||
}
|
||||
|
||||
public static boolean isAppendSupported(String scheme) {
|
||||
if (!isSchemeSupported(scheme)) {
|
||||
throw new IllegalArgumentException("Unsupported scheme :" + scheme);
|
||||
}
|
||||
return Arrays.stream(StorageSchemes.values())
|
||||
.filter(s -> s.supportsAppend() && s.scheme.equals(scheme)).count() > 0;
|
||||
return Arrays.stream(StorageSchemes.values()).anyMatch(s -> s.supportsAppend() && s.scheme.equals(scheme));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,27 +95,24 @@ public class HoodieTableConfig implements Serializable {
|
||||
fs.mkdirs(metadataFolder);
|
||||
}
|
||||
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
|
||||
FSDataOutputStream outputStream = fs.create(propertyPath);
|
||||
try {
|
||||
try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
|
||||
if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
|
||||
throw new IllegalArgumentException(
|
||||
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
|
||||
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
|
||||
}
|
||||
if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
|
||||
properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
|
||||
}
|
||||
// When the table type is MERGE_ON_READ and no payload class has been configured,
// fall back to the default payload class.
// The type is compared by value with equals(): the property string may have been
// loaded from a file or supplied by a caller, so it is not guaranteed to be the same
// String object as the enum constant's name, and == could evaluate to false for an
// equal value. Calling equals() on the enum name also keeps a missing property null-safe.
if (HoodieTableType.MERGE_ON_READ.name().equals(properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME))
    && !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
  properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
}
|
||||
if (!properties.containsKey(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME)) {
|
||||
properties.setProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);
|
||||
}
|
||||
properties
|
||||
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
|
||||
} finally {
|
||||
outputStream.close();
|
||||
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ public class HoodieCorruptBlock extends HoodieLogBlock {
|
||||
long blockSize,
|
||||
long blockEndPos,
|
||||
Map<HeaderMetadataType, String> header,
|
||||
Map<HeaderMetadataType, String> footer) throws IOException {
|
||||
Map<HeaderMetadataType, String> footer) {
|
||||
|
||||
return new HoodieCorruptBlock(corruptedBytes, inputStream, readBlockLazily,
|
||||
Optional.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)),
|
||||
|
||||
@@ -91,10 +91,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
List<HoodieInstant> pendingCompactionInstants =
|
||||
metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList());
|
||||
this.fileIdToPendingCompaction = ImmutableMap.copyOf(
|
||||
CompactionUtils.getAllPendingCompactionOperations(metaClient).entrySet().stream().map(entry -> {
|
||||
return Pair.of(entry.getKey(), Pair.of(entry.getValue().getKey(),
|
||||
CompactionOperation.convertFromAvroRecordInstance(entry.getValue().getValue())));
|
||||
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
|
||||
CompactionUtils.getAllPendingCompactionOperations(metaClient).entrySet().stream()
|
||||
.map(entry -> Pair.of(entry.getKey(), Pair.of(entry.getValue().getKey(),
|
||||
CompactionOperation.convertFromAvroRecordInstance(entry.getValue().getValue()))))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -152,10 +152,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
String fileId = pair.getValue();
|
||||
HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), fileId, visibleActiveTimeline);
|
||||
if (dataFiles.containsKey(pair)) {
|
||||
dataFiles.get(pair).forEach(dataFile -> group.addDataFile(dataFile));
|
||||
dataFiles.get(pair).forEach(group::addDataFile);
|
||||
}
|
||||
if (logFiles.containsKey(pair)) {
|
||||
logFiles.get(pair).forEach(logFile -> group.addLogFile(logFile));
|
||||
logFiles.get(pair).forEach(group::addLogFile);
|
||||
}
|
||||
if (fileIdToPendingCompaction.containsKey(fileId)) {
|
||||
// If there is no delta-commit after compaction request, this step would ensure a new file-slice appears
|
||||
@@ -219,9 +219,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFiles() {
|
||||
return fileGroupMap.values().stream()
|
||||
.map(fileGroup -> {
|
||||
return fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst();
|
||||
})
|
||||
.map(fileGroup -> fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst())
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
@@ -230,15 +228,13 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(fileGroup -> {
|
||||
return fileGroup.getAllDataFiles()
|
||||
.filter(dataFile ->
|
||||
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df))
|
||||
.findFirst();
|
||||
})
|
||||
.map(fileGroup -> fileGroup.getAllDataFiles()
|
||||
.filter(dataFile ->
|
||||
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df))
|
||||
.findFirst())
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
@@ -246,12 +242,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
|
||||
return fileGroupMap.values().stream()
|
||||
.map(fileGroup -> {
|
||||
return fileGroup.getAllDataFiles()
|
||||
.filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
|
||||
&& !isDataFileDueToPendingCompaction(dataFile))
|
||||
.findFirst();
|
||||
})
|
||||
.map(fileGroup -> fileGroup.getAllDataFiles()
|
||||
.filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
|
||||
&& !isDataFileDueToPendingCompaction(dataFile))
|
||||
.findFirst())
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
@@ -259,15 +253,13 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFilesOn(String partitionPath, String instantTime) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(fileGroup -> {
|
||||
return fileGroup.getAllDataFiles()
|
||||
.filter(dataFile ->
|
||||
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
|
||||
instantTime,
|
||||
HoodieTimeline.EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df))
|
||||
.findFirst();
|
||||
})
|
||||
.map(fileGroup -> fileGroup.getAllDataFiles()
|
||||
.filter(dataFile ->
|
||||
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
|
||||
instantTime,
|
||||
HoodieTimeline.EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df))
|
||||
.findFirst())
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
@@ -275,7 +267,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(fileGroup -> fileGroup.getAllDataFiles())
|
||||
.map(HoodieFileGroup::getAllDataFiles)
|
||||
.flatMap(dataFileList -> dataFileList)
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df));
|
||||
}
|
||||
@@ -283,7 +275,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
@Override
|
||||
public Stream<FileSlice> getLatestFileSlices(String partitionPath) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(fileGroup -> fileGroup.getLatestFileSlice())
|
||||
.map(HoodieFileGroup::getLatestFileSlice)
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.map(this::filterDataFileAfterPendingCompaction);
|
||||
@@ -312,11 +304,8 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
*/
|
||||
private boolean isFileSliceAfterPendingCompaction(FileSlice fileSlice) {
|
||||
Pair<String, CompactionOperation> compactionWithInstantTime = fileIdToPendingCompaction.get(fileSlice.getFileId());
|
||||
if ((null != compactionWithInstantTime)
|
||||
&& fileSlice.getBaseInstantTime().equals(compactionWithInstantTime.getKey())) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return (null != compactionWithInstantTime)
|
||||
&& fileSlice.getBaseInstantTime().equals(compactionWithInstantTime.getKey());
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -330,7 +319,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
// Data file is filtered out of the file-slice as the corresponding compaction
|
||||
// instant not completed yet.
|
||||
FileSlice transformed = new FileSlice(fileSlice.getBaseInstantTime(), fileSlice.getFileId());
|
||||
fileSlice.getLogFiles().forEach(lf -> transformed.addLogFile(lf));
|
||||
fileSlice.getLogFiles().forEach(transformed::addLogFile);
|
||||
return transformed;
|
||||
}
|
||||
return fileSlice;
|
||||
@@ -358,8 +347,8 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
merged.setDataFile(penultimateSlice.getDataFile().get());
|
||||
}
|
||||
// Add Log files from penultimate and last slices
|
||||
penultimateSlice.getLogFiles().forEach(lf -> merged.addLogFile(lf));
|
||||
lastSlice.getLogFiles().forEach(lf -> merged.addLogFile(lf));
|
||||
penultimateSlice.getLogFiles().forEach(merged::addLogFile);
|
||||
lastSlice.getLogFiles().forEach(merged::addLogFile);
|
||||
return merged;
|
||||
}
|
||||
|
||||
@@ -409,7 +398,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
@Override
|
||||
public Stream<FileSlice> getAllFileSlices(String partitionPath) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(group -> group.getAllFileSlices())
|
||||
.map(HoodieFileGroup::getAllFileSlices)
|
||||
.flatMap(sliceList -> sliceList);
|
||||
}
|
||||
|
||||
|
||||
@@ -80,10 +80,8 @@ public class ParquetUtils {
|
||||
Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
|
||||
AvroReadSupport.setAvroReadSchema(conf, readSchema);
|
||||
AvroReadSupport.setRequestedProjection(conf, readSchema);
|
||||
ParquetReader reader = null;
|
||||
Set<String> rowKeys = new HashSet<>();
|
||||
try {
|
||||
reader = AvroParquetReader.builder(filePath).withConf(conf).build();
|
||||
try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) {
|
||||
Object obj = reader.read();
|
||||
while (obj != null) {
|
||||
if (obj instanceof GenericRecord) {
|
||||
@@ -97,15 +95,8 @@ public class ParquetUtils {
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
|
||||
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
try {
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
// ignore
|
||||
return rowKeys;
|
||||
}
|
||||
|
||||
@@ -212,10 +203,7 @@ public class ParquetUtils {
|
||||
|
||||
@Override
|
||||
public Boolean apply(String recordKey) {
|
||||
if (candidateKeys.contains(recordKey)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return candidateKeys.contains(recordKey);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,7 +83,7 @@ public class ReflectionUtils {
|
||||
*/
|
||||
public static Object loadClass(String clazz, Object... constructorArgs) {
|
||||
Class<?>[] constructorArgTypes = Arrays.stream(constructorArgs)
|
||||
.map(arg -> arg.getClass()).toArray(Class<?>[]::new);
|
||||
.map(Object::getClass).toArray(Class<?>[]::new);
|
||||
return loadClass(clazz, constructorArgTypes, constructorArgs);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user