1
0

General enhancements

This commit is contained in:
arukavytsia
2018-12-12 03:19:43 +02:00
committed by vinoth chandar
parent 30c5f8b7bd
commit 6946dd7557
46 changed files with 402 additions and 373 deletions

View File

@@ -21,10 +21,7 @@ package com.uber.hoodie.common.model;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import java.io.Serializable;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.TreeMap;
import java.util.*;
import java.util.stream.Stream;
/**
@@ -33,10 +30,7 @@ import java.util.stream.Stream;
public class HoodieFileGroup implements Serializable {
public static Comparator<String> getReverseCommitTimeComparator() {
return (o1, o2) -> {
// reverse the order
return o2.compareTo(o1);
};
return Comparator.reverseOrder();
}
/**
@@ -127,7 +121,7 @@ public class HoodieFileGroup implements Serializable {
* Get all the file slices, including in-flight ones, as seen in the underlying file-system
*/
public Stream<FileSlice> getAllFileSlicesIncludingInflight() {
return fileSlices.entrySet().stream().map(sliceEntry -> sliceEntry.getValue());
return fileSlices.entrySet().stream().map(Map.Entry::getValue);
}
/**
@@ -143,8 +137,8 @@ public class HoodieFileGroup implements Serializable {
public Stream<FileSlice> getAllFileSlices() {
if (!timeline.empty()) {
return fileSlices.entrySet().stream()
.map(sliceEntry -> sliceEntry.getValue())
.filter(slice -> isFileSliceCommitted(slice));
.map(Map.Entry::getValue)
.filter(this::isFileSliceCommitted);
}
return Stream.empty();
}

View File

@@ -103,7 +103,7 @@ public class HoodieLogFile implements Serializable {
return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion());
}
// reverse the order by base-commits
return new Integer(baseInstantTime2.compareTo(baseInstantTime1));
return baseInstantTime2.compareTo(baseInstantTime1);
};
}

View File

@@ -53,14 +53,13 @@ public enum StorageSchemes {
}
public static boolean isSchemeSupported(String scheme) {
return Arrays.stream(values()).filter(s -> s.getScheme().equals(scheme)).count() > 0;
return Arrays.stream(values()).anyMatch(s -> s.getScheme().equals(scheme));
}
public static boolean isAppendSupported(String scheme) {
if (!isSchemeSupported(scheme)) {
throw new IllegalArgumentException("Unsupported scheme :" + scheme);
}
return Arrays.stream(StorageSchemes.values())
.filter(s -> s.supportsAppend() && s.scheme.equals(scheme)).count() > 0;
return Arrays.stream(StorageSchemes.values()).anyMatch(s -> s.supportsAppend() && s.scheme.equals(scheme));
}
}

View File

@@ -95,27 +95,24 @@ public class HoodieTableConfig implements Serializable {
fs.mkdirs(metadataFolder);
}
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
FSDataOutputStream outputStream = fs.create(propertyPath);
try {
try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
throw new IllegalArgumentException(
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
}
if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
}
if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ
.name()
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
.name()
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
}
if (!properties.containsKey(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME)) {
properties.setProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);
}
properties
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
} finally {
outputStream.close();
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
}
}

View File

@@ -57,7 +57,7 @@ public class HoodieCorruptBlock extends HoodieLogBlock {
long blockSize,
long blockEndPos,
Map<HeaderMetadataType, String> header,
Map<HeaderMetadataType, String> footer) throws IOException {
Map<HeaderMetadataType, String> footer) {
return new HoodieCorruptBlock(corruptedBytes, inputStream, readBlockLazily,
Optional.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)),

View File

@@ -91,10 +91,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
List<HoodieInstant> pendingCompactionInstants =
metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList());
this.fileIdToPendingCompaction = ImmutableMap.copyOf(
CompactionUtils.getAllPendingCompactionOperations(metaClient).entrySet().stream().map(entry -> {
return Pair.of(entry.getKey(), Pair.of(entry.getValue().getKey(),
CompactionOperation.convertFromAvroRecordInstance(entry.getValue().getValue())));
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
CompactionUtils.getAllPendingCompactionOperations(metaClient).entrySet().stream()
.map(entry -> Pair.of(entry.getKey(), Pair.of(entry.getValue().getKey(),
CompactionOperation.convertFromAvroRecordInstance(entry.getValue().getValue()))))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
}
/**
@@ -152,10 +152,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
String fileId = pair.getValue();
HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), fileId, visibleActiveTimeline);
if (dataFiles.containsKey(pair)) {
dataFiles.get(pair).forEach(dataFile -> group.addDataFile(dataFile));
dataFiles.get(pair).forEach(group::addDataFile);
}
if (logFiles.containsKey(pair)) {
logFiles.get(pair).forEach(logFile -> group.addLogFile(logFile));
logFiles.get(pair).forEach(group::addLogFile);
}
if (fileIdToPendingCompaction.containsKey(fileId)) {
// If there is no delta-commit after compaction request, this step would ensure a new file-slice appears
@@ -219,9 +219,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
@Override
public Stream<HoodieDataFile> getLatestDataFiles() {
return fileGroupMap.values().stream()
.map(fileGroup -> {
return fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst();
})
.map(fileGroup -> fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst())
.filter(Optional::isPresent)
.map(Optional::get);
}
@@ -230,15 +228,13 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
String maxCommitTime) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> {
return fileGroup.getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.filter(df -> !isDataFileDueToPendingCompaction(df))
.findFirst();
})
.map(fileGroup -> fileGroup.getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.filter(df -> !isDataFileDueToPendingCompaction(df))
.findFirst())
.filter(Optional::isPresent)
.map(Optional::get);
}
@@ -246,12 +242,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
@Override
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
return fileGroupMap.values().stream()
.map(fileGroup -> {
return fileGroup.getAllDataFiles()
.filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
&& !isDataFileDueToPendingCompaction(dataFile))
.findFirst();
})
.map(fileGroup -> fileGroup.getAllDataFiles()
.filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
&& !isDataFileDueToPendingCompaction(dataFile))
.findFirst())
.filter(Optional::isPresent)
.map(Optional::get);
}
@@ -259,15 +253,13 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
@Override
public Stream<HoodieDataFile> getLatestDataFilesOn(String partitionPath, String instantTime) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> {
return fileGroup.getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
instantTime,
HoodieTimeline.EQUAL))
.filter(df -> !isDataFileDueToPendingCompaction(df))
.findFirst();
})
.map(fileGroup -> fileGroup.getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
instantTime,
HoodieTimeline.EQUAL))
.filter(df -> !isDataFileDueToPendingCompaction(df))
.findFirst())
.filter(Optional::isPresent)
.map(Optional::get);
}
@@ -275,7 +267,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
@Override
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getAllDataFiles())
.map(HoodieFileGroup::getAllDataFiles)
.flatMap(dataFileList -> dataFileList)
.filter(df -> !isDataFileDueToPendingCompaction(df));
}
@@ -283,7 +275,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
@Override
public Stream<FileSlice> getLatestFileSlices(String partitionPath) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestFileSlice())
.map(HoodieFileGroup::getLatestFileSlice)
.filter(Optional::isPresent)
.map(Optional::get)
.map(this::filterDataFileAfterPendingCompaction);
@@ -312,11 +304,8 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
*/
private boolean isFileSliceAfterPendingCompaction(FileSlice fileSlice) {
Pair<String, CompactionOperation> compactionWithInstantTime = fileIdToPendingCompaction.get(fileSlice.getFileId());
if ((null != compactionWithInstantTime)
&& fileSlice.getBaseInstantTime().equals(compactionWithInstantTime.getKey())) {
return true;
}
return false;
return (null != compactionWithInstantTime)
&& fileSlice.getBaseInstantTime().equals(compactionWithInstantTime.getKey());
}
/**
@@ -330,7 +319,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
// Data file is filtered out of the file-slice as the corresponding compaction
// instant has not completed yet.
FileSlice transformed = new FileSlice(fileSlice.getBaseInstantTime(), fileSlice.getFileId());
fileSlice.getLogFiles().forEach(lf -> transformed.addLogFile(lf));
fileSlice.getLogFiles().forEach(transformed::addLogFile);
return transformed;
}
return fileSlice;
@@ -358,8 +347,8 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
merged.setDataFile(penultimateSlice.getDataFile().get());
}
// Add Log files from penultimate and last slices
penultimateSlice.getLogFiles().forEach(lf -> merged.addLogFile(lf));
lastSlice.getLogFiles().forEach(lf -> merged.addLogFile(lf));
penultimateSlice.getLogFiles().forEach(merged::addLogFile);
lastSlice.getLogFiles().forEach(merged::addLogFile);
return merged;
}
@@ -409,7 +398,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
@Override
public Stream<FileSlice> getAllFileSlices(String partitionPath) {
return getAllFileGroups(partitionPath)
.map(group -> group.getAllFileSlices())
.map(HoodieFileGroup::getAllFileSlices)
.flatMap(sliceList -> sliceList);
}

View File

@@ -80,10 +80,8 @@ public class ParquetUtils {
Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
AvroReadSupport.setAvroReadSchema(conf, readSchema);
AvroReadSupport.setRequestedProjection(conf, readSchema);
ParquetReader reader = null;
Set<String> rowKeys = new HashSet<>();
try {
reader = AvroParquetReader.builder(filePath).withConf(conf).build();
try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) {
Object obj = reader.read();
while (obj != null) {
if (obj instanceof GenericRecord) {
@@ -97,15 +95,8 @@ public class ParquetUtils {
} catch (IOException e) {
throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
// ignore
}
}
}
// ignore
return rowKeys;
}
@@ -212,10 +203,7 @@ public class ParquetUtils {
@Override
public Boolean apply(String recordKey) {
if (candidateKeys.contains(recordKey)) {
return true;
}
return false;
return candidateKeys.contains(recordKey);
}
}
}

View File

@@ -83,7 +83,7 @@ public class ReflectionUtils {
*/
public static Object loadClass(String clazz, Object... constructorArgs) {
Class<?>[] constructorArgTypes = Arrays.stream(constructorArgs)
.map(arg -> arg.getClass()).toArray(Class<?>[]::new);
.map(Object::getClass).toArray(Class<?>[]::new);
return loadClass(clazz, constructorArgTypes, constructorArgs);
}