1
0

Introduce ReadOptimizedView & RealtimeView out of TableFileSystemView

- Usage now marks code as clearly using either RO or RT views, for future evolution
  - Tests on all of FileGroups and FileSlices
This commit is contained in:
Vinoth Chandar
2017-06-19 17:16:45 -07:00
committed by prazanna
parent c00f1a9ed9
commit 754ab88a2d
16 changed files with 264 additions and 166 deletions

View File

@@ -82,7 +82,7 @@ public class FileSlice implements Serializable {
}
/**
 * Returns the data file of this file slice, if it has one.
 *
 * @return an Optional holding the data file, or an empty Optional when dataFile is null
 */
public Optional<HoodieDataFile> getDataFile() {
  // ofNullable rather than of: Optional.of throws NullPointerException on a null value.
  return Optional.ofNullable(dataFile);
}
@Override

View File

@@ -30,91 +30,69 @@ import java.util.stream.Stream;
/**
* Interface for viewing the table file system.
* Depending on the Hoodie table type, the view of the file system changes.
* <p>
* ReadOptimizedView - Lets queries run only on organized columnar data files at the expense of latency
* RealtimeView - Lets queries run on columnar data as well as delta files (sequential) at the expense of query execution time
*
* @since 0.3.0
*/
public interface TableFileSystemView {
/**
* Stream all the latest data files in the given partition
*
* @param partitionPath
* @return
* ReadOptimizedView - methods to provide a view of columnar data files only.
*/
Stream<HoodieDataFile> getLatestDataFiles(String partitionPath);
interface ReadOptimizedView {
/**
* Stream all the latest data files in the given partition
*
* @param partitionPath partition whose latest data files are streamed
* @return stream of the latest data files in that partition
*/
Stream<HoodieDataFile> getLatestDataFiles(String partitionPath);
/**
* Stream all the latest data files, in the file system view
*
* @return stream of the latest data files across the file system view
*/
Stream<HoodieDataFile> getLatestDataFiles();
/**
* Stream all the latest version data files in the given partition with precondition that
* commitTime(file) before maxCommitTime
* (NOTE(review): the method name suggests the bound is inclusive - confirm against the implementation)
*
* @param partitionPath partition whose data files are streamed
* @param maxCommitTime upper bound on the commit time of the returned data files
* @return stream of the latest data files satisfying the commit time precondition
*/
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
String maxCommitTime);
/**
* Stream all the latest data files, in the given range
*
* @param commitsToReturn the commits that make up the range (presumably commit times - TODO confirm)
* @return stream of the latest data files in that range
*/
Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn);
/**
* Stream all the data file versions grouped by FileId for a given partition
*
* @param partitionPath partition whose data file versions are streamed
* @return stream of the data file versions in that partition
*/
Stream<HoodieDataFile> getAllDataFiles(String partitionPath);
}
/**
* Stream all the latest data files, in the file system view
*
* @return
* RealtimeView - methods to access a combination of columnar data files + log files with real time data.
*/
Stream<HoodieDataFile> getLatestDataFiles();
interface RealtimeView {
/**
* Stream all the latest file slices in the given partition
*/
Stream<FileSlice> getLatestFileSlices(String partitionPath);
/**
* Stream all the latest version data files in the given partition
* with precondition that commitTime(file) before maxCommitTime
*
* @param partitionPath
* @param maxCommitTime
* @return
*/
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
/**
* Stream all the latest file slices in the given partition with precondition that
* commitTime(file) before maxCommitTime
*/
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
String maxCommitTime);
/**
* Stream all the latest data files, in the given range
*
* @param commitsToReturn
* @return
*/
Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn);
/**
* Stream all the latest file slices, in the given range
*/
Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn);
/**
* Stream all the data file versions grouped by FileId for a given partition
*
* @param partitionPath
* @return
*/
Stream<HoodieDataFile> getAllDataFiles(String partitionPath);
/**
* Stream all the latest file slices in the given partition
*
* @param partitionPath
* @return
*/
Stream<FileSlice> getLatestFileSlices(String partitionPath);
/**
* Stream all the latest file slices in the given partition
* with precondition that commitTime(file) before maxCommitTime
*
* @param partitionPath
* @param maxCommitTime
* @return
*/
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
String maxCommitTime);
/**
* Stream all the latest file slices, in the given range
*
* @param commitsToReturn
* @return
*/
Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn);
/**
* Stream all the file slices for a given partition, latest or not.
*
* @param partitionPath
* @return
*/
Stream<FileSlice> getAllFileSlices(String partitionPath);
/**
* Stream all the file slices for a given partition, latest or not.
*/
Stream<FileSlice> getAllFileSlices(String partitionPath);
}
/**
* Stream all the file groups for a given partition
@@ -123,12 +101,4 @@ public interface TableFileSystemView {
* @return
*/
Stream<HoodieFileGroup> getAllFileGroups(String partitionPath);
/**
* Get the file Status for the path specified
*
* @param path
* @return
*/
FileStatus getFileStatus(String path);
}

View File

@@ -16,8 +16,6 @@
package com.uber.hoodie.common.table.view;
import static java.util.stream.Collectors.toList;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieFileGroup;
@@ -44,7 +42,6 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -57,7 +54,8 @@ import java.util.stream.Stream;
* @see TableFileSystemView
* @since 0.3.0
*/
public class HoodieTableFileSystemView implements TableFileSystemView, Serializable {
public class HoodieTableFileSystemView implements TableFileSystemView, TableFileSystemView.ReadOptimizedView,
TableFileSystemView.RealtimeView, Serializable {
protected HoodieTableMetaClient metaClient;
protected transient FileSystem fs;
@@ -187,7 +185,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView, Serializa
@Override
public Stream<HoodieDataFile> getLatestDataFiles() {
return fileGroupMap.values().stream()
return fileGroupMap.values().stream()
.map(fileGroup -> fileGroup.getLatestDataFile())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
@@ -271,13 +269,4 @@ public class HoodieTableFileSystemView implements TableFileSystemView, Serializa
"Failed to list data files in partition " + partitionPathStr, e);
}
}
/**
 * Get the file status for the path specified.
 *
 * @param path path to look up on the underlying file system
 * @return the FileStatus the file system reports for the path
 * @throws HoodieIOException if the file system lookup fails with an IOException
 */
@Override
public FileStatus getFileStatus(String path) {
  try {
    return fs.getFileStatus(new Path(path));
  } catch (IOException e) {
    // Chain the IOException as the cause so the underlying failure is not lost.
    throw new HoodieIOException("Could not get FileStatus on path " + path, e);
  }
}
}