[HUDI-509] Renaming code in sync with cWiki restructuring (#1212)
- Storage Type replaced with Table Type (remaining instances)
- View types replaced with query types
- ReadOptimized view referred to as Snapshot Query
- TableFileSystemView sub-interfaces renamed to BaseFileOnly and Slice Views
- HoodieDataFile renamed to HoodieBaseFile
- Hive Sync tool will register RO tables for MOR with a `_ro` suffix
- Datasource/Deltastreamer options renamed accordingly
- Support fallback to old config values as well, so migration is painless
- Config for controlling `_ro` suffix addition
- Renaming DataFile to BaseFile across DTOs, HoodieFileSlice and AbstractTableFileSystemView
This commit is contained in:
@@ -60,7 +60,7 @@ public class CompactionOperation implements Serializable {
|
||||
this.metrics = metrics;
|
||||
}
|
||||
|
||||
public CompactionOperation(Option<HoodieDataFile> dataFile, String partitionPath, List<HoodieLogFile> logFiles,
|
||||
public CompactionOperation(Option<HoodieBaseFile> dataFile, String partitionPath, List<HoodieLogFile> logFiles,
|
||||
Map<String, Double> metrics) {
|
||||
if (dataFile.isPresent()) {
|
||||
this.baseInstantTime = dataFile.get().getCommitTime();
|
||||
@@ -111,9 +111,9 @@ public class CompactionOperation implements Serializable {
|
||||
return id;
|
||||
}
|
||||
|
||||
public Option<HoodieDataFile> getBaseFile(String basePath, String partitionPath) {
|
||||
public Option<HoodieBaseFile> getBaseFile(String basePath, String partitionPath) {
|
||||
Path dirPath = FSUtils.getPartitionPath(basePath, partitionPath);
|
||||
return dataFileName.map(df -> new HoodieDataFile(new Path(dirPath, df).toString()));
|
||||
return dataFileName.map(df -> new HoodieBaseFile(new Path(dirPath, df).toString()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -44,7 +44,7 @@ public class FileSlice implements Serializable {
|
||||
/**
|
||||
* data file, with the compacted data, for this slice.
|
||||
*/
|
||||
private HoodieDataFile dataFile;
|
||||
private HoodieBaseFile baseFile;
|
||||
|
||||
/**
|
||||
* List of appendable log files with real time data - Sorted with greater log version first - Always empty for
|
||||
@@ -59,12 +59,12 @@ public class FileSlice implements Serializable {
|
||||
public FileSlice(HoodieFileGroupId fileGroupId, String baseInstantTime) {
|
||||
this.fileGroupId = fileGroupId;
|
||||
this.baseInstantTime = baseInstantTime;
|
||||
this.dataFile = null;
|
||||
this.baseFile = null;
|
||||
this.logFiles = new TreeSet<>(HoodieLogFile.getReverseLogFileComparator());
|
||||
}
|
||||
|
||||
public void setDataFile(HoodieDataFile dataFile) {
|
||||
this.dataFile = dataFile;
|
||||
public void setBaseFile(HoodieBaseFile baseFile) {
|
||||
this.baseFile = baseFile;
|
||||
}
|
||||
|
||||
public void addLogFile(HoodieLogFile logFile) {
|
||||
@@ -91,8 +91,8 @@ public class FileSlice implements Serializable {
|
||||
return fileGroupId;
|
||||
}
|
||||
|
||||
public Option<HoodieDataFile> getDataFile() {
|
||||
return Option.ofNullable(dataFile);
|
||||
public Option<HoodieBaseFile> getBaseFile() {
|
||||
return Option.ofNullable(baseFile);
|
||||
}
|
||||
|
||||
public Option<HoodieLogFile> getLatestLogFile() {
|
||||
@@ -105,7 +105,7 @@ public class FileSlice implements Serializable {
|
||||
* @return
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
return (dataFile == null) && (logFiles.isEmpty());
|
||||
return (baseFile == null) && (logFiles.isEmpty());
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -113,7 +113,7 @@ public class FileSlice implements Serializable {
|
||||
final StringBuilder sb = new StringBuilder("FileSlice {");
|
||||
sb.append("fileGroupId=").append(fileGroupId);
|
||||
sb.append(", baseCommitTime=").append(baseInstantTime);
|
||||
sb.append(", dataFile='").append(dataFile).append('\'');
|
||||
sb.append(", baseFile='").append(baseFile).append('\'');
|
||||
sb.append(", logFiles='").append(logFiles).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
@@ -129,7 +129,7 @@ public class FileSlice implements Serializable {
|
||||
}
|
||||
FileSlice slice = (FileSlice) o;
|
||||
return Objects.equals(fileGroupId, slice.fileGroupId) && Objects.equals(baseInstantTime, slice.baseInstantTime)
|
||||
&& Objects.equals(dataFile, slice.dataFile) && Objects.equals(logFiles, slice.logFiles);
|
||||
&& Objects.equals(baseFile, slice.baseFile) && Objects.equals(logFiles, slice.logFiles);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -27,21 +27,21 @@ import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Hoodie data file.
|
||||
* Hoodie base file.
|
||||
*/
|
||||
public class HoodieDataFile implements Serializable {
|
||||
public class HoodieBaseFile implements Serializable {
|
||||
|
||||
private transient FileStatus fileStatus;
|
||||
private final String fullPath;
|
||||
private long fileLen;
|
||||
|
||||
public HoodieDataFile(FileStatus fileStatus) {
|
||||
public HoodieBaseFile(FileStatus fileStatus) {
|
||||
this.fileStatus = fileStatus;
|
||||
this.fullPath = fileStatus.getPath().toString();
|
||||
this.fileLen = fileStatus.getLen();
|
||||
}
|
||||
|
||||
public HoodieDataFile(String filePath) {
|
||||
public HoodieBaseFile(String filePath) {
|
||||
this.fileStatus = null;
|
||||
this.fullPath = filePath;
|
||||
this.fileLen = -1;
|
||||
@@ -87,7 +87,7 @@ public class HoodieDataFile implements Serializable {
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
HoodieDataFile dataFile = (HoodieDataFile) o;
|
||||
HoodieBaseFile dataFile = (HoodieBaseFile) o;
|
||||
return Objects.equals(fullPath, dataFile.fullPath);
|
||||
}
|
||||
|
||||
@@ -82,11 +82,11 @@ public class HoodieFileGroup implements Serializable {
|
||||
/**
|
||||
* Add a new datafile into the file group.
|
||||
*/
|
||||
public void addDataFile(HoodieDataFile dataFile) {
|
||||
public void addBaseFile(HoodieBaseFile dataFile) {
|
||||
if (!fileSlices.containsKey(dataFile.getCommitTime())) {
|
||||
fileSlices.put(dataFile.getCommitTime(), new FileSlice(fileGroupId, dataFile.getCommitTime()));
|
||||
}
|
||||
fileSlices.get(dataFile.getCommitTime()).setDataFile(dataFile);
|
||||
fileSlices.get(dataFile.getCommitTime()).setBaseFile(dataFile);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -155,8 +155,8 @@ public class HoodieFileGroup implements Serializable {
|
||||
/**
|
||||
* Gets the latest data file.
|
||||
*/
|
||||
public Option<HoodieDataFile> getLatestDataFile() {
|
||||
return Option.fromJavaOptional(getAllDataFiles().findFirst());
|
||||
public Option<HoodieBaseFile> getLatestDataFile() {
|
||||
return Option.fromJavaOptional(getAllBaseFiles().findFirst());
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -187,8 +187,8 @@ public class HoodieFileGroup implements Serializable {
|
||||
/**
|
||||
* Stream of committed data files, sorted reverse commit time.
|
||||
*/
|
||||
public Stream<HoodieDataFile> getAllDataFiles() {
|
||||
return getAllFileSlices().filter(slice -> slice.getDataFile().isPresent()).map(slice -> slice.getDataFile().get());
|
||||
public Stream<HoodieBaseFile> getAllBaseFiles() {
|
||||
return getAllFileSlices().filter(slice -> slice.getBaseFile().isPresent()).map(slice -> slice.getBaseFile().get());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -27,9 +27,6 @@ package org.apache.hudi.common.model;
|
||||
* <p>
|
||||
* MERGE_ON_READ - Speeds up upserts, by delaying merge until enough work piles up.
|
||||
* <p>
|
||||
* In the future, following might be added.
|
||||
* <p>
|
||||
* SIMPLE_LSM - A simple 2 level LSM tree.
|
||||
*/
|
||||
public enum HoodieTableType {
|
||||
COPY_ON_WRITE, MERGE_ON_READ
|
||||
|
||||
@@ -53,15 +53,19 @@ public class HoodieTableConfig implements Serializable {
|
||||
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
|
||||
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
|
||||
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
|
||||
@Deprecated
|
||||
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME = "hoodie.table.ro.file.format";
|
||||
@Deprecated
|
||||
public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME = "hoodie.table.rt.file.format";
|
||||
public static final String HOODIE_BASE_FILE_FORMAT_PROP_NAME = "hoodie.table.base.file.format";
|
||||
public static final String HOODIE_LOG_FILE_FORMAT_PROP_NAME = "hoodie.table.log.file.format";
|
||||
public static final String HOODIE_TIMELINE_LAYOUT_VERSION = "hoodie.timeline.layout.version";
|
||||
public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class";
|
||||
public static final String HOODIE_ARCHIVELOG_FOLDER_PROP_NAME = "hoodie.archivelog.folder";
|
||||
|
||||
public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
|
||||
public static final HoodieFileFormat DEFAULT_RO_FILE_FORMAT = HoodieFileFormat.PARQUET;
|
||||
public static final HoodieFileFormat DEFAULT_RT_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG;
|
||||
public static final HoodieFileFormat DEFAULT_BASE_FILE_FORMAT = HoodieFileFormat.PARQUET;
|
||||
public static final HoodieFileFormat DEFAULT_LOG_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG;
|
||||
public static final String DEFAULT_PAYLOAD_CLASS = OverwriteWithLatestAvroPayload.class.getName();
|
||||
public static final Integer DEFAULT_TIMELINE_LAYOUT_VERSION = TimelineLayoutVersion.VERSION_0;
|
||||
public static final String DEFAULT_ARCHIVELOG_FOLDER = "";
|
||||
@@ -164,27 +168,33 @@ public class HoodieTableConfig implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Read Optimized Storage Format.
|
||||
* Get the base file storage format.
|
||||
*
|
||||
* @return HoodieFileFormat for the Read Optimized Storage format
|
||||
* @return HoodieFileFormat for the base file Storage format
|
||||
*/
|
||||
public HoodieFileFormat getROFileFormat() {
|
||||
public HoodieFileFormat getBaseFileFormat() {
|
||||
if (props.containsKey(HOODIE_BASE_FILE_FORMAT_PROP_NAME)) {
|
||||
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_BASE_FILE_FORMAT_PROP_NAME));
|
||||
}
|
||||
if (props.containsKey(HOODIE_RO_FILE_FORMAT_PROP_NAME)) {
|
||||
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RO_FILE_FORMAT_PROP_NAME));
|
||||
}
|
||||
return DEFAULT_RO_FILE_FORMAT;
|
||||
return DEFAULT_BASE_FILE_FORMAT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Read Optimized Storage Format.
|
||||
* Get the log Storage Format.
|
||||
*
|
||||
* @return HoodieFileFormat for the Read Optimized Storage format
|
||||
* @return HoodieFileFormat for the log Storage format
|
||||
*/
|
||||
public HoodieFileFormat getRTFileFormat() {
|
||||
public HoodieFileFormat getLogFileFormat() {
|
||||
if (props.containsKey(HOODIE_LOG_FILE_FORMAT_PROP_NAME)) {
|
||||
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_LOG_FILE_FORMAT_PROP_NAME));
|
||||
}
|
||||
if (props.containsKey(HOODIE_RT_FILE_FORMAT_PROP_NAME)) {
|
||||
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RT_FILE_FORMAT_PROP_NAME));
|
||||
}
|
||||
return DEFAULT_RT_FILE_FORMAT;
|
||||
return DEFAULT_LOG_FILE_FORMAT;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -299,7 +299,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to initialize a given path, as a given storage type and table name.
|
||||
* Helper method to initialize a given path, as a given type and table name.
|
||||
*/
|
||||
public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath,
|
||||
HoodieTableType tableType, String tableName, String payloadClassName) throws IOException {
|
||||
@@ -437,7 +437,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
case MERGE_ON_READ:
|
||||
return HoodieActiveTimeline.DELTA_COMMIT_ACTION;
|
||||
default:
|
||||
throw new HoodieException("Could not commit on unknown storage type " + this.getTableType());
|
||||
throw new HoodieException("Could not commit on unknown table type " + this.getTableType());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,12 +18,15 @@
|
||||
|
||||
package org.apache.hudi.common.table;
|
||||
|
||||
import org.apache.hudi.common.table.TableFileSystemView.BaseFileOnlyView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.SliceView;
|
||||
|
||||
/**
|
||||
* A consolidated file-system view interface exposing both realtime and read-optimized views along with
|
||||
* A consolidated file-system view interface exposing both complete slice and basefile only views along with
|
||||
* update operations.
|
||||
*/
|
||||
public interface SyncableFileSystemView
|
||||
extends TableFileSystemView, TableFileSystemView.ReadOptimizedView, TableFileSystemView.RealtimeView {
|
||||
extends TableFileSystemView, BaseFileOnlyView, SliceView {
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.common.table;
|
||||
|
||||
import org.apache.hudi.common.model.CompactionOperation;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
@@ -37,57 +37,57 @@ import java.util.stream.Stream;
|
||||
public interface TableFileSystemView {
|
||||
|
||||
/**
|
||||
* ReadOptimizedView with methods to only access latest version of file for the instant(s) passed.
|
||||
* Methods to only access latest version of file for the instant(s) passed.
|
||||
*/
|
||||
interface ReadOptimizedViewWithLatestSlice {
|
||||
interface BaseFileOnlyViewWithLatestSlice {
|
||||
|
||||
/**
|
||||
* Stream all the latest data files in the given partition.
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFiles(String partitionPath);
|
||||
Stream<HoodieBaseFile> getLatestBaseFiles(String partitionPath);
|
||||
|
||||
/**
|
||||
* Get Latest data file for a partition and file-Id.
|
||||
*/
|
||||
Option<HoodieDataFile> getLatestDataFile(String partitionPath, String fileId);
|
||||
Option<HoodieBaseFile> getLatestBaseFile(String partitionPath, String fileId);
|
||||
|
||||
/**
|
||||
* Stream all the latest data files, in the file system view.
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFiles();
|
||||
Stream<HoodieBaseFile> getLatestBaseFiles();
|
||||
|
||||
/**
|
||||
* Stream all the latest version data files in the given partition with precondition that commitTime(file) before
|
||||
* maxCommitTime.
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath, String maxCommitTime);
|
||||
Stream<HoodieBaseFile> getLatestBaseFilesBeforeOrOn(String partitionPath, String maxCommitTime);
|
||||
|
||||
/**
|
||||
* Stream the latest data files whose commit time is in the given list of commits.
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn);
|
||||
Stream<HoodieBaseFile> getLatestBaseFilesInRange(List<String> commitsToReturn);
|
||||
}
|
||||
|
||||
/**
|
||||
* ReadOptimizedView - methods to provide a view of columnar data files only.
|
||||
* Methods to provide a view of base files only.
|
||||
*/
|
||||
interface ReadOptimizedView extends ReadOptimizedViewWithLatestSlice {
|
||||
interface BaseFileOnlyView extends BaseFileOnlyViewWithLatestSlice {
|
||||
/**
|
||||
* Stream all the data file versions grouped by FileId for a given partition.
|
||||
*/
|
||||
Stream<HoodieDataFile> getAllDataFiles(String partitionPath);
|
||||
Stream<HoodieBaseFile> getAllBaseFiles(String partitionPath);
|
||||
|
||||
/**
|
||||
* Get the version of data file matching the instant time in the given partition.
|
||||
*/
|
||||
Option<HoodieDataFile> getDataFileOn(String partitionPath, String instantTime, String fileId);
|
||||
Option<HoodieBaseFile> getBaseFileOn(String partitionPath, String instantTime, String fileId);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* RealtimeView with methods to only access latest version of file-slice for the instant(s) passed.
|
||||
* Methods to only access latest version of file-slice for the instant(s) passed.
|
||||
*/
|
||||
interface RealtimeViewWithLatestSlice {
|
||||
interface SliceViewWithLatestSlice {
|
||||
|
||||
/**
|
||||
* Stream all the latest file slices in the given partition.
|
||||
@@ -131,9 +131,9 @@ public interface TableFileSystemView {
|
||||
}
|
||||
|
||||
/**
|
||||
* RealtimeView - methods to access a combination of columnar data files + log files with real time data.
|
||||
* Methods to access a combination of base files + log file slices.
|
||||
*/
|
||||
interface RealtimeView extends RealtimeViewWithLatestSlice {
|
||||
interface SliceView extends SliceViewWithLatestSlice {
|
||||
|
||||
/**
|
||||
* Stream all the file slices for a given partition, latest or not.
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
package org.apache.hudi.common.table.timeline.dto;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
@@ -27,7 +27,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
* The data transfer object of data file.
|
||||
*/
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class DataFileDTO {
|
||||
public class BaseFileDTO {
|
||||
|
||||
@JsonProperty("fileStatus")
|
||||
private FileStatusDTO fileStatus;
|
||||
@@ -36,27 +36,27 @@ public class DataFileDTO {
|
||||
@JsonProperty("fileLen")
|
||||
private long fileLen;
|
||||
|
||||
public static HoodieDataFile toHoodieDataFile(DataFileDTO dto) {
|
||||
public static HoodieBaseFile toHoodieBaseFile(BaseFileDTO dto) {
|
||||
if (null == dto) {
|
||||
return null;
|
||||
}
|
||||
|
||||
HoodieDataFile dataFile = null;
|
||||
HoodieBaseFile baseFile;
|
||||
if (null != dto.fileStatus) {
|
||||
dataFile = new HoodieDataFile(FileStatusDTO.toFileStatus(dto.fileStatus));
|
||||
baseFile = new HoodieBaseFile(FileStatusDTO.toFileStatus(dto.fileStatus));
|
||||
} else {
|
||||
dataFile = new HoodieDataFile(dto.fullPath);
|
||||
dataFile.setFileLen(dto.fileLen);
|
||||
baseFile = new HoodieBaseFile(dto.fullPath);
|
||||
baseFile.setFileLen(dto.fileLen);
|
||||
}
|
||||
return dataFile;
|
||||
return baseFile;
|
||||
}
|
||||
|
||||
public static DataFileDTO fromHoodieDataFile(HoodieDataFile dataFile) {
|
||||
public static BaseFileDTO fromHoodieBaseFile(HoodieBaseFile dataFile) {
|
||||
if (null == dataFile) {
|
||||
return null;
|
||||
}
|
||||
|
||||
DataFileDTO dto = new DataFileDTO();
|
||||
BaseFileDTO dto = new BaseFileDTO();
|
||||
dto.fileStatus = FileStatusDTO.fromFileStatus(dataFile.getFileStatus());
|
||||
dto.fullPath = dataFile.getPath();
|
||||
dto.fileLen = dataFile.getFileLen();
|
||||
@@ -32,8 +32,8 @@ import java.util.stream.Collectors;
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class FileSliceDTO {
|
||||
|
||||
@JsonProperty("dataFile")
|
||||
DataFileDTO dataFile;
|
||||
@JsonProperty("baseFile")
|
||||
BaseFileDTO baseFile;
|
||||
@JsonProperty("logFiles")
|
||||
List<LogFileDTO> logFiles;
|
||||
@JsonProperty("partition")
|
||||
@@ -48,14 +48,14 @@ public class FileSliceDTO {
|
||||
dto.partitionPath = slice.getPartitionPath();
|
||||
dto.baseInstantTime = slice.getBaseInstantTime();
|
||||
dto.fileId = slice.getFileId();
|
||||
dto.dataFile = slice.getDataFile().map(DataFileDTO::fromHoodieDataFile).orElse(null);
|
||||
dto.baseFile = slice.getBaseFile().map(BaseFileDTO::fromHoodieBaseFile).orElse(null);
|
||||
dto.logFiles = slice.getLogFiles().map(LogFileDTO::fromHoodieLogFile).collect(Collectors.toList());
|
||||
return dto;
|
||||
}
|
||||
|
||||
public static FileSlice toFileSlice(FileSliceDTO dto) {
|
||||
FileSlice slice = new FileSlice(dto.partitionPath, dto.baseInstantTime, dto.fileId);
|
||||
slice.setDataFile(DataFileDTO.toHoodieDataFile(dto.dataFile));
|
||||
slice.setBaseFile(BaseFileDTO.toHoodieBaseFile(dto.baseFile));
|
||||
dto.logFiles.stream().forEach(lf -> slice.addLogFile(LogFileDTO.toHoodieLogFile(lf)));
|
||||
return slice;
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.common.table.view;
|
||||
|
||||
import org.apache.hudi.common.model.CompactionOperation;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
@@ -133,16 +133,16 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
*/
|
||||
protected List<HoodieFileGroup> buildFileGroups(FileStatus[] statuses, HoodieTimeline timeline,
|
||||
boolean addPendingCompactionFileSlice) {
|
||||
return buildFileGroups(convertFileStatusesToDataFiles(statuses), convertFileStatusesToLogFiles(statuses), timeline,
|
||||
return buildFileGroups(convertFileStatusesToBaseFiles(statuses), convertFileStatusesToLogFiles(statuses), timeline,
|
||||
addPendingCompactionFileSlice);
|
||||
}
|
||||
|
||||
protected List<HoodieFileGroup> buildFileGroups(Stream<HoodieDataFile> dataFileStream,
|
||||
protected List<HoodieFileGroup> buildFileGroups(Stream<HoodieBaseFile> baseFileStream,
|
||||
Stream<HoodieLogFile> logFileStream, HoodieTimeline timeline, boolean addPendingCompactionFileSlice) {
|
||||
Map<Pair<String, String>, List<HoodieDataFile>> dataFiles =
|
||||
dataFileStream.collect(Collectors.groupingBy((dataFile) -> {
|
||||
String partitionPathStr = getPartitionPathFromFilePath(dataFile.getPath());
|
||||
return Pair.of(partitionPathStr, dataFile.getFileId());
|
||||
Map<Pair<String, String>, List<HoodieBaseFile>> baseFiles =
|
||||
baseFileStream.collect(Collectors.groupingBy((baseFile) -> {
|
||||
String partitionPathStr = getPartitionPathFromFilePath(baseFile.getPath());
|
||||
return Pair.of(partitionPathStr, baseFile.getFileId());
|
||||
}));
|
||||
|
||||
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = logFileStream.collect(Collectors.groupingBy((logFile) -> {
|
||||
@@ -151,15 +151,15 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
return Pair.of(partitionPathStr, logFile.getFileId());
|
||||
}));
|
||||
|
||||
Set<Pair<String, String>> fileIdSet = new HashSet<>(dataFiles.keySet());
|
||||
Set<Pair<String, String>> fileIdSet = new HashSet<>(baseFiles.keySet());
|
||||
fileIdSet.addAll(logFiles.keySet());
|
||||
|
||||
List<HoodieFileGroup> fileGroups = new ArrayList<>();
|
||||
fileIdSet.forEach(pair -> {
|
||||
String fileId = pair.getValue();
|
||||
HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), fileId, timeline);
|
||||
if (dataFiles.containsKey(pair)) {
|
||||
dataFiles.get(pair).forEach(group::addDataFile);
|
||||
if (baseFiles.containsKey(pair)) {
|
||||
baseFiles.get(pair).forEach(group::addBaseFile);
|
||||
}
|
||||
if (logFiles.containsKey(pair)) {
|
||||
logFiles.get(pair).forEach(group::addLogFile);
|
||||
@@ -233,7 +233,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
storePartitionView(partitionPathStr, new ArrayList<>());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to list data files in partition " + partitionPathStr, e);
|
||||
throw new HoodieIOException("Failed to list base files in partition " + partitionPathStr, e);
|
||||
}
|
||||
} else {
|
||||
LOG.debug("View already built for Partition :" + partitionPathStr + ", FOUND is ");
|
||||
@@ -245,14 +245,14 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to convert file-status to data-files.
|
||||
* Helper to convert file-status to base-files.
|
||||
*
|
||||
* @param statuses List of File-Status
|
||||
*/
|
||||
private Stream<HoodieDataFile> convertFileStatusesToDataFiles(FileStatus[] statuses) {
|
||||
private Stream<HoodieBaseFile> convertFileStatusesToBaseFiles(FileStatus[] statuses) {
|
||||
Predicate<FileStatus> roFilePredicate = fileStatus -> fileStatus.getPath().getName()
|
||||
.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
|
||||
return Arrays.stream(statuses).filter(roFilePredicate).map(HoodieDataFile::new);
|
||||
.contains(metaClient.getTableConfig().getBaseFileFormat().getFileExtension());
|
||||
return Arrays.stream(statuses).filter(roFilePredicate).map(HoodieBaseFile::new);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -262,23 +262,23 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
*/
|
||||
private Stream<HoodieLogFile> convertFileStatusesToLogFiles(FileStatus[] statuses) {
|
||||
Predicate<FileStatus> rtFilePredicate = fileStatus -> fileStatus.getPath().getName()
|
||||
.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
|
||||
.contains(metaClient.getTableConfig().getLogFileFormat().getFileExtension());
|
||||
return Arrays.stream(statuses).filter(rtFilePredicate).map(HoodieLogFile::new);
|
||||
}
|
||||
|
||||
/**
|
||||
* With async compaction, it is possible to see partial/complete data-files due to inflight-compactions, Ignore those
|
||||
* data-files.
|
||||
* With async compaction, it is possible to see partial/complete base-files due to inflight-compactions, Ignore those
|
||||
* base-files.
|
||||
*
|
||||
* @param dataFile Data File
|
||||
* @param baseFile base File
|
||||
*/
|
||||
protected boolean isDataFileDueToPendingCompaction(HoodieDataFile dataFile) {
|
||||
final String partitionPath = getPartitionPathFromFilePath(dataFile.getPath());
|
||||
protected boolean isBaseFileDueToPendingCompaction(HoodieBaseFile baseFile) {
|
||||
final String partitionPath = getPartitionPathFromFilePath(baseFile.getPath());
|
||||
|
||||
Option<Pair<String, CompactionOperation>> compactionWithInstantTime =
|
||||
getPendingCompactionOperationWithInstant(new HoodieFileGroupId(partitionPath, dataFile.getFileId()));
|
||||
getPendingCompactionOperationWithInstant(new HoodieFileGroupId(partitionPath, baseFile.getFileId()));
|
||||
return (compactionWithInstantTime.isPresent()) && (null != compactionWithInstantTime.get().getKey())
|
||||
&& dataFile.getCommitTime().equals(compactionWithInstantTime.get().getKey());
|
||||
&& baseFile.getCommitTime().equals(compactionWithInstantTime.get().getKey());
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -296,15 +296,15 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
}
|
||||
|
||||
/**
|
||||
* With async compaction, it is possible to see partial/complete data-files due to inflight-compactions, Ignore those
|
||||
* data-files.
|
||||
* With async compaction, it is possible to see partial/complete base-files due to inflight-compactions, Ignore those
|
||||
* base-files.
|
||||
*
|
||||
* @param fileSlice File Slice
|
||||
*/
|
||||
protected FileSlice filterDataFileAfterPendingCompaction(FileSlice fileSlice) {
|
||||
protected FileSlice filterBaseFileAfterPendingCompaction(FileSlice fileSlice) {
|
||||
if (isFileSliceAfterPendingCompaction(fileSlice)) {
|
||||
LOG.info("File Slice (" + fileSlice + ") is in pending compaction");
|
||||
// Data file is filtered out of the file-slice as the corresponding compaction
|
||||
// Base file is filtered out of the file-slice as the corresponding compaction
|
||||
// instant not completed yet.
|
||||
FileSlice transformed =
|
||||
new FileSlice(fileSlice.getPartitionPath(), fileSlice.getBaseInstantTime(), fileSlice.getFileId());
|
||||
@@ -325,38 +325,38 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Stream<HoodieDataFile> getLatestDataFiles(String partitionStr) {
|
||||
public final Stream<HoodieBaseFile> getLatestBaseFiles(String partitionStr) {
|
||||
try {
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchLatestDataFiles(partitionPath);
|
||||
return fetchLatestBaseFiles(partitionPath);
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Stream<HoodieDataFile> getLatestDataFiles() {
|
||||
public final Stream<HoodieBaseFile> getLatestBaseFiles() {
|
||||
try {
|
||||
readLock.lock();
|
||||
return fetchLatestDataFiles();
|
||||
return fetchLatestBaseFiles();
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionStr, String maxCommitTime) {
|
||||
public final Stream<HoodieBaseFile> getLatestBaseFilesBeforeOrOn(String partitionStr, String maxCommitTime) {
|
||||
try {
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchAllStoredFileGroups(partitionPath)
|
||||
.map(fileGroup -> Option.fromJavaOptional(fileGroup.getAllDataFiles()
|
||||
.filter(dataFile -> HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), maxCommitTime,
|
||||
.map(fileGroup -> Option.fromJavaOptional(fileGroup.getAllBaseFiles()
|
||||
.filter(baseFile -> HoodieTimeline.compareTimestamps(baseFile.getCommitTime(), maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst()))
|
||||
.filter(df -> !isBaseFileDueToPendingCompaction(df)).findFirst()))
|
||||
.filter(Option::isPresent).map(Option::get);
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
@@ -364,43 +364,43 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Option<HoodieDataFile> getDataFileOn(String partitionStr, String instantTime, String fileId) {
|
||||
public final Option<HoodieBaseFile> getBaseFileOn(String partitionStr, String instantTime, String fileId) {
|
||||
try {
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchHoodieFileGroup(partitionPath, fileId).map(fileGroup -> fileGroup.getAllDataFiles()
|
||||
return fetchHoodieFileGroup(partitionPath, fileId).map(fileGroup -> fileGroup.getAllBaseFiles()
|
||||
.filter(
|
||||
dataFile -> HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), instantTime, HoodieTimeline.EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst().orElse(null));
|
||||
baseFile -> HoodieTimeline.compareTimestamps(baseFile.getCommitTime(), instantTime, HoodieTimeline.EQUAL))
|
||||
.filter(df -> !isBaseFileDueToPendingCompaction(df)).findFirst().orElse(null));
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get Latest data file for a partition and file-Id.
|
||||
* Get Latest base file for a partition and file-Id.
|
||||
*/
|
||||
@Override
|
||||
public final Option<HoodieDataFile> getLatestDataFile(String partitionStr, String fileId) {
|
||||
public final Option<HoodieBaseFile> getLatestBaseFile(String partitionStr, String fileId) {
|
||||
try {
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchLatestDataFile(partitionPath, fileId);
|
||||
return fetchLatestBaseFile(partitionPath, fileId);
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
|
||||
public final Stream<HoodieBaseFile> getLatestBaseFilesInRange(List<String> commitsToReturn) {
|
||||
try {
|
||||
readLock.lock();
|
||||
return fetchAllStoredFileGroups().map(fileGroup -> {
|
||||
return Option.fromJavaOptional(
|
||||
fileGroup.getAllDataFiles().filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
|
||||
&& !isDataFileDueToPendingCompaction(dataFile)).findFirst());
|
||||
fileGroup.getAllBaseFiles().filter(baseFile -> commitsToReturn.contains(baseFile.getCommitTime())
|
||||
&& !isBaseFileDueToPendingCompaction(baseFile)).findFirst());
|
||||
}).filter(Option::isPresent).map(Option::get);
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
@@ -408,14 +408,14 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Stream<HoodieDataFile> getAllDataFiles(String partitionStr) {
|
||||
public final Stream<HoodieBaseFile> getAllBaseFiles(String partitionStr) {
|
||||
try {
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchAllDataFiles(partitionPath)
|
||||
return fetchAllBaseFiles(partitionPath)
|
||||
.filter(df -> visibleCommitsAndCompactionTimeline.containsOrBeforeTimelineStarts(df.getCommitTime()))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df));
|
||||
.filter(df -> !isBaseFileDueToPendingCompaction(df));
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
@@ -427,7 +427,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchLatestFileSlices(partitionPath).map(fs -> filterDataFileAfterPendingCompaction(fs));
|
||||
return fetchLatestFileSlices(partitionPath).map(this::filterBaseFileAfterPendingCompaction);
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
@@ -443,7 +443,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
Option<FileSlice> fs = fetchLatestFileSlice(partitionPath, fileId);
|
||||
return fs.map(f -> filterDataFileAfterPendingCompaction(f));
|
||||
return fs.map(f -> filterBaseFileAfterPendingCompaction(f));
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
@@ -480,7 +480,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
Stream<FileSlice> fileSliceStream = fetchLatestFileSlicesBeforeOrOn(partitionPath, maxCommitTime);
|
||||
if (includeFileSlicesInPendingCompaction) {
|
||||
return fileSliceStream.map(fs -> filterDataFileAfterPendingCompaction(fs));
|
||||
return fileSliceStream.map(fs -> filterBaseFileAfterPendingCompaction(fs));
|
||||
} else {
|
||||
return fileSliceStream.filter(fs -> !isPendingCompactionScheduledForFileId(fs.getFileGroupId()));
|
||||
}
|
||||
@@ -653,33 +653,33 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
}
|
||||
|
||||
/**
|
||||
* Default implementation for fetching latest data-files for the partition-path.
|
||||
* Default implementation for fetching latest base-files for the partition-path.
|
||||
*/
|
||||
Stream<HoodieDataFile> fetchLatestDataFiles(final String partitionPath) {
|
||||
return fetchAllStoredFileGroups(partitionPath).map(this::getLatestDataFile).filter(Option::isPresent)
|
||||
Stream<HoodieBaseFile> fetchLatestBaseFiles(final String partitionPath) {
|
||||
return fetchAllStoredFileGroups(partitionPath).map(this::getLatestBaseFile).filter(Option::isPresent)
|
||||
.map(Option::get);
|
||||
}
|
||||
|
||||
protected Option<HoodieDataFile> getLatestDataFile(HoodieFileGroup fileGroup) {
|
||||
protected Option<HoodieBaseFile> getLatestBaseFile(HoodieFileGroup fileGroup) {
|
||||
return Option
|
||||
.fromJavaOptional(fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst());
|
||||
.fromJavaOptional(fileGroup.getAllBaseFiles().filter(df -> !isBaseFileDueToPendingCompaction(df)).findFirst());
|
||||
}
|
||||
|
||||
/**
|
||||
* Default implementation for fetching latest data-files across all partitions.
|
||||
* Default implementation for fetching latest base-files across all partitions.
|
||||
*/
|
||||
Stream<HoodieDataFile> fetchLatestDataFiles() {
|
||||
return fetchAllStoredFileGroups().map(this::getLatestDataFile).filter(Option::isPresent).map(Option::get);
|
||||
Stream<HoodieBaseFile> fetchLatestBaseFiles() {
|
||||
return fetchAllStoredFileGroups().map(this::getLatestBaseFile).filter(Option::isPresent).map(Option::get);
|
||||
}
|
||||
|
||||
/**
|
||||
* Default implementation for fetching all data-files for a partition.
|
||||
* Default implementation for fetching all base-files for a partition.
|
||||
*
|
||||
* @param partitionPath partition-path
|
||||
*/
|
||||
Stream<HoodieDataFile> fetchAllDataFiles(String partitionPath) {
|
||||
return fetchAllStoredFileGroups(partitionPath).map(HoodieFileGroup::getAllDataFiles)
|
||||
.flatMap(dataFileList -> dataFileList);
|
||||
Stream<HoodieBaseFile> fetchAllBaseFiles(String partitionPath) {
|
||||
return fetchAllStoredFileGroups(partitionPath).map(HoodieFileGroup::getAllBaseFiles)
|
||||
.flatMap(baseFileList -> baseFileList);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -719,8 +719,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
private static FileSlice mergeCompactionPendingFileSlices(FileSlice lastSlice, FileSlice penultimateSlice) {
|
||||
FileSlice merged = new FileSlice(penultimateSlice.getPartitionPath(), penultimateSlice.getBaseInstantTime(),
|
||||
penultimateSlice.getFileId());
|
||||
if (penultimateSlice.getDataFile().isPresent()) {
|
||||
merged.setDataFile(penultimateSlice.getDataFile().get());
|
||||
if (penultimateSlice.getBaseFile().isPresent()) {
|
||||
merged.setBaseFile(penultimateSlice.getBaseFile().get());
|
||||
}
|
||||
// Add Log files from penultimate and last slices
|
||||
penultimateSlice.getLogFiles().forEach(merged::addLogFile);
|
||||
@@ -752,15 +752,15 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
}
|
||||
|
||||
/**
|
||||
* Default implementation for fetching latest data-file.
|
||||
* Default implementation for fetching latest base-file.
|
||||
*
|
||||
* @param partitionPath Partition path
|
||||
* @param fileId File Id
|
||||
* @return Data File if present
|
||||
* @return Base file if present
|
||||
*/
|
||||
protected Option<HoodieDataFile> fetchLatestDataFile(String partitionPath, String fileId) {
|
||||
return Option
|
||||
.fromJavaOptional(fetchLatestDataFiles(partitionPath).filter(fs -> fs.getFileId().equals(fileId)).findFirst());
|
||||
protected Option<HoodieBaseFile> fetchLatestBaseFile(String partitionPath, String fileId) {
|
||||
return Option.fromJavaOptional(fetchLatestBaseFiles(partitionPath)
|
||||
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -25,7 +25,7 @@ import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||
import org.apache.hudi.common.model.CompactionOperation;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.table.HoodieTimeline;
|
||||
@@ -318,13 +318,13 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
* Note that while finding the new data/log files added/removed, the path stored in metadata will be missing the
|
||||
* base-path, scheme and authority. Ensure the matching process takes care of this discrepancy.
|
||||
*/
|
||||
Map<String, HoodieDataFile> viewDataFiles = fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
|
||||
.map(FileSlice::getDataFile).filter(Option::isPresent).map(Option::get)
|
||||
Map<String, HoodieBaseFile> viewDataFiles = fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
|
||||
.map(FileSlice::getBaseFile).filter(Option::isPresent).map(Option::get)
|
||||
.map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
// Note: Delta Log Files and Data Files can be empty when adding/removing pending compactions
|
||||
Map<String, HoodieDataFile> deltaDataFiles = deltaFileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
|
||||
.map(FileSlice::getDataFile).filter(Option::isPresent).map(Option::get)
|
||||
Map<String, HoodieBaseFile> deltaDataFiles = deltaFileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
|
||||
.map(FileSlice::getBaseFile).filter(Option::isPresent).map(Option::get)
|
||||
.map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.common.table.view;
|
||||
|
||||
import org.apache.hudi.common.model.CompactionOperation;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.table.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.SyncableFileSystemView;
|
||||
@@ -120,39 +120,39 @@ public class PriorityBasedFileSystemView implements SyncableFileSystemView, Seri
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFiles(String partitionPath) {
|
||||
return execute(partitionPath, preferredView::getLatestDataFiles, secondaryView::getLatestDataFiles);
|
||||
public Stream<HoodieBaseFile> getLatestBaseFiles(String partitionPath) {
|
||||
return execute(partitionPath, preferredView::getLatestBaseFiles, secondaryView::getLatestBaseFiles);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFiles() {
|
||||
return execute(preferredView::getLatestDataFiles, secondaryView::getLatestDataFiles);
|
||||
public Stream<HoodieBaseFile> getLatestBaseFiles() {
|
||||
return execute(preferredView::getLatestBaseFiles, secondaryView::getLatestBaseFiles);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
|
||||
return execute(partitionPath, maxCommitTime, preferredView::getLatestDataFilesBeforeOrOn,
|
||||
secondaryView::getLatestDataFilesBeforeOrOn);
|
||||
public Stream<HoodieBaseFile> getLatestBaseFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
|
||||
return execute(partitionPath, maxCommitTime, preferredView::getLatestBaseFilesBeforeOrOn,
|
||||
secondaryView::getLatestBaseFilesBeforeOrOn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<HoodieDataFile> getLatestDataFile(String partitionPath, String fileId) {
|
||||
return execute(partitionPath, fileId, preferredView::getLatestDataFile, secondaryView::getLatestDataFile);
|
||||
public Option<HoodieBaseFile> getLatestBaseFile(String partitionPath, String fileId) {
|
||||
return execute(partitionPath, fileId, preferredView::getLatestBaseFile, secondaryView::getLatestBaseFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<HoodieDataFile> getDataFileOn(String partitionPath, String instantTime, String fileId) {
|
||||
return execute(partitionPath, instantTime, fileId, preferredView::getDataFileOn, secondaryView::getDataFileOn);
|
||||
public Option<HoodieBaseFile> getBaseFileOn(String partitionPath, String instantTime, String fileId) {
|
||||
return execute(partitionPath, instantTime, fileId, preferredView::getBaseFileOn, secondaryView::getBaseFileOn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
|
||||
return execute(commitsToReturn, preferredView::getLatestDataFilesInRange, secondaryView::getLatestDataFilesInRange);
|
||||
public Stream<HoodieBaseFile> getLatestBaseFilesInRange(List<String> commitsToReturn) {
|
||||
return execute(commitsToReturn, preferredView::getLatestBaseFilesInRange, secondaryView::getLatestBaseFilesInRange);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
|
||||
return execute(partitionPath, preferredView::getAllDataFiles, secondaryView::getAllDataFiles);
|
||||
public Stream<HoodieBaseFile> getAllBaseFiles(String partitionPath) {
|
||||
return execute(partitionPath, preferredView::getAllBaseFiles, secondaryView::getAllBaseFiles);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -20,14 +20,14 @@ package org.apache.hudi.common.table.view;
|
||||
|
||||
import org.apache.hudi.common.model.CompactionOperation;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.SyncableFileSystemView;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
|
||||
import org.apache.hudi.common.table.timeline.dto.DataFileDTO;
|
||||
import org.apache.hudi.common.table.timeline.dto.BaseFileDTO;
|
||||
import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
|
||||
import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
|
||||
import org.apache.hudi.common.table.timeline.dto.InstantDTO;
|
||||
@@ -205,74 +205,74 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFiles(String partitionPath) {
|
||||
public Stream<HoodieBaseFile> getLatestBaseFiles(String partitionPath) {
|
||||
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_PARTITION_DATA_FILES_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
List<BaseFileDTO> dataFiles = executeRequest(LATEST_PARTITION_DATA_FILES_URL, paramsMap,
|
||||
new TypeReference<List<BaseFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFiles() {
|
||||
public Stream<HoodieBaseFile> getLatestBaseFiles() {
|
||||
Map<String, String> paramsMap = getParams();
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_ALL_DATA_FILES, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
List<BaseFileDTO> dataFiles = executeRequest(LATEST_ALL_DATA_FILES, paramsMap,
|
||||
new TypeReference<List<BaseFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
|
||||
public Stream<HoodieBaseFile> getLatestBaseFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
List<BaseFileDTO> dataFiles = executeRequest(LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<BaseFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<HoodieDataFile> getDataFileOn(String partitionPath, String instantTime, String fileId) {
|
||||
public Option<HoodieBaseFile> getBaseFileOn(String partitionPath, String instantTime, String fileId) {
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParams(partitionPath,
|
||||
new String[] {INSTANT_PARAM, FILEID_PARAM}, new String[] {instantTime, fileId});
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_DATA_FILE_ON_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
|
||||
List<BaseFileDTO> dataFiles = executeRequest(LATEST_DATA_FILE_ON_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<BaseFileDTO>>() {}, RequestMethod.GET);
|
||||
return Option.fromJavaOptional(dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile).findFirst());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
|
||||
public Stream<HoodieBaseFile> getLatestBaseFilesInRange(List<String> commitsToReturn) {
|
||||
Map<String, String> paramsMap =
|
||||
getParams(INSTANTS_PARAM, StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_DATA_FILES_RANGE_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
List<BaseFileDTO> dataFiles = executeRequest(LATEST_DATA_FILES_RANGE_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<BaseFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
|
||||
public Stream<HoodieBaseFile> getAllBaseFiles(String partitionPath) {
|
||||
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
|
||||
try {
|
||||
List<DataFileDTO> dataFiles =
|
||||
executeRequest(ALL_DATA_FILES, paramsMap, new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
List<BaseFileDTO> dataFiles =
|
||||
executeRequest(ALL_DATA_FILES, paramsMap, new TypeReference<List<BaseFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
@@ -439,12 +439,12 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<HoodieDataFile> getLatestDataFile(String partitionPath, String fileId) {
|
||||
public Option<HoodieBaseFile> getLatestBaseFile(String partitionPath, String fileId) {
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_PARTITION_DATA_FILE_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
|
||||
List<BaseFileDTO> dataFiles = executeRequest(LATEST_PARTITION_DATA_FILE_URL, paramsMap,
|
||||
new TypeReference<List<BaseFileDTO>>() {}, RequestMethod.GET);
|
||||
return Option.fromJavaOptional(dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile).findFirst());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.common.table.view;
|
||||
|
||||
import org.apache.hudi.common.model.CompactionOperation;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
@@ -174,7 +174,7 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
rocksDB.writeBatch(batch -> {
|
||||
fg.getAllFileSlicesIncludingInflight().forEach(fs -> {
|
||||
rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(), schemaHelper.getKeyForSliceView(fg, fs), fs);
|
||||
fs.getDataFile().ifPresent(df -> {
|
||||
fs.getBaseFile().ifPresent(df -> {
|
||||
rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(), schemaHelper.getKeyForDataFileView(fg, fs),
|
||||
df);
|
||||
});
|
||||
@@ -218,8 +218,8 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
switch (mode) {
|
||||
case ADD: {
|
||||
FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(), oldSlice.getBaseInstantTime());
|
||||
oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
|
||||
fs.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
|
||||
oldSlice.getBaseFile().ifPresent(df -> newFileSlice.setBaseFile(df));
|
||||
fs.getBaseFile().ifPresent(df -> newFileSlice.setBaseFile(df));
|
||||
Map<String, HoodieLogFile> newLogFiles = new HashMap<>(logFiles);
|
||||
deltaLogFiles.entrySet().stream().filter(e -> !logFiles.containsKey(e.getKey()))
|
||||
.forEach(p -> newLogFiles.put(p.getKey(), p.getValue()));
|
||||
@@ -230,15 +230,15 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
case REMOVE: {
|
||||
LOG.info("Removing old File Slice =" + fs);
|
||||
FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(), oldSlice.getBaseInstantTime());
|
||||
fs.getDataFile().orElseGet(() -> {
|
||||
oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
|
||||
fs.getBaseFile().orElseGet(() -> {
|
||||
oldSlice.getBaseFile().ifPresent(df -> newFileSlice.setBaseFile(df));
|
||||
return null;
|
||||
});
|
||||
|
||||
deltaLogFiles.keySet().stream().forEach(p -> logFiles.remove(p));
|
||||
// Add remaining log files back
|
||||
logFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
|
||||
if (newFileSlice.getDataFile().isPresent() || (newFileSlice.getLogFiles().count() > 0)) {
|
||||
if (newFileSlice.getBaseFile().isPresent() || (newFileSlice.getLogFiles().count() > 0)) {
|
||||
LOG.info("Adding back new file-slice after remove FS=" + newFileSlice);
|
||||
return newFileSlice;
|
||||
}
|
||||
@@ -250,7 +250,7 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
}
|
||||
}).filter(Objects::nonNull).forEach(fs -> {
|
||||
rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(), schemaHelper.getKeyForSliceView(fg, fs), fs);
|
||||
fs.getDataFile().ifPresent(df -> {
|
||||
fs.getBaseFile().ifPresent(df -> {
|
||||
rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(), schemaHelper.getKeyForDataFileView(fg, fs),
|
||||
df);
|
||||
});
|
||||
@@ -266,8 +266,8 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
}
|
||||
|
||||
@Override
|
||||
Stream<HoodieDataFile> fetchAllDataFiles(String partitionPath) {
|
||||
return rocksDB.<HoodieDataFile>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
Stream<HoodieBaseFile> fetchAllBaseFiles(String partitionPath) {
|
||||
return rocksDB.<HoodieBaseFile>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForDataFileViewByPartition(partitionPath)).map(Pair::getValue);
|
||||
}
|
||||
|
||||
@@ -298,11 +298,11 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Option<HoodieDataFile> fetchLatestDataFile(String partitionPath, String fileId) {
|
||||
protected Option<HoodieBaseFile> fetchLatestBaseFile(String partitionPath, String fileId) {
|
||||
// Retrieves only file-slices of the file and filters for the latest
|
||||
return Option
|
||||
.ofNullable(rocksDB
|
||||
.<HoodieDataFile>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
.<HoodieBaseFile>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForDataFileViewByPartitionFile(partitionPath, fileId))
|
||||
.map(Pair::getValue).reduce(null,
|
||||
(x, y) -> ((x == null) ? y
|
||||
|
||||
@@ -69,8 +69,8 @@ public class CompactionUtils {
|
||||
builder.setFileId(fileSlice.getFileId());
|
||||
builder.setBaseInstantTime(fileSlice.getBaseInstantTime());
|
||||
builder.setDeltaFilePaths(fileSlice.getLogFiles().map(lf -> lf.getPath().getName()).collect(Collectors.toList()));
|
||||
if (fileSlice.getDataFile().isPresent()) {
|
||||
builder.setDataFilePath(fileSlice.getDataFile().get().getFileName());
|
||||
if (fileSlice.getBaseFile().isPresent()) {
|
||||
builder.setDataFilePath(fileSlice.getBaseFile().get().getFileName());
|
||||
}
|
||||
|
||||
if (metricsCaptureFunction.isPresent()) {
|
||||
|
||||
@@ -22,14 +22,15 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.common.HoodieCommonTestHarness;
|
||||
import org.apache.hudi.common.model.CompactionOperation;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.SyncableFileSystemView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.BaseFileOnlyView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.SliceView;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||
@@ -76,8 +77,8 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
private static String TEST_WRITE_TOKEN = "1-0-1";
|
||||
|
||||
protected SyncableFileSystemView fsView;
|
||||
protected TableFileSystemView.ReadOptimizedView roView;
|
||||
protected TableFileSystemView.RealtimeView rtView;
|
||||
protected BaseFileOnlyView roView;
|
||||
protected SliceView rtView;
|
||||
|
||||
@Before
|
||||
public void init() throws IOException {
|
||||
@@ -135,13 +136,13 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
|
||||
refreshFsView();
|
||||
|
||||
List<HoodieDataFile> dataFiles = roView.getLatestDataFiles().collect(Collectors.toList());
|
||||
List<HoodieBaseFile> dataFiles = roView.getLatestBaseFiles().collect(Collectors.toList());
|
||||
assertTrue("No data file expected", dataFiles.isEmpty());
|
||||
List<FileSlice> fileSliceList = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
assertEquals(1, fileSliceList.size());
|
||||
FileSlice fileSlice = fileSliceList.get(0);
|
||||
assertEquals("File-Id must be set correctly", fileId, fileSlice.getFileId());
|
||||
assertFalse("Data file for base instant must be present", fileSlice.getDataFile().isPresent());
|
||||
assertFalse("Data file for base instant must be present", fileSlice.getBaseFile().isPresent());
|
||||
assertEquals("Base Instant for file-group set correctly", instantTime1, fileSlice.getBaseInstantTime());
|
||||
List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
|
||||
assertEquals("Correct number of log-files shows up in file-slice", 2, logFiles.size());
|
||||
@@ -154,7 +155,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals(1, fileSliceList.size());
|
||||
fileSlice = fileSliceList.get(0);
|
||||
assertEquals("File-Id must be set correctly", fileId, fileSlice.getFileId());
|
||||
assertFalse("Data file for base instant must be present", fileSlice.getDataFile().isPresent());
|
||||
assertFalse("Data file for base instant must be present", fileSlice.getBaseFile().isPresent());
|
||||
assertEquals("Base Instant for file-group set correctly", instantTime1, fileSlice.getBaseInstantTime());
|
||||
logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
|
||||
assertEquals("Correct number of log-files shows up in file-slice", 2, logFiles.size());
|
||||
@@ -166,7 +167,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals(1, fileSliceList.size());
|
||||
fileSlice = fileSliceList.get(0);
|
||||
assertEquals("File-Id must be set correctly", fileId, fileSlice.getFileId());
|
||||
assertFalse("Data file for base instant must be present", fileSlice.getDataFile().isPresent());
|
||||
assertFalse("Data file for base instant must be present", fileSlice.getBaseFile().isPresent());
|
||||
assertEquals("Base Instant for file-group set correctly", instantTime1, fileSlice.getBaseInstantTime());
|
||||
logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
|
||||
assertEquals("Correct number of log-files shows up in file-slice", 2, logFiles.size());
|
||||
@@ -176,7 +177,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals("Total number of file-slices in view matches expected", expNumTotalFileSlices,
|
||||
rtView.getAllFileSlices(partitionPath).count());
|
||||
assertEquals("Total number of data-files in view matches expected", expNumTotalDataFiles,
|
||||
roView.getAllDataFiles(partitionPath).count());
|
||||
roView.getAllBaseFiles(partitionPath).count());
|
||||
assertEquals("Total number of file-groups in view matches expected", 1,
|
||||
fsView.getAllFileGroups(partitionPath).count());
|
||||
}
|
||||
@@ -293,7 +294,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals("Expected latest file-slices", 1, slices.size());
|
||||
assertEquals("Base-Instant must be compaction Instant", compactionRequestedTime,
|
||||
slices.get(0).getBaseInstantTime());
|
||||
assertFalse("Latest File Slice must not have data-file", slices.get(0).getDataFile().isPresent());
|
||||
assertFalse("Latest File Slice must not have data-file", slices.get(0).getBaseFile().isPresent());
|
||||
assertEquals("Latest File Slice must not have any log-files", 0, slices.get(0).getLogFiles().count());
|
||||
|
||||
// Fake delta-ingestion after compaction-requested
|
||||
@@ -313,7 +314,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
saveAsComplete(commitTimeline, deltaInstant5, Option.empty());
|
||||
refreshFsView();
|
||||
|
||||
List<HoodieDataFile> dataFiles = roView.getAllDataFiles(partitionPath).collect(Collectors.toList());
|
||||
List<HoodieBaseFile> dataFiles = roView.getAllBaseFiles(partitionPath).collect(Collectors.toList());
|
||||
if (skipCreatingDataFile) {
|
||||
assertTrue("No data file expected", dataFiles.isEmpty());
|
||||
} else {
|
||||
@@ -328,9 +329,9 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
FileSlice fileSlice = fileSliceList.get(0);
|
||||
assertEquals(fileId, fileSlice.getFileId());
|
||||
if (!skipCreatingDataFile) {
|
||||
assertEquals("Data file must be present", dataFileName, fileSlice.getDataFile().get().getFileName());
|
||||
assertEquals("Data file must be present", dataFileName, fileSlice.getBaseFile().get().getFileName());
|
||||
} else {
|
||||
assertFalse("No data-file expected as it was not created", fileSlice.getDataFile().isPresent());
|
||||
assertFalse("No data-file expected as it was not created", fileSlice.getBaseFile().isPresent());
|
||||
}
|
||||
assertEquals("Base Instant of penultimate file-slice must be base instant", instantTime1,
|
||||
fileSlice.getBaseInstantTime());
|
||||
@@ -346,7 +347,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals("Expect only one file-id", 1, fileSliceList.size());
|
||||
fileSlice = fileSliceList.get(0);
|
||||
assertEquals(fileId, fileSlice.getFileId());
|
||||
assertFalse("No data-file expected in latest file-slice", fileSlice.getDataFile().isPresent());
|
||||
assertFalse("No data-file expected in latest file-slice", fileSlice.getBaseFile().isPresent());
|
||||
assertEquals("Compaction requested instant must be base instant", compactionRequestedTime,
|
||||
fileSlice.getBaseInstantTime());
|
||||
logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
|
||||
@@ -355,28 +356,28 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals("Log File Order check", fileName3, logFiles.get(1).getFileName());
|
||||
|
||||
/** Data Files API tests */
|
||||
dataFiles = roView.getLatestDataFiles().collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFiles().collect(Collectors.toList());
|
||||
if (skipCreatingDataFile) {
|
||||
assertEquals("Expect no data file to be returned", 0, dataFiles.size());
|
||||
} else {
|
||||
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
|
||||
dataFiles.forEach(df -> assertEquals("Expect data-file for instant 1 be returned", df.getCommitTime(), instantTime1));
|
||||
}
|
||||
dataFiles = roView.getLatestDataFiles(partitionPath).collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFiles(partitionPath).collect(Collectors.toList());
|
||||
if (skipCreatingDataFile) {
|
||||
assertEquals("Expect no data file to be returned", 0, dataFiles.size());
|
||||
} else {
|
||||
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
|
||||
dataFiles.forEach(df -> assertEquals("Expect data-file for instant 1 be returned", df.getCommitTime(), instantTime1));
|
||||
}
|
||||
dataFiles = roView.getLatestDataFilesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFilesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
|
||||
if (skipCreatingDataFile) {
|
||||
assertEquals("Expect no data file to be returned", 0, dataFiles.size());
|
||||
} else {
|
||||
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
|
||||
dataFiles.forEach(df -> assertEquals("Expect data-file for instant 1 be returned", df.getCommitTime(), instantTime1));
|
||||
}
|
||||
dataFiles = roView.getLatestDataFilesInRange(allInstantTimes).collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFilesInRange(allInstantTimes).collect(Collectors.toList());
|
||||
if (skipCreatingDataFile) {
|
||||
assertEquals("Expect no data file to be returned", 0, dataFiles.size());
|
||||
} else {
|
||||
@@ -415,8 +416,8 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
|
||||
List<FileSlice> allRawFileSlices = getAllRawFileSlices(partitionPath).collect(Collectors.toList());
|
||||
dataFiles = allRawFileSlices.stream().flatMap(slice -> {
|
||||
if (slice.getDataFile().isPresent()) {
|
||||
return Stream.of(slice.getDataFile().get());
|
||||
if (slice.getBaseFile().isPresent()) {
|
||||
return Stream.of(slice.getBaseFile().get());
|
||||
}
|
||||
return Stream.empty();
|
||||
}).collect(Collectors.toList());
|
||||
@@ -424,7 +425,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
if (includeInvalidAndInflight) {
|
||||
assertEquals("Inflight/Orphan data-file is also expected",
|
||||
2 + (isCompactionInFlight ? 1 : 0) + (skipCreatingDataFile ? 0 : 1), dataFiles.size());
|
||||
Set<String> fileNames = dataFiles.stream().map(HoodieDataFile::getFileName).collect(Collectors.toSet());
|
||||
Set<String> fileNames = dataFiles.stream().map(HoodieBaseFile::getFileName).collect(Collectors.toSet());
|
||||
assertTrue("Expect orphan data-file to be present", fileNames.contains(orphanDataFileName));
|
||||
assertTrue("Expect inflight data-file to be present", fileNames.contains(inflightDataFileName));
|
||||
if (!skipCreatingDataFile) {
|
||||
@@ -448,26 +449,26 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals("Orphan File Slice with data-file check base-commit", invalidInstantId,
|
||||
orphanFileSliceWithDataFile.getBaseInstantTime());
|
||||
assertEquals("Orphan File Slice with data-file check data-file", orphanDataFileName,
|
||||
orphanFileSliceWithDataFile.getDataFile().get().getFileName());
|
||||
orphanFileSliceWithDataFile.getBaseFile().get().getFileName());
|
||||
assertEquals("Orphan File Slice with data-file check data-file", 0,
|
||||
orphanFileSliceWithDataFile.getLogFiles().count());
|
||||
assertEquals("Inflight File Slice with data-file check base-commit", inflightDeltaInstantTime,
|
||||
inflightFileSliceWithDataFile.getBaseInstantTime());
|
||||
assertEquals("Inflight File Slice with data-file check data-file", inflightDataFileName,
|
||||
inflightFileSliceWithDataFile.getDataFile().get().getFileName());
|
||||
inflightFileSliceWithDataFile.getBaseFile().get().getFileName());
|
||||
assertEquals("Inflight File Slice with data-file check data-file", 0,
|
||||
inflightFileSliceWithDataFile.getLogFiles().count());
|
||||
assertEquals("Orphan File Slice with log-file check base-commit", invalidInstantId,
|
||||
orphanFileSliceWithLogFile.getBaseInstantTime());
|
||||
assertFalse("Orphan File Slice with log-file check data-file",
|
||||
orphanFileSliceWithLogFile.getDataFile().isPresent());
|
||||
orphanFileSliceWithLogFile.getBaseFile().isPresent());
|
||||
logFiles = orphanFileSliceWithLogFile.getLogFiles().collect(Collectors.toList());
|
||||
assertEquals("Orphan File Slice with log-file check data-file", 1, logFiles.size());
|
||||
assertEquals("Orphan File Slice with log-file check data-file", orphanLogFileName, logFiles.get(0).getFileName());
|
||||
assertEquals("Inflight File Slice with log-file check base-commit", inflightDeltaInstantTime,
|
||||
inflightFileSliceWithLogFile.getBaseInstantTime());
|
||||
assertFalse("Inflight File Slice with log-file check data-file",
|
||||
inflightFileSliceWithLogFile.getDataFile().isPresent());
|
||||
inflightFileSliceWithLogFile.getBaseFile().isPresent());
|
||||
logFiles = inflightFileSliceWithLogFile.getLogFiles().collect(Collectors.toList());
|
||||
assertEquals("Inflight File Slice with log-file check data-file", 1, logFiles.size());
|
||||
assertEquals("Inflight File Slice with log-file check data-file", inflightLogFileName,
|
||||
@@ -486,11 +487,11 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
commitTimeline.saveAsComplete(compactionInstant, Option.empty());
|
||||
refreshFsView();
|
||||
// populate the cache
|
||||
roView.getAllDataFiles(partitionPath);
|
||||
roView.getAllBaseFiles(partitionPath);
|
||||
|
||||
fileSliceList = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
LOG.info("FILESLICE LIST=" + fileSliceList);
|
||||
dataFiles = fileSliceList.stream().map(FileSlice::getDataFile).filter(Option::isPresent).map(Option::get)
|
||||
dataFiles = fileSliceList.stream().map(FileSlice::getBaseFile).filter(Option::isPresent).map(Option::get)
|
||||
.collect(Collectors.toList());
|
||||
assertEquals("Expect only one data-files in latest view as there is only one file-group", 1, dataFiles.size());
|
||||
assertEquals("Data Filename must match", compactDataFileName, dataFiles.get(0).getFileName());
|
||||
@@ -498,7 +499,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
fileSlice = fileSliceList.get(0);
|
||||
assertEquals("Check file-Id is set correctly", fileId, fileSlice.getFileId());
|
||||
assertEquals("Check data-filename is set correctly", compactDataFileName,
|
||||
fileSlice.getDataFile().get().getFileName());
|
||||
fileSlice.getBaseFile().get().getFileName());
|
||||
assertEquals("Ensure base-instant is now compaction request instant", compactionRequestedTime,
|
||||
fileSlice.getBaseInstantTime());
|
||||
logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
|
||||
@@ -507,23 +508,23 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals("Log File Order check", fileName3, logFiles.get(1).getFileName());
|
||||
|
||||
/** Data Files API tests */
|
||||
dataFiles = roView.getLatestDataFiles().collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFiles().collect(Collectors.toList());
|
||||
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
|
||||
dataFiles.forEach(df -> assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(), compactionRequestedTime));
|
||||
dataFiles = roView.getLatestDataFiles(partitionPath).collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFiles(partitionPath).collect(Collectors.toList());
|
||||
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
|
||||
dataFiles.forEach(df -> assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(), compactionRequestedTime));
|
||||
dataFiles = roView.getLatestDataFilesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFilesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
|
||||
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
|
||||
dataFiles.forEach(df -> assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(), compactionRequestedTime));
|
||||
dataFiles = roView.getLatestDataFilesInRange(allInstantTimes).collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFilesInRange(allInstantTimes).collect(Collectors.toList());
|
||||
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
|
||||
dataFiles.forEach(df -> assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(), compactionRequestedTime));
|
||||
|
||||
assertEquals("Total number of file-slices in partitions matches expected", expTotalFileSlices,
|
||||
rtView.getAllFileSlices(partitionPath).count());
|
||||
assertEquals("Total number of data-files in partitions matches expected", expTotalDataFiles,
|
||||
roView.getAllDataFiles(partitionPath).count());
|
||||
roView.getAllBaseFiles(partitionPath).count());
|
||||
// file-groups includes inflight/invalid file-ids
|
||||
assertEquals("Total number of file-groups in partitions matches expected", 5,
|
||||
fsView.getAllFileGroups(partitionPath).count());
|
||||
@@ -535,7 +536,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
new File(basePath + "/" + partitionPath).mkdirs();
|
||||
String fileId = UUID.randomUUID().toString();
|
||||
|
||||
assertFalse("No commit, should not find any data file", roView.getLatestDataFiles(partitionPath)
|
||||
assertFalse("No commit, should not find any data file", roView.getLatestBaseFiles(partitionPath)
|
||||
.anyMatch(dfile -> dfile.getFileId().equals(fileId)));
|
||||
|
||||
// Only one commit, but is not safe
|
||||
@@ -543,7 +544,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId);
|
||||
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
|
||||
refreshFsView();
|
||||
assertFalse("No commit, should not find any data file", roView.getLatestDataFiles(partitionPath)
|
||||
assertFalse("No commit, should not find any data file", roView.getLatestBaseFiles(partitionPath)
|
||||
.anyMatch(dfile -> dfile.getFileId().equals(fileId)));
|
||||
|
||||
// Make this commit safe
|
||||
@@ -551,7 +552,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
|
||||
saveAsComplete(commitTimeline, instant1, Option.empty());
|
||||
refreshFsView();
|
||||
assertEquals("", fileName1, roView.getLatestDataFiles(partitionPath)
|
||||
assertEquals("", fileName1, roView.getLatestBaseFiles(partitionPath)
|
||||
.filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get().getFileName());
|
||||
|
||||
// Do another commit, but not safe
|
||||
@@ -559,14 +560,14 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
String fileName2 = FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId);
|
||||
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
|
||||
refreshFsView();
|
||||
assertEquals("", fileName1, roView.getLatestDataFiles(partitionPath)
|
||||
assertEquals("", fileName1, roView.getLatestBaseFiles(partitionPath)
|
||||
.filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get().getFileName());
|
||||
|
||||
// Make it safe
|
||||
HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime2);
|
||||
saveAsComplete(commitTimeline, instant2, Option.empty());
|
||||
refreshFsView();
|
||||
assertEquals("", fileName2, roView.getLatestDataFiles(partitionPath)
|
||||
assertEquals("", fileName2, roView.getLatestBaseFiles(partitionPath)
|
||||
.filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get().getFileName());
|
||||
}
|
||||
|
||||
@@ -648,11 +649,11 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals(isLatestFileSliceOnly ? 1 : 2, fileSliceMap.get(fileId3).longValue());
|
||||
assertEquals(1, fileSliceMap.get(fileId4).longValue());
|
||||
|
||||
List<HoodieDataFile> dataFileList =
|
||||
roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime4).collect(Collectors.toList());
|
||||
List<HoodieBaseFile> dataFileList =
|
||||
roView.getLatestBaseFilesBeforeOrOn("2016/05/01", commitTime4).collect(Collectors.toList());
|
||||
assertEquals(3, dataFileList.size());
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
for (HoodieDataFile status : dataFileList) {
|
||||
for (HoodieBaseFile status : dataFileList) {
|
||||
filenames.add(status.getFileName());
|
||||
}
|
||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)));
|
||||
@@ -676,10 +677,10 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
.contains(FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN)));
|
||||
|
||||
// Reset the max commit time
|
||||
List<HoodieDataFile> dataFiles =
|
||||
roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime3).collect(Collectors.toList());
|
||||
List<HoodieBaseFile> dataFiles =
|
||||
roView.getLatestBaseFilesBeforeOrOn("2016/05/01", commitTime3).collect(Collectors.toList());
|
||||
filenames = Sets.newHashSet();
|
||||
for (HoodieDataFile status : dataFiles) {
|
||||
for (HoodieBaseFile status : dataFiles) {
|
||||
filenames.add(status.getFileName());
|
||||
}
|
||||
if (!isLatestFileSliceOnly) {
|
||||
@@ -739,7 +740,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
for (HoodieFileGroup fileGroup : fileGroups) {
|
||||
String fileId = fileGroup.getFileGroupId().getFileId();
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
fileGroup.getAllDataFiles().forEach(dataFile -> {
|
||||
fileGroup.getAllBaseFiles().forEach(dataFile -> {
|
||||
assertEquals("All same fileId should be grouped", fileId, dataFile.getFileId());
|
||||
filenames.add(dataFile.getFileName());
|
||||
});
|
||||
@@ -811,13 +812,13 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
|
||||
refreshFsView();
|
||||
// Populate view for partition
|
||||
roView.getAllDataFiles("2016/05/01/");
|
||||
roView.getAllBaseFiles("2016/05/01/");
|
||||
|
||||
List<HoodieDataFile> dataFiles =
|
||||
roView.getLatestDataFilesInRange(Lists.newArrayList(commitTime2, commitTime3)).collect(Collectors.toList());
|
||||
List<HoodieBaseFile> dataFiles =
|
||||
roView.getLatestBaseFilesInRange(Lists.newArrayList(commitTime2, commitTime3)).collect(Collectors.toList());
|
||||
assertEquals(isLatestFileSliceOnly ? 2 : 3, dataFiles.size());
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
for (HoodieDataFile status : dataFiles) {
|
||||
for (HoodieBaseFile status : dataFiles) {
|
||||
filenames.add(status.getFileName());
|
||||
}
|
||||
|
||||
@@ -833,15 +834,15 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
for (FileSlice slice : slices) {
|
||||
if (slice.getFileId().equals(fileId1)) {
|
||||
assertEquals(slice.getBaseInstantTime(), commitTime3);
|
||||
assertTrue(slice.getDataFile().isPresent());
|
||||
assertTrue(slice.getBaseFile().isPresent());
|
||||
assertEquals(slice.getLogFiles().count(), 0);
|
||||
} else if (slice.getFileId().equals(fileId2)) {
|
||||
assertEquals(slice.getBaseInstantTime(), commitTime3);
|
||||
assertTrue(slice.getDataFile().isPresent());
|
||||
assertTrue(slice.getBaseFile().isPresent());
|
||||
assertEquals(slice.getLogFiles().count(), 1);
|
||||
} else if (slice.getFileId().equals(fileId3)) {
|
||||
assertEquals(slice.getBaseInstantTime(), commitTime4);
|
||||
assertTrue(slice.getDataFile().isPresent());
|
||||
assertTrue(slice.getBaseFile().isPresent());
|
||||
assertEquals(slice.getLogFiles().count(), 0);
|
||||
}
|
||||
}
|
||||
@@ -883,12 +884,12 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals(7, statuses.length);
|
||||
|
||||
refreshFsView();
|
||||
List<HoodieDataFile> dataFiles =
|
||||
roView.getLatestDataFilesBeforeOrOn(partitionPath, commitTime2).collect(Collectors.toList());
|
||||
List<HoodieBaseFile> dataFiles =
|
||||
roView.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime2).collect(Collectors.toList());
|
||||
if (!isLatestFileSliceOnly) {
|
||||
assertEquals(2, dataFiles.size());
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
for (HoodieDataFile status : dataFiles) {
|
||||
for (HoodieBaseFile status : dataFiles) {
|
||||
filenames.add(status.getFileName());
|
||||
}
|
||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)));
|
||||
@@ -952,7 +953,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals(10, statuses.length);
|
||||
|
||||
refreshFsView();
|
||||
fsView.getAllDataFiles(partitionPath);
|
||||
fsView.getAllBaseFiles(partitionPath);
|
||||
List<HoodieFileGroup> fileGroups = fsView.getAllFileGroups(partitionPath).collect(Collectors.toList());
|
||||
assertEquals(3, fileGroups.size());
|
||||
for (HoodieFileGroup fileGroup : fileGroups) {
|
||||
@@ -980,10 +981,10 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
List<HoodieDataFile> statuses1 = roView.getLatestDataFiles().collect(Collectors.toList());
|
||||
List<HoodieBaseFile> statuses1 = roView.getLatestBaseFiles().collect(Collectors.toList());
|
||||
assertEquals(3, statuses1.size());
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
for (HoodieDataFile status : statuses1) {
|
||||
for (HoodieBaseFile status : statuses1) {
|
||||
filenames.add(status.getFileName());
|
||||
}
|
||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)));
|
||||
@@ -1092,10 +1093,10 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
refreshFsView();
|
||||
|
||||
// Test Data Files
|
||||
List<HoodieDataFile> dataFiles = roView.getAllDataFiles(partitionPath1).collect(Collectors.toList());
|
||||
List<HoodieBaseFile> dataFiles = roView.getAllBaseFiles(partitionPath1).collect(Collectors.toList());
|
||||
assertEquals("One data-file is expected as there is only one file-group", 1, dataFiles.size());
|
||||
assertEquals("Expect only valid commit", "1", dataFiles.get(0).getCommitTime());
|
||||
dataFiles = roView.getAllDataFiles(partitionPath2).collect(Collectors.toList());
|
||||
dataFiles = roView.getAllBaseFiles(partitionPath2).collect(Collectors.toList());
|
||||
assertEquals("One data-file is expected as there is only one file-group", 1, dataFiles.size());
|
||||
assertEquals("Expect only valid commit", "1", dataFiles.get(0).getCommitTime());
|
||||
|
||||
@@ -1106,7 +1107,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals("Expect file-slice to be merged", 1, fileSliceList.size());
|
||||
FileSlice fileSlice = fileSliceList.get(0);
|
||||
assertEquals(fileId, fileSlice.getFileId());
|
||||
assertEquals("Data file must be present", dataFileName, fileSlice.getDataFile().get().getFileName());
|
||||
assertEquals("Data file must be present", dataFileName, fileSlice.getBaseFile().get().getFileName());
|
||||
assertEquals("Base Instant of penultimate file-slice must be base instant", instantTime1,
|
||||
fileSlice.getBaseInstantTime());
|
||||
List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
|
||||
@@ -1120,7 +1121,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
||||
assertEquals("Expect only one file-id", 1, fileSliceList.size());
|
||||
fileSlice = fileSliceList.get(0);
|
||||
assertEquals(fileId, fileSlice.getFileId());
|
||||
assertFalse("No data-file expected in latest file-slice", fileSlice.getDataFile().isPresent());
|
||||
assertFalse("No data-file expected in latest file-slice", fileSlice.getBaseFile().isPresent());
|
||||
assertEquals("Compaction requested instant must be base instant", compactionRequestedTime,
|
||||
fileSlice.getBaseInstantTime());
|
||||
logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
|
||||
|
||||
@@ -29,7 +29,7 @@ import org.apache.hudi.common.model.CompactionOperation;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
@@ -502,7 +502,7 @@ public class TestIncrementalFSViewSync extends HoodieCommonTestHarness {
|
||||
view.getLatestFileSlices(p).forEach(fs -> {
|
||||
Assert.assertEquals(instantTime, fs.getBaseInstantTime());
|
||||
Assert.assertEquals(p, fs.getPartitionPath());
|
||||
Assert.assertFalse(fs.getDataFile().isPresent());
|
||||
Assert.assertFalse(fs.getBaseFile().isPresent());
|
||||
});
|
||||
view.getLatestMergedFileSlicesBeforeOrOn(p, instantTime).forEach(fs -> {
|
||||
Assert
|
||||
@@ -625,7 +625,7 @@ public class TestIncrementalFSViewSync extends HoodieCommonTestHarness {
|
||||
});
|
||||
} else {
|
||||
partitions.forEach(p -> {
|
||||
view.getLatestDataFiles(p).forEach(f -> {
|
||||
view.getLatestBaseFiles(p).forEach(f -> {
|
||||
Assert.assertEquals(instant, f.getCommitTime());
|
||||
});
|
||||
});
|
||||
@@ -676,10 +676,10 @@ public class TestIncrementalFSViewSync extends HoodieCommonTestHarness {
|
||||
FileSlice slice2 = e2.getValue();
|
||||
Assert.assertEquals(slice1.getBaseInstantTime(), slice2.getBaseInstantTime());
|
||||
Assert.assertEquals(slice1.getFileId(), slice2.getFileId());
|
||||
Assert.assertEquals(slice1.getDataFile().isPresent(), slice2.getDataFile().isPresent());
|
||||
if (slice1.getDataFile().isPresent()) {
|
||||
HoodieDataFile df1 = slice1.getDataFile().get();
|
||||
HoodieDataFile df2 = slice2.getDataFile().get();
|
||||
Assert.assertEquals(slice1.getBaseFile().isPresent(), slice2.getBaseFile().isPresent());
|
||||
if (slice1.getBaseFile().isPresent()) {
|
||||
HoodieBaseFile df1 = slice1.getBaseFile().get();
|
||||
HoodieBaseFile df2 = slice2.getBaseFile().get();
|
||||
Assert.assertEquals(df1.getCommitTime(), df2.getCommitTime());
|
||||
Assert.assertEquals(df1.getFileId(), df2.getFileId());
|
||||
Assert.assertEquals(df1.getFileName(), df2.getFileName());
|
||||
|
||||
@@ -21,7 +21,7 @@ package org.apache.hudi.common.util;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionOperation;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieTestUtils;
|
||||
@@ -157,7 +157,7 @@ public class CompactionTestUtils {
|
||||
instantId, fileId, Option.of(2));
|
||||
FileSlice slice = new FileSlice(DEFAULT_PARTITION_PATHS[0], instantId, fileId);
|
||||
if (createDataFile) {
|
||||
slice.setDataFile(new TestHoodieDataFile(metaClient.getBasePath() + "/" + DEFAULT_PARTITION_PATHS[0] + "/"
|
||||
slice.setBaseFile(new TestHoodieBaseFile(metaClient.getBasePath() + "/" + DEFAULT_PARTITION_PATHS[0] + "/"
|
||||
+ FSUtils.makeDataFileName(instantId, TEST_WRITE_TOKEN, fileId)));
|
||||
}
|
||||
String logFilePath1 = HoodieTestUtils.getLogFilePath(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0],
|
||||
@@ -186,11 +186,11 @@ public class CompactionTestUtils {
|
||||
/**
|
||||
* The hoodie data file for testing.
|
||||
*/
|
||||
public static class TestHoodieDataFile extends HoodieDataFile {
|
||||
public static class TestHoodieBaseFile extends HoodieBaseFile {
|
||||
|
||||
private final String path;
|
||||
|
||||
public TestHoodieDataFile(String path) {
|
||||
public TestHoodieBaseFile(String path) {
|
||||
super(path);
|
||||
this.path = path;
|
||||
}
|
||||
|
||||
@@ -22,12 +22,12 @@ import org.apache.hudi.avro.model.HoodieCompactionOperation;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.common.HoodieCommonTestHarness;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.util.CompactionTestUtils.TestHoodieDataFile;
|
||||
import org.apache.hudi.common.util.CompactionTestUtils.TestHoodieBaseFile;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.common.versioning.compaction.CompactionPlanMigrator;
|
||||
|
||||
@@ -97,7 +97,7 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
|
||||
|
||||
// File Slice with data-file but no log files
|
||||
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
|
||||
noLogFileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
|
||||
noLogFileSlice.setBaseFile(new TestHoodieBaseFile("/tmp/noLog_1_000.parquet"));
|
||||
op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noLogFileSlice, Option.of(metricsCaptureFn));
|
||||
testFileSliceCompactionOpEquality(noLogFileSlice, op, DEFAULT_PARTITION_PATHS[0],
|
||||
LATEST_COMPACTION_METADATA_VERSION);
|
||||
@@ -113,7 +113,7 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
|
||||
|
||||
// File Slice with data-file and log files present
|
||||
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
|
||||
fileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
|
||||
fileSlice.setBaseFile(new TestHoodieBaseFile("/tmp/noLog_1_000.parquet"));
|
||||
fileSlice.addLogFile(
|
||||
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
|
||||
fileSlice.addLogFile(
|
||||
@@ -129,13 +129,13 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
|
||||
Path fullPartitionPath = new Path(new Path(metaClient.getBasePath()), DEFAULT_PARTITION_PATHS[0]);
|
||||
FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "empty1");
|
||||
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
|
||||
fileSlice.setDataFile(new TestHoodieDataFile(fullPartitionPath.toString() + "/data1_1_000.parquet"));
|
||||
fileSlice.setBaseFile(new TestHoodieBaseFile(fullPartitionPath.toString() + "/data1_1_000.parquet"));
|
||||
fileSlice.addLogFile(new HoodieLogFile(
|
||||
new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN)))));
|
||||
fileSlice.addLogFile(new HoodieLogFile(
|
||||
new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN)))));
|
||||
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
|
||||
noLogFileSlice.setDataFile(new TestHoodieDataFile(fullPartitionPath.toString() + "/noLog_1_000.parquet"));
|
||||
noLogFileSlice.setBaseFile(new TestHoodieBaseFile(fullPartitionPath.toString() + "/noLog_1_000.parquet"));
|
||||
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
|
||||
noDataFileSlice.addLogFile(new HoodieLogFile(
|
||||
new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN)))));
|
||||
@@ -241,8 +241,8 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
|
||||
Assert.assertEquals("Partition path is correct", expPartitionPath, op.getPartitionPath());
|
||||
Assert.assertEquals("Same base-instant", slice.getBaseInstantTime(), op.getBaseInstantTime());
|
||||
Assert.assertEquals("Same file-id", slice.getFileId(), op.getFileId());
|
||||
if (slice.getDataFile().isPresent()) {
|
||||
HoodieDataFile df = slice.getDataFile().get();
|
||||
if (slice.getBaseFile().isPresent()) {
|
||||
HoodieBaseFile df = slice.getBaseFile().get();
|
||||
Assert.assertEquals("Same data-file", version == COMPACTION_METADATA_VERSION_1 ? df.getPath() : df.getFileName(),
|
||||
op.getDataFilePath());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user