Create .hoodie_partition_metadata in each partition, linking back to basepath
- Concurrency handled via taskID, failure recovery handled via renames - Falls back to search 3 levels up - CLI tool has command to add this to existing tables
This commit is contained in:
committed by
vinoth chandar
parent
1e802ad4f2
commit
3129770fd0
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* The metadata that goes into the meta file in each partition
|
||||
*/
|
||||
public class HoodiePartitionMetadata {
|
||||
|
||||
public static final String HOODIE_PARTITION_METAFILE = ".hoodie_partition_metadata";
|
||||
public static final String PARTITION_DEPTH_KEY = "partitionDepth";
|
||||
public static final String COMMIT_TIME_KEY = "commitTime";
|
||||
|
||||
/**
|
||||
* Contents of the metadata
|
||||
*/
|
||||
private final Properties props;
|
||||
|
||||
/**
|
||||
* Path to the partition, about which we have the metadata
|
||||
*/
|
||||
private final Path partitionPath;
|
||||
|
||||
private final FileSystem fs;
|
||||
|
||||
private static Logger log = LogManager.getLogger(HoodiePartitionMetadata.class);
|
||||
|
||||
|
||||
/**
|
||||
* Construct metadata from existing partition
|
||||
*/
|
||||
public HoodiePartitionMetadata(FileSystem fs, Path partitionPath) {
|
||||
this.fs = fs;
|
||||
this.props = new Properties();
|
||||
this.partitionPath = partitionPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct metadata object to be written out.
|
||||
*/
|
||||
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, Path partitionPath) {
|
||||
this(fs, partitionPath);
|
||||
props.setProperty(COMMIT_TIME_KEY, commitTime);
|
||||
props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
|
||||
}
|
||||
|
||||
public int getPartitionDepth() {
|
||||
if (!props.contains(PARTITION_DEPTH_KEY)) {
|
||||
throw new HoodieException("Could not find partitionDepth in partition metafile");
|
||||
}
|
||||
return Integer.parseInt(props.getProperty(PARTITION_DEPTH_KEY));
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the metadata safely into partition
|
||||
*/
|
||||
public void trySave(int taskPartitionId) {
|
||||
Path tmpMetaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
|
||||
Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
|
||||
boolean metafileExists = false;
|
||||
|
||||
try {
|
||||
metafileExists = fs.exists(metaPath);
|
||||
if (!metafileExists) {
|
||||
// write to temporary file
|
||||
FSDataOutputStream os = fs.create(tmpMetaPath, true);
|
||||
props.store(os, "partition metadata");
|
||||
os.hsync();
|
||||
os.hflush();
|
||||
os.close();
|
||||
|
||||
// move to actual path
|
||||
fs.rename(tmpMetaPath, metaPath);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
log.warn("Error trying to save partition metadata (this is okay, as long as atleast 1 of these succced), " +
|
||||
partitionPath, ioe);
|
||||
} finally {
|
||||
if (!metafileExists) {
|
||||
try {
|
||||
// clean up tmp file, if still lying around
|
||||
if (fs.exists(tmpMetaPath)) {
|
||||
fs.delete(tmpMetaPath, false);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
log.warn("Error trying to clean up temporary files for " + partitionPath, ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read out the metadata for this partition
|
||||
*/
|
||||
public void readFromFS() {
|
||||
try {
|
||||
Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE);
|
||||
FSDataInputStream is = fs.open(metaFile);
|
||||
props.load(is);
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath, ioe);
|
||||
}
|
||||
}
|
||||
|
||||
// methods related to partition meta data
|
||||
public static boolean hasPartitionMetadata(FileSystem fs, Path partitionPath) throws IOException {
|
||||
return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
}
|
||||
}
|
||||
@@ -233,7 +233,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView, Serializa
|
||||
*/
|
||||
private Map<String, List<HoodieDataFile>> groupFilesByFileId(FileStatus[] files,
|
||||
String maxCommitTime) throws IOException {
|
||||
return Arrays.stream(files).flatMap(fileStatus -> {
|
||||
return Arrays.stream(files)
|
||||
// filter out files starting with "."
|
||||
.filter(file -> !file.getPath().getName().startsWith("."))
|
||||
.flatMap(fileStatus -> {
|
||||
HoodieDataFile dataFile = new HoodieDataFile(fileStatus);
|
||||
if (visibleActiveCommitTimeline.containsOrBeforeTimelineStarts(dataFile.getCommitTime())
|
||||
&& visibleActiveCommitTimeline
|
||||
|
||||
@@ -111,10 +111,6 @@ public class FSUtils {
|
||||
return fs.listStatus(path)[0].getLen();
|
||||
}
|
||||
|
||||
public static String globAllFiles(String basePath) {
|
||||
return String.format("%s/*/*/*/*", basePath);
|
||||
}
|
||||
|
||||
// TODO (weiy): rename the function for better readability
|
||||
public static String getFileId(String fullFileName) {
|
||||
return fullFileName.split("_")[0];
|
||||
|
||||
Reference in New Issue
Block a user