1
0

Create .hoodie_partition_metadata in each partition, linking back to basepath

- Concurreny handled via taskID, failure recovery handled via renames
 - Falls back to search 3 levels up
 - Cli tool has command to add this to existing tables
This commit is contained in:
Vinoth Chandar
2017-03-21 23:57:30 -07:00
committed by vinoth chandar
parent 1e802ad4f2
commit 3129770fd0
10 changed files with 291 additions and 86 deletions

View File

@@ -0,0 +1,135 @@
/*
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.uber.hoodie.exception.HoodieException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.Properties;
/**
* The metadata that goes into the meta file in each partition
*/
public class HoodiePartitionMetadata {
public static final String HOODIE_PARTITION_METAFILE = ".hoodie_partition_metadata";
public static final String PARTITION_DEPTH_KEY = "partitionDepth";
public static final String COMMIT_TIME_KEY = "commitTime";
/**
* Contents of the metadata
*/
private final Properties props;
/**
* Path to the partition, about which we have the metadata
*/
private final Path partitionPath;
private final FileSystem fs;
private static Logger log = LogManager.getLogger(HoodiePartitionMetadata.class);
/**
* Construct metadata from existing partition
*/
public HoodiePartitionMetadata(FileSystem fs, Path partitionPath) {
this.fs = fs;
this.props = new Properties();
this.partitionPath = partitionPath;
}
/**
* Construct metadata object to be written out.
*/
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, Path partitionPath) {
this(fs, partitionPath);
props.setProperty(COMMIT_TIME_KEY, commitTime);
props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
}
public int getPartitionDepth() {
if (!props.contains(PARTITION_DEPTH_KEY)) {
throw new HoodieException("Could not find partitionDepth in partition metafile");
}
return Integer.parseInt(props.getProperty(PARTITION_DEPTH_KEY));
}
/**
* Write the metadata safely into partition
*/
public void trySave(int taskPartitionId) {
Path tmpMetaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
boolean metafileExists = false;
try {
metafileExists = fs.exists(metaPath);
if (!metafileExists) {
// write to temporary file
FSDataOutputStream os = fs.create(tmpMetaPath, true);
props.store(os, "partition metadata");
os.hsync();
os.hflush();
os.close();
// move to actual path
fs.rename(tmpMetaPath, metaPath);
}
} catch (IOException ioe) {
log.warn("Error trying to save partition metadata (this is okay, as long as atleast 1 of these succced), " +
partitionPath, ioe);
} finally {
if (!metafileExists) {
try {
// clean up tmp file, if still lying around
if (fs.exists(tmpMetaPath)) {
fs.delete(tmpMetaPath, false);
}
} catch (IOException ioe) {
log.warn("Error trying to clean up temporary files for " + partitionPath, ioe);
}
}
}
}
/**
* Read out the metadata for this partition
*/
public void readFromFS() {
try {
Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE);
FSDataInputStream is = fs.open(metaFile);
props.load(is);
} catch (IOException ioe) {
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath, ioe);
}
}
// methods related to partition meta data
public static boolean hasPartitionMetadata(FileSystem fs, Path partitionPath) throws IOException {
return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
}
}

View File

@@ -233,7 +233,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView, Serializa
*/
private Map<String, List<HoodieDataFile>> groupFilesByFileId(FileStatus[] files,
String maxCommitTime) throws IOException {
return Arrays.stream(files).flatMap(fileStatus -> {
return Arrays.stream(files)
// filter out files starting with "."
.filter(file -> !file.getPath().getName().startsWith("."))
.flatMap(fileStatus -> {
HoodieDataFile dataFile = new HoodieDataFile(fileStatus);
if (visibleActiveCommitTimeline.containsOrBeforeTimelineStarts(dataFile.getCommitTime())
&& visibleActiveCommitTimeline

View File

@@ -111,10 +111,6 @@ public class FSUtils {
return fs.listStatus(path)[0].getLen();
}
public static String globAllFiles(String basePath) {
return String.format("%s/*/*/*/*", basePath);
}
// TODO (weiy): rename the function for better readability
public static String getFileId(String fullFileName) {
return fullFileName.split("_")[0];