Create .hoodie_partition_metadata in each partition, linking back to basepath
- Concurrency handled via taskID, failure recovery handled via renames - Falls back to search 3 levels up - CLI tool has command to add this to existing tables
This commit is contained in:
committed by
vinoth chandar
parent
1e802ad4f2
commit
3129770fd0
@@ -16,6 +16,7 @@
|
||||
|
||||
package com.uber.hoodie.hadoop;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.mapreduce.JobContext;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
@@ -32,6 +33,7 @@ public class HoodieHiveUtil {
|
||||
public static final String DEFAULT_SCAN_MODE = LATEST_SCAN_MODE;
|
||||
public static final int DEFAULT_MAX_COMMITS = 1;
|
||||
public static final int MAX_COMMIT_ALL = -1;
|
||||
public static final int DEFAULT_LEVELS_TO_BASEPATH = 3;
|
||||
|
||||
public static Integer readMaxCommits(JobContext job, String tableName) {
|
||||
String maxCommitName = String.format(HOODIE_MAX_COMMIT_PATTERN, tableName);
|
||||
@@ -55,4 +57,12 @@ public class HoodieHiveUtil {
|
||||
LOG.info(modePropertyName + ": " + mode);
|
||||
return mode;
|
||||
}
|
||||
|
||||
public static Path getNthParent(Path path, int n) {
|
||||
Path parent = path;
|
||||
for (int i = 0; i < n; i++) {
|
||||
parent = parent.getParent();
|
||||
}
|
||||
return parent;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
package com.uber.hoodie.hadoop;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieDataFile;
|
||||
import com.uber.hoodie.common.model.HoodiePartitionMetadata;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
@@ -259,8 +260,13 @@ public class HoodieInputFormat extends MapredParquetInputFormat
|
||||
*/
|
||||
private HoodieTableMetaClient getTableMetaClient(Path dataPath) throws IOException {
|
||||
FileSystem fs = dataPath.getFileSystem(conf);
|
||||
// TODO - remove this hard-coding. Pass this in job conf, somehow. Or read the Table Location
|
||||
Path baseDir = dataPath.getParent().getParent().getParent();
|
||||
int levels = HoodieHiveUtil.DEFAULT_LEVELS_TO_BASEPATH;
|
||||
if (HoodiePartitionMetadata.hasPartitionMetadata(fs, dataPath)) {
|
||||
HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, dataPath);
|
||||
metadata.readFromFS();
|
||||
levels = metadata.getPartitionDepth();
|
||||
}
|
||||
Path baseDir = HoodieHiveUtil.getNthParent(dataPath, levels);
|
||||
LOG.info("Reading hoodie metadata from path " + baseDir.toString());
|
||||
return new HoodieTableMetaClient(fs, baseDir.toString());
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
package com.uber.hoodie.hadoop;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieDataFile;
|
||||
import com.uber.hoodie.common.model.HoodiePartitionMetadata;
|
||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
|
||||
import com.uber.hoodie.exception.DatasetNotFoundException;
|
||||
@@ -117,7 +118,15 @@ public class HoodieROTablePathFilter implements PathFilter, Serializable {
|
||||
}
|
||||
|
||||
// Perform actual checking.
|
||||
Path baseDir = safeGetParentsParent(folder);
|
||||
Path baseDir;
|
||||
if (HoodiePartitionMetadata.hasPartitionMetadata(fs, folder)) {
|
||||
HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, folder);
|
||||
metadata.readFromFS();
|
||||
baseDir = HoodieHiveUtil.getNthParent(folder, metadata.getPartitionDepth());
|
||||
} else {
|
||||
baseDir = safeGetParentsParent(folder);
|
||||
}
|
||||
|
||||
if (baseDir != null) {
|
||||
try {
|
||||
HoodieTableMetaClient metaClient =
|
||||
|
||||
Reference in New Issue
Block a user