1
0

Create .hoodie_partition_metadata in each partition, linking back to basepath

- Concurrency handled via taskID, failure recovery handled via renames
 - Falls back to search 3 levels up
 - Cli tool has command to add this to existing tables
This commit is contained in:
Vinoth Chandar
2017-03-21 23:57:30 -07:00
committed by vinoth chandar
parent 1e802ad4f2
commit 3129770fd0
10 changed files with 291 additions and 86 deletions

View File

@@ -16,6 +16,7 @@
package com.uber.hoodie.hadoop;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -32,6 +33,7 @@ public class HoodieHiveUtil {
// Scan mode used when none is specified; aliases LATEST_SCAN_MODE.
public static final String DEFAULT_SCAN_MODE = LATEST_SCAN_MODE;
// Default for the max-commits setting.
public static final int DEFAULT_MAX_COMMITS = 1;
// NOTE(review): presumably a sentinel meaning "no limit on commits" - confirm against callers.
public static final int MAX_COMMIT_ALL = -1;
// Number of directory levels to walk up from a partition path when falling back
// to locate the base path (the fallback searches 3 levels up).
public static final int DEFAULT_LEVELS_TO_BASEPATH = 3;
public static Integer readMaxCommits(JobContext job, String tableName) {
String maxCommitName = String.format(HOODIE_MAX_COMMIT_PATTERN, tableName);
@@ -55,4 +57,12 @@ public class HoodieHiveUtil {
LOG.info(modePropertyName + ": " + mode);
return mode;
}
/**
 * Returns the ancestor that lies {@code n} levels above {@code path}.
 *
 * <p>The walk calls {@link Path#getParent()} once per level. If a step yields
 * {@code null} (no further parent exists, e.g. the root was reached) the walk
 * stops and {@code null} is returned, rather than dereferencing the missing
 * parent and failing with a {@link NullPointerException}.
 *
 * @param path the starting path; may be {@code null}, in which case {@code null} is returned
 * @param n number of levels to ascend; values {@code <= 0} return {@code path} unchanged
 * @return the n-th parent of {@code path}, or {@code null} if the hierarchy is
 *         shallower than {@code n} levels
 */
public static Path getNthParent(Path path, int n) {
  Path ancestor = path;
  // Stop early once there is no parent left to climb to.
  for (int level = 0; level < n && ancestor != null; level++) {
    ancestor = ancestor.getParent();
  }
  return ancestor;
}
}