From 9710b5a3a63633a3487431219b6af16b8753598f Mon Sep 17 00:00:00 2001
From: Balaji Varadarajan
Date: Fri, 28 Sep 2018 21:41:28 -0700
Subject: [PATCH] Ensure Hoodie metadata folder and files are filtered out when constructing Parquet Data Source

---
Reviewer notes (free-form text between the "---" marker and the first
"diff --git" line; not part of any hunk):

* HoodieROTablePathFilter: a path whose string form contains
  "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" or ends with
  "/" + HoodieTableMetaClient.METAFOLDER_NAME is now rejected (return false)
  before the partition-metadata check runs.
* HoodieTestUtils: adds getInflightCommitFilePath and
  getRequestedCompactionFilePath, both pure string concatenation.
* NOTE(review): getRequestedCompactionFilePath appends
  HoodieTimeline.INFLIGHT_COMMIT_EXTENSION; confirm whether a
  requested-compaction extension was intended instead.
* NOTE(review): the last added assertFalse for ("003", "f3") repeats the
  assertion in the context lines directly above the additions.
* NOTE(review): both new helpers declare "throws IOException" although their
  bodies only concatenate strings.

 .../uber/hoodie/common/model/HoodieTestUtils.java | 10 ++++++++++
 .../hoodie/hadoop/HoodieROTablePathFilter.java    | 10 ++++++++++
 .../hadoop/TestHoodieROTablePathFilter.java       | 15 +++++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/model/HoodieTestUtils.java b/hoodie-common/src/test/java/com/uber/hoodie/common/model/HoodieTestUtils.java
index eda4bc77c..9049d81a4 100644
--- a/hoodie-common/src/test/java/com/uber/hoodie/common/model/HoodieTestUtils.java
+++ b/hoodie-common/src/test/java/com/uber/hoodie/common/model/HoodieTestUtils.java
@@ -219,6 +219,16 @@ public class HoodieTestUtils {
     return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION;
   }
 
+  public static final String getInflightCommitFilePath(String basePath, String commitTime) throws IOException {
+    return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime
+        + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
+  }
+
+  public static final String getRequestedCompactionFilePath(String basePath, String commitTime) throws IOException {
+    return basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + commitTime
+        + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
+  }
+
   public static final boolean doesDataFileExist(String basePath, String partitionPath, String commitTime, String fileID)
       throws IOException {
     return new File(getDataFilePath(basePath, partitionPath, commitTime, fileID)).exists();
diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieROTablePathFilter.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieROTablePathFilter.java
index ae55ea21b..f408cb945 100644
--- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieROTablePathFilter.java
+++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieROTablePathFilter.java
@@ -111,6 +111,16 @@ public class HoodieROTablePathFilter implements PathFilter, Serializable {
         return hoodiePathCache.get(folder.toString()).contains(path);
       }
 
+      // Skip all files that are descendants of .hoodie in its path.
+      String filePath = path.toString();
+      if (filePath.contains("/" + HoodieTableMetaClient.METAFOLDER_NAME + "/")
+          || filePath.endsWith("/" + HoodieTableMetaClient.METAFOLDER_NAME)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug(String.format("Skipping Hoodie Metadata file %s \n", filePath));
+        }
+        return false;
+      }
+
       // Perform actual checking.
       Path baseDir;
       if (HoodiePartitionMetadata.hasPartitionMetadata(fs, folder)) {
diff --git a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestHoodieROTablePathFilter.java b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestHoodieROTablePathFilter.java
index e139b7371..041e56db9 100644
--- a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestHoodieROTablePathFilter.java
+++ b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestHoodieROTablePathFilter.java
@@ -23,6 +23,7 @@ import com.uber.hoodie.common.model.HoodieTestUtils;
 import com.uber.hoodie.common.table.HoodieTableMetaClient;
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
 import org.apache.hadoop.fs.Path;
 import org.junit.Before;
 import org.junit.Rule;
@@ -51,6 +52,7 @@ public class TestHoodieROTablePathFilter {
 
     HoodieTestUtils.createCommitFiles(basePath, "001", "002");
     HoodieTestUtils.createInflightCommitFiles(basePath, "003");
+    HoodieTestUtils.createCompactionRequest(metaClient, "004", new ArrayList<>());
 
     HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f1");
     HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f2");
@@ -72,6 +74,19 @@ public class TestHoodieROTablePathFilter {
         "file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "002", "f2"))));
     assertFalse(pathFilter.accept(new Path(
         "file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "003", "f3"))));
+    assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getCommitFilePath(basePath, "001"))));
+    assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getCommitFilePath(basePath, "002"))));
+    assertFalse(pathFilter.accept(new Path("file:///"
+        + HoodieTestUtils.getInflightCommitFilePath(basePath, "003"))));
+    assertFalse(pathFilter.accept(new Path("file:///"
+        + HoodieTestUtils.getRequestedCompactionFilePath(basePath, "004"))));
+    assertFalse(pathFilter.accept(new Path("file:///" + basePath + "/"
+        + HoodieTableMetaClient.METAFOLDER_NAME + "/")));
+    assertFalse(pathFilter.accept(new Path("file:///" + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME)));
+
+    assertFalse(pathFilter.accept(new Path(
+        "file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "003", "f3"))));
+
   }
 
   @Test