From f612a20815d35667ce49f0dbf51b6877bf7b1ab4 Mon Sep 17 00:00:00 2001 From: RexAn Date: Thu, 9 Dec 2021 18:34:11 +0800 Subject: [PATCH] [HUDI-2779] Cache BaseDir if HudiTableNotFound Exception thrown (#4014) --- .../apache/hudi/hadoop/HoodieROTablePathFilter.java | 12 ++++++++++-- .../hudi/hadoop/TestHoodieROTablePathFilter.java | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java index d94018b88..32dfd7c1e 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java @@ -71,7 +71,7 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial /** * Paths that are known to be non-hoodie tables. */ - private Set nonHoodiePathCache; + Set nonHoodiePathCache; /** * Table Meta Client Cache. @@ -167,6 +167,13 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial } if (baseDir != null) { + // Check whether baseDir in nonHoodiePathCache + if (nonHoodiePathCache.contains(baseDir.toString())) { + if (LOG.isDebugEnabled()) { + LOG.debug("Accepting non-hoodie path from cache: " + path); + } + return true; + } HoodieTableFileSystemView fsView = null; try { HoodieTableMetaClient metaClient = metaClientCache.get(baseDir.toString()); @@ -198,9 +205,10 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial } catch (TableNotFoundException e) { // Non-hoodie path, accept it. if (LOG.isDebugEnabled()) { - LOG.debug(String.format("(1) Caching non-hoodie path under %s \n", folder.toString())); + LOG.debug(String.format("(1) Caching non-hoodie path under %s with basePath %s \n", folder.toString(), baseDir.toString())); } nonHoodiePathCache.add(folder.toString()); + nonHoodiePathCache.add(baseDir.toString()); return true; } finally { if (fsView != null) { diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java index ba88df3e4..260afd5ce 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java @@ -68,9 +68,11 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness { assertFalse(pathFilter.accept(testTable.getInflightCommitFilePath("003"))); assertFalse(pathFilter.accept(testTable.getRequestedCompactionFilePath("004"))); assertFalse(pathFilter.accept(new Path("file:///" + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"))); + assertFalse(pathFilter.accept(new Path("file:///" + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/hoodie.properties"))); assertFalse(pathFilter.accept(new Path("file:///" + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME))); assertEquals(1, pathFilter.metaClientCache.size()); + assertEquals(0, pathFilter.nonHoodiePathCache.size(), "NonHoodiePathCache size should be 0"); } @Test @@ -82,6 +84,7 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness { java.nio.file.Path path2 = Paths.get(basePath, "nonhoodiefolder/somefile"); Files.createFile(path2); assertTrue(pathFilter.accept(new Path(path2.toUri()))); + assertEquals(2, pathFilter.nonHoodiePathCache.size(), "NonHoodiePathCache size should be 2"); } @Test @@ -93,5 +96,6 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness { Path partitionPath2 = testTable.getPartitionPath(p2).getParent(); assertTrue(pathFilter.accept(partitionPath1), "Directories should be accepted"); assertTrue(pathFilter.accept(partitionPath2), "Directories should be accepted"); + assertEquals(2, pathFilter.nonHoodiePathCache.size(), "NonHoodiePathCache size should be 2"); } }