From 1e285dc3999c56a3302b5ed9e5c38a6bcb884f92 Mon Sep 17 00:00:00 2001 From: zhangyue19921010 <69956021+zhangyue19921010@users.noreply.github.com> Date: Sat, 23 Oct 2021 00:03:58 +0800 Subject: [PATCH] [HUDI-2489] Tuning HoodieROTablePathFilter by caching hoodieTableFileSystemView, aiming to reduce unnecessary list/get requests (#3719) Co-authored-by: yuezhang --- .../hudi/hadoop/HoodieROTablePathFilter.java | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java index d94018b88..c797f59ef 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java @@ -78,6 +78,11 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial */ Map metaClientCache; + /** + * HoodieTableFileSystemView Cache. + */ + private Map hoodieTableFileSystemViewCache; + /** * Hadoop configurations for the FileSystem. 
*/ @@ -97,6 +102,7 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial this.nonHoodiePathCache = new HashSet<>(); this.conf = new SerializableConfiguration(conf); this.metaClientCache = new HashMap<>(); + this.hoodieTableFileSystemViewCache = new HashMap<>(); } /** @@ -175,8 +181,15 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial metaClientCache.put(baseDir.toString(), metaClient); } - fsView = FileSystemViewManager.createInMemoryFileSystemView(engineContext, - metaClient, HoodieInputFormatUtils.buildMetadataConfig(getConf())); + HoodieTableMetaClient finalMetaClient = metaClient; + fsView = hoodieTableFileSystemViewCache.computeIfAbsent(baseDir.toString(), key -> + FileSystemViewManager.createInMemoryFileSystemView( + engineContext, + finalMetaClient, + HoodieInputFormatUtils.buildMetadataConfig(getConf()) + ) + ); + String partition = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), folder); List latestFiles = fsView.getLatestBaseFiles(partition).collect(Collectors.toList()); // populate the cache @@ -202,10 +215,6 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial } nonHoodiePathCache.add(folder.toString()); return true; - } finally { - if (fsView != null) { - fsView.close(); - } } } else { // files is at < 3 level depth in FS tree, can't be hoodie dataset