1
0

[HUDI-2489] Tuning HoodieROTablePathFilter by caching HoodieTableFileSystemView to reduce unnecessary list/get requests (#3719)

Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
zhangyue19921010
2021-10-23 00:03:58 +08:00
committed by GitHub
parent 499af7c039
commit 1e285dc399

View File

@@ -78,6 +78,11 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
*/ */
Map<String, HoodieTableMetaClient> metaClientCache; Map<String, HoodieTableMetaClient> metaClientCache;
/**
* HoodieTableFileSystemView Cache.
*/
private Map<String, HoodieTableFileSystemView> hoodieTableFileSystemViewCache;
/** /**
* Hadoop configurations for the FileSystem. * Hadoop configurations for the FileSystem.
*/ */
@@ -97,6 +102,7 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
this.nonHoodiePathCache = new HashSet<>(); this.nonHoodiePathCache = new HashSet<>();
this.conf = new SerializableConfiguration(conf); this.conf = new SerializableConfiguration(conf);
this.metaClientCache = new HashMap<>(); this.metaClientCache = new HashMap<>();
this.hoodieTableFileSystemViewCache = new HashMap<>();
} }
/** /**
@@ -175,8 +181,15 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
metaClientCache.put(baseDir.toString(), metaClient); metaClientCache.put(baseDir.toString(), metaClient);
} }
fsView = FileSystemViewManager.createInMemoryFileSystemView(engineContext, HoodieTableMetaClient finalMetaClient = metaClient;
metaClient, HoodieInputFormatUtils.buildMetadataConfig(getConf())); fsView = hoodieTableFileSystemViewCache.computeIfAbsent(baseDir.toString(), key ->
FileSystemViewManager.createInMemoryFileSystemView(
engineContext,
finalMetaClient,
HoodieInputFormatUtils.buildMetadataConfig(getConf())
)
);
String partition = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), folder); String partition = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), folder);
List<HoodieBaseFile> latestFiles = fsView.getLatestBaseFiles(partition).collect(Collectors.toList()); List<HoodieBaseFile> latestFiles = fsView.getLatestBaseFiles(partition).collect(Collectors.toList());
// populate the cache // populate the cache
@@ -202,10 +215,6 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
} }
nonHoodiePathCache.add(folder.toString()); nonHoodiePathCache.add(folder.toString());
return true; return true;
} finally {
if (fsView != null) {
fsView.close();
}
} }
} else { } else {
// files is at < 3 level depth in FS tree, can't be hoodie dataset // files is at < 3 level depth in FS tree, can't be hoodie dataset