[HUDI-2489] Tune HoodieROTablePathFilter by caching HoodieTableFileSystemView to reduce unnecessary list/get requests (#3719)
Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
@@ -78,6 +78,11 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
|
|||||||
*/
|
*/
|
||||||
Map<String, HoodieTableMetaClient> metaClientCache;
|
Map<String, HoodieTableMetaClient> metaClientCache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* HoodieTableFileSystemView Cache.
|
||||||
|
*/
|
||||||
|
private Map<String, HoodieTableFileSystemView> hoodieTableFileSystemViewCache;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hadoop configurations for the FileSystem.
|
* Hadoop configurations for the FileSystem.
|
||||||
*/
|
*/
|
||||||
@@ -97,6 +102,7 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
|
|||||||
this.nonHoodiePathCache = new HashSet<>();
|
this.nonHoodiePathCache = new HashSet<>();
|
||||||
this.conf = new SerializableConfiguration(conf);
|
this.conf = new SerializableConfiguration(conf);
|
||||||
this.metaClientCache = new HashMap<>();
|
this.metaClientCache = new HashMap<>();
|
||||||
|
this.hoodieTableFileSystemViewCache = new HashMap<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -175,8 +181,15 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
|
|||||||
metaClientCache.put(baseDir.toString(), metaClient);
|
metaClientCache.put(baseDir.toString(), metaClient);
|
||||||
}
|
}
|
||||||
|
|
||||||
fsView = FileSystemViewManager.createInMemoryFileSystemView(engineContext,
|
HoodieTableMetaClient finalMetaClient = metaClient;
|
||||||
metaClient, HoodieInputFormatUtils.buildMetadataConfig(getConf()));
|
fsView = hoodieTableFileSystemViewCache.computeIfAbsent(baseDir.toString(), key ->
|
||||||
|
FileSystemViewManager.createInMemoryFileSystemView(
|
||||||
|
engineContext,
|
||||||
|
finalMetaClient,
|
||||||
|
HoodieInputFormatUtils.buildMetadataConfig(getConf())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
String partition = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), folder);
|
String partition = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), folder);
|
||||||
List<HoodieBaseFile> latestFiles = fsView.getLatestBaseFiles(partition).collect(Collectors.toList());
|
List<HoodieBaseFile> latestFiles = fsView.getLatestBaseFiles(partition).collect(Collectors.toList());
|
||||||
// populate the cache
|
// populate the cache
|
||||||
@@ -202,10 +215,6 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
|
|||||||
}
|
}
|
||||||
nonHoodiePathCache.add(folder.toString());
|
nonHoodiePathCache.add(folder.toString());
|
||||||
return true;
|
return true;
|
||||||
} finally {
|
|
||||||
if (fsView != null) {
|
|
||||||
fsView.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// file is at < 3 levels of depth in the FS tree, so it can't belong to a hoodie dataset
|
// file is at < 3 levels of depth in the FS tree, so it can't belong to a hoodie dataset
|
||||||
|
|||||||
Reference in New Issue
Block a user