From d1bb804577168107f347b10913538e13b180d004 Mon Sep 17 00:00:00 2001 From: Nishith Agarwal Date: Fri, 4 Jan 2019 15:01:49 -0800 Subject: [PATCH] Passing a path filter to avoid including folders under .hoodie directory as partition paths --- .../java/com/uber/hoodie/common/util/FSUtils.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java index ba9cef9ed..a18c6539f 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java @@ -19,6 +19,7 @@ package com.uber.hoodie.common.util; import com.google.common.base.Preconditions; import com.uber.hoodie.common.model.HoodieLogFile; import com.uber.hoodie.common.model.HoodiePartitionMetadata; +import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.exception.HoodieIOException; import com.uber.hoodie.exception.InvalidHoodiePathException; @@ -38,6 +39,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.log4j.LogManager; @@ -135,7 +137,14 @@ public class FSUtils { public static List getAllFoldersThreeLevelsDown(FileSystem fs, String basePath) throws IOException { List datePartitions = new ArrayList<>(); - FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*")); + // Avoid listing and including any folders under the metafolder + PathFilter filter = (path) -> { + if (path.toString().contains(HoodieTableMetaClient.METAFOLDER_NAME)) { + return false; + } + return true; + }; + FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"), filter); for (FileStatus status : folders) { Path path = status.getPath(); datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(),