[HUDI-920] Support Incremental query for MOR table (#1938)
This commit is contained in:
@@ -23,6 +23,7 @@ import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieFileFormat;
|
||||
import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
@@ -470,4 +471,44 @@ public class HoodieInputFormatUtils {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate through a list of commits in ascending order, and extract the file status of
|
||||
* all affected files from the commits metadata grouping by partition path. If the files has
|
||||
* been touched multiple times in the given commits, the return value will keep the one
|
||||
* from the latest commit.
|
||||
* @param basePath
|
||||
* @param commitsToCheck
|
||||
* @param timeline
|
||||
* @return HashMap<partitionPath, HashMap<fileName, FileStatus>>
|
||||
* @throws IOException
|
||||
*/
|
||||
public static HashMap<String, HashMap<String, FileStatus>> listAffectedFilesForCommits(
|
||||
Path basePath, List<HoodieInstant> commitsToCheck, HoodieTimeline timeline) throws IOException {
|
||||
// TODO: Use HoodieMetaTable to extract affected file directly.
|
||||
HashMap<String, HashMap<String, FileStatus>> partitionToFileStatusesMap = new HashMap<>();
|
||||
List<HoodieInstant> sortedCommitsToCheck = new ArrayList<>(commitsToCheck);
|
||||
sortedCommitsToCheck.sort(HoodieInstant::compareTo);
|
||||
// Iterate through the given commits.
|
||||
for (HoodieInstant commit: sortedCommitsToCheck) {
|
||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get(),
|
||||
HoodieCommitMetadata.class);
|
||||
// Iterate through all the affected partitions of a commit.
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry: commitMetadata.getPartitionToWriteStats().entrySet()) {
|
||||
if (!partitionToFileStatusesMap.containsKey(entry.getKey())) {
|
||||
partitionToFileStatusesMap.put(entry.getKey(), new HashMap<>());
|
||||
}
|
||||
// Iterate through all the written files of this partition.
|
||||
for (HoodieWriteStat stat : entry.getValue()) {
|
||||
String relativeFilePath = stat.getPath();
|
||||
Path fullPath = relativeFilePath != null ? FSUtils.getPartitionPath(basePath, relativeFilePath) : null;
|
||||
if (fullPath != null) {
|
||||
FileStatus fs = new FileStatus(stat.getFileSizeInBytes(), false, 0, 0,
|
||||
0, fullPath);
|
||||
partitionToFileStatusesMap.get(entry.getKey()).put(fullPath.getName(), fs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return partitionToFileStatusesMap;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user