1
0

[HUDI-2952] Fixing metadata table for non-partitioned dataset (#4243)

This commit is contained in:
Sivabalan Narayanan
2021-12-10 11:11:42 -05:00
committed by GitHub
parent f194566ed4
commit be368264f4
9 changed files with 104 additions and 33 deletions

View File

@@ -48,6 +48,7 @@ public interface HoodieTableMetadata extends Serializable, AutoCloseable {
String RECORDKEY_PARTITION_LIST = "__all_partitions__";
// The partition name used for non-partitioned tables
String NON_PARTITIONED_NAME = ".";
String EMPTY_PARTITION_NAME = "";
// Base path of the Metadata Table relative to the dataset (.hoodie/metadata)
static final String METADATA_TABLE_REL_PATH = HoodieTableMetaClient.METAFOLDER_NAME + Path.SEPARATOR + "metadata";

View File

@@ -52,6 +52,7 @@ import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME;
import static org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME;
/**
@@ -89,7 +90,7 @@ public class HoodieTableMetadataUtil {
List<HoodieRecord> records = new LinkedList<>();
List<String> allPartitions = new LinkedList<>();
commitMetadata.getPartitionToWriteStats().forEach((partitionStatName, writeStats) -> {
final String partition = partitionStatName.equals("") ? NON_PARTITIONED_NAME : partitionStatName;
final String partition = partitionStatName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionStatName;
allPartitions.add(partition);
Map<String, Long> newFiles = new HashMap<>(writeStats.size());
@@ -133,7 +134,8 @@ public class HoodieTableMetadataUtil {
public static List<HoodieRecord> convertMetadataToRecords(HoodieCleanMetadata cleanMetadata, String instantTime) {
List<HoodieRecord> records = new LinkedList<>();
int[] fileDeleteCount = {0};
cleanMetadata.getPartitionMetadata().forEach((partition, partitionMetadata) -> {
cleanMetadata.getPartitionMetadata().forEach((partitionName, partitionMetadata) -> {
final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionName;
// Files deleted from a partition
List<String> deletedFiles = partitionMetadata.getDeletePathPatterns();
HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partition, Option.empty(),
@@ -282,12 +284,13 @@ public class HoodieTableMetadataUtil {
List<HoodieRecord> records = new LinkedList<>();
int[] fileChangeCount = {0, 0}; // deletes, appends
partitionToDeletedFiles.forEach((partition, deletedFiles) -> {
partitionToDeletedFiles.forEach((partitionName, deletedFiles) -> {
fileChangeCount[0] += deletedFiles.size();
final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionName;
Option<Map<String, Long>> filesAdded = Option.empty();
if (partitionToAppendedFiles.containsKey(partition)) {
filesAdded = Option.of(partitionToAppendedFiles.remove(partition));
if (partitionToAppendedFiles.containsKey(partitionName)) {
filesAdded = Option.of(partitionToAppendedFiles.remove(partitionName));
}
HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partition, filesAdded,
@@ -295,7 +298,8 @@ public class HoodieTableMetadataUtil {
records.add(record);
});
partitionToAppendedFiles.forEach((partition, appendedFileMap) -> {
partitionToAppendedFiles.forEach((partitionName, appendedFileMap) -> {
final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionName;
fileChangeCount[1] += appendedFileMap.size();
// Validate that no appended file has been deleted

View File

@@ -367,7 +367,9 @@ public class FileCreateUtils {
if (Files.notExists(basePath)) {
return Collections.emptyList();
}
return Files.list(basePath).filter(entry -> !entry.getFileName().toString().equals(HoodieTableMetaClient.METAFOLDER_NAME)).collect(Collectors.toList());
return Files.list(basePath).filter(entry -> (!entry.getFileName().toString().equals(HoodieTableMetaClient.METAFOLDER_NAME)
&& !entry.getFileName().toString().contains("parquet") && !entry.getFileName().toString().contains("log"))
&& !entry.getFileName().toString().endsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)).collect(Collectors.toList());
}
/**

View File

@@ -602,7 +602,7 @@ public class HoodieTestTable {
}
/**
 * Returns the partition paths of this test table by passing the table's base path to
 * {@code FileCreateUtils.getPartitionPaths}.
 *
 * @return the list produced by {@code FileCreateUtils.getPartitionPaths} for the base path
 * @throws IOException presumably propagated from the underlying directory listing; confirm against the helper
 */
public List<java.nio.file.Path> getAllPartitionPaths() throws IOException {
// NOTE(review): this text is a rendered diff hunk with its +/- markers stripped. The two
// declarations below look like the removed and the added version of the same statement,
// so only one of them is expected to exist in the real file (confirm against the commit).
// Apparent old form: resolve the temp folder under the base path, then walk up two parents.
java.nio.file.Path basePathPath = Paths.get(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME).getParent().getParent();
// Apparent new form: use the base path directly.
java.nio.file.Path basePathPath = Paths.get(basePath);
return FileCreateUtils.getPartitionPaths(basePathPath);
}
@@ -660,8 +660,10 @@ public class HoodieTestTable {
return FileSystemTestUtils.listRecursive(fs, new Path(Paths.get(basePath, partitionPath).toString())).stream()
.filter(entry -> {
boolean toReturn = true;
String filePath = entry.getPath().toString();
String fileName = entry.getPath().getName();
if (fileName.equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) {
if (fileName.equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE) || (!fileName.contains("log") && !fileName.contains("parquet"))
|| filePath.contains("metadata")) {
toReturn = false;
} else {
for (String inflight : inflightCommits) {