1
0

[HUDI-4400] Fix missing bloom filters in metadata table in non-partitioned table (#6113)

Fixes missing bloom filters in the metadata table for non-partitioned tables. The bloom filter record keys were generated incorrectly because the wrong file names were used when building the bloom filter metadata payload.
This commit is contained in:
Y Ethan Guo
2022-07-21 11:38:25 -07:00
committed by GitHub
parent f52b93fd10
commit 50cdb867c7
4 changed files with 190 additions and 50 deletions

View File

@@ -55,6 +55,7 @@ import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.hudi.io.storage.HoodieFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.util.Lazy;
import org.apache.avro.AvroTypeException;
import org.apache.avro.LogicalTypes;
@@ -63,7 +64,6 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.util.Lazy;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -409,8 +409,11 @@ public class HoodieTableMetadataUtil {
LOG.error("Failed to find path in write stat to update metadata table " + hoodieWriteStat);
return Collections.emptyListIterator();
}
int offset = partition.equals(NON_PARTITIONED_NAME) ? (pathWithPartition.startsWith("/") ? 1 : 0) :
partition.length() + 1;
// For partitioned table, "partition" contains the relative partition path;
// for non-partitioned table, "partition" is empty
int offset = StringUtils.isNullOrEmpty(partition)
? (pathWithPartition.startsWith("/") ? 1 : 0) : partition.length() + 1;
final String fileName = pathWithPartition.substring(offset);
if (!FSUtils.isBaseFile(new Path(fileName))) {

View File

@@ -19,17 +19,6 @@
package org.apache.hudi.common.testutils;
import org.apache.avro.Conversions;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.fs.FSUtils;
@@ -47,6 +36,18 @@ import org.apache.hudi.common.util.AvroOrcUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.avro.Conversions;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.orc.TypeDescription;

View File

@@ -52,6 +52,7 @@ import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanV2MigrationHandler;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieIOException;
@@ -1060,7 +1061,7 @@ public class HoodieTestTable {
FileCreateUtils.baseFileName(commitTime, fileIdInfo.getKey());
writeStat.setFileId(fileName);
writeStat.setPartitionPath(partition);
writeStat.setPath(partition + "/" + fileName);
writeStat.setPath(StringUtils.isNullOrEmpty(partition) ? fileName : partition + "/" + fileName);
writeStat.setTotalWriteBytes(fileIdInfo.getValue());
writeStat.setFileSizeInBytes(fileIdInfo.getValue());
writeStats.add(writeStat);
@@ -1086,7 +1087,7 @@ public class HoodieTestTable {
FileCreateUtils.logFileName(commitTime, fileIdInfo.getKey(), fileIdInfo.getValue()[0]);
writeStat.setFileId(fileName);
writeStat.setPartitionPath(partition);
writeStat.setPath(partition + "/" + fileName);
writeStat.setPath(StringUtils.isNullOrEmpty(partition) ? fileName : partition + "/" + fileName);
writeStat.setTotalWriteBytes(fileIdInfo.getValue()[1]);
writeStat.setFileSizeInBytes(fileIdInfo.getValue()[1]);
writeStats.add(writeStat);