
[HUDI-3743] Support DELETE_PARTITION for metadata table (#5169)

To drop any metadata partition (index), we can reuse the DELETE_PARTITION operation on the metadata table. Building on this, we can support dropping an index (with a table config update) from the async metadata indexer.

- Add a new API in HoodieTableMetadataWriter
- Currently only supported for the Spark metadata writer (a usage sketch follows below)
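As a rough sketch of how the new writer API might be invoked (the method name deletePartitions and its exact signature are assumptions based on this description, not taken verbatim from the diff below):

import java.util.Collections;

import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metadata.MetadataPartitionType;

public class DropMetadataPartitionSketch {
  // Drops the bloom_filters partition (index) of the metadata table by
  // issuing a DELETE_PARTITION operation against it.
  static void dropBloomFilterIndex(HoodieTableMetadataWriter metadataWriter, String instantTime) {
    // Assumed API shape; per this commit, only the Spark metadata writer supports it.
    metadataWriter.deletePartitions(instantTime, Collections.singletonList(MetadataPartitionType.BLOOM_FILTERS));
  }
}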
Author: Sagar Sumit
Date: 2022-04-01 06:59:17 +05:30 (committed via GitHub)
Parent: 28dafa774e
Commit: a048e940fd
10 changed files with 170 additions and 30 deletions

org/apache/hudi/metadata/HoodieMetadataPayload.java

@@ -18,13 +18,6 @@
 package org.apache.hudi.metadata;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.avro.model.HoodieMetadataBloomFilter;
 import org.apache.hudi.avro.model.HoodieMetadataColumnStats;
 import org.apache.hudi.avro.model.HoodieMetadataFileInfo;
@@ -42,6 +35,14 @@ import org.apache.hudi.common.util.hash.PartitionIndexID;
 import org.apache.hudi.exception.HoodieMetadataException;
 import org.apache.hudi.io.storage.HoodieHFileReader;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
@@ -239,6 +240,23 @@ public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadataPayload>
     return new HoodieAvroRecord<>(key, payload);
   }

+  /**
+   * Create and return a {@code HoodieMetadataPayload} to save list of partitions.
+   *
+   * @param partitionsAdded The list of added partitions
+   * @param partitionsDeleted The list of deleted partitions
+   */
+  public static HoodieRecord<HoodieMetadataPayload> createPartitionListRecord(List<String> partitionsAdded, List<String> partitionsDeleted) {
+    Map<String, HoodieMetadataFileInfo> fileInfo = new HashMap<>();
+    partitionsAdded.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, false)));
+    partitionsDeleted.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, true)));
+
+    HoodieKey key = new HoodieKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.getPartitionPath());
+    HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_PARTITION_LIST,
+        fileInfo);
+    return new HoodieAvroRecord<>(key, payload);
+  }
+
   /**
    * Create and return a {@code HoodieMetadataPayload} to save list of files within a partition.
    *
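For illustration, a hedged sketch of how the new two-argument overload above encodes adds and deletes in a single files-partition record (the partition paths are hypothetical):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.metadata.HoodieMetadataPayload;

public class PartitionListRecordSketch {
  static HoodieRecord<HoodieMetadataPayload> build() {
    // Hypothetical partition paths.
    List<String> added = Arrays.asList("2022/03/01", "2022/03/02");
    List<String> deleted = Collections.singletonList("2021/12/31");
    // One record keyed by RECORDKEY_PARTITION_LIST in the FILES partition:
    // added partitions carry HoodieMetadataFileInfo(0L, false) and deleted
    // ones HoodieMetadataFileInfo(0L, true), so readers can tell live
    // partitions from dropped ones.
    return HoodieMetadataPayload.createPartitionListRecord(added, deleted);
  }
}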

org/apache/hudi/metadata/HoodieTableMetadataUtil.java

@@ -18,11 +18,6 @@
 package org.apache.hudi.metadata;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.avro.model.HoodieCleanMetadata;
 import org.apache.hudi.avro.model.HoodieMetadataColumnStats;
 import org.apache.hudi.avro.model.HoodieRestoreMetadata;
@@ -38,7 +33,9 @@ import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieDeltaWriteStat;
 import org.apache.hudi.common.model.HoodieFileFormat;
 import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
 import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.model.WriteOperationType;
 import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.TableSchemaResolver;
@@ -58,10 +55,17 @@ import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.exception.HoodieMetadataException;
 import org.apache.hudi.io.storage.HoodieFileReader;
 import org.apache.hudi.io.storage.HoodieFileReaderFactory;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;

 import javax.annotation.Nonnull;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
@@ -195,9 +199,12 @@ public class HoodieTableMetadataUtil {
     List<HoodieRecord> records = new ArrayList<>(commitMetadata.getPartitionToWriteStats().size());

     // Add record bearing added partitions list
-    ArrayList<String> partitionsAdded = new ArrayList<>(commitMetadata.getPartitionToWriteStats().keySet());
+    List<String> partitionsAdded = new ArrayList<>(commitMetadata.getPartitionToWriteStats().keySet());
-    records.add(HoodieMetadataPayload.createPartitionListRecord(partitionsAdded));
+    // Add record bearing deleted partitions list
+    List<String> partitionsDeleted = getPartitionsDeleted(commitMetadata);
+    records.add(HoodieMetadataPayload.createPartitionListRecord(partitionsAdded, partitionsDeleted));

     // Update files listing records for each individual partition
     List<HoodieRecord<HoodieMetadataPayload>> updatedPartitionFilesRecords =
@@ -247,6 +254,18 @@
     return records;
   }

+  private static ArrayList<String> getPartitionsDeleted(HoodieCommitMetadata commitMetadata) {
+    if (commitMetadata instanceof HoodieReplaceCommitMetadata
+        && WriteOperationType.DELETE_PARTITION.equals(commitMetadata.getOperationType())) {
+      Map<String, List<String>> partitionToReplaceFileIds =
+          ((HoodieReplaceCommitMetadata) commitMetadata).getPartitionToReplaceFileIds();
+      if (!partitionToReplaceFileIds.isEmpty()) {
+        return new ArrayList<>(partitionToReplaceFileIds.keySet());
+      }
+    }
+    return new ArrayList<>();
+  }
+
   /**
    * Convert commit action metadata to bloom filter records.
    *
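A hedged sketch of the helper's contract (the metadata objects and values below are hypothetical): only a replace commit whose operation type is DELETE_PARTITION contributes deleted partitions; any other commit yields an empty list.

import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.WriteOperationType;

public class GetPartitionsDeletedSketch {
  static void illustrate() {
    // A replace commit that dropped partition "2021/12/31" (hypothetical values).
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    replaceMetadata.setOperationType(WriteOperationType.DELETE_PARTITION);
    replaceMetadata.addReplaceFileId("2021/12/31", "file-id-1");
    // getPartitionsDeleted(replaceMetadata) would return ["2021/12/31"].

    // An ordinary upsert commit contributes no deleted partitions.
    HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
    commitMetadata.setOperationType(WriteOperationType.UPSERT);
    // getPartitionsDeleted(commitMetadata) would return an empty list.
  }
}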
@@ -371,7 +390,7 @@
       records.add(HoodieMetadataPayload.createPartitionListRecord(deletedPartitions, true));
     }
     LOG.info("Updating at " + instantTime + " from Clean. #partitions_updated=" + records.size()
-        + ", #files_deleted=" + fileDeleteCount[0]);
+        + ", #files_deleted=" + fileDeleteCount[0] + ", #partitions_deleted=" + deletedPartitions.size());
     return records;
   }