[HUDI-3743] Support DELETE_PARTITION for metadata table (#5169)
In order to drop any metadata partition (index), we can reuse the DELETE_PARTITION operation in the metadata table. Subsequent to this, we can support drop index (with table config update) for the async metadata indexer. - Add a new API in HoodieTableMetadataWriter - Currently only supported for the Spark metadata writer
This commit is contained in:
@@ -18,13 +18,6 @@
|
||||
|
||||
package org.apache.hudi.metadata;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.avro.model.HoodieMetadataBloomFilter;
|
||||
import org.apache.hudi.avro.model.HoodieMetadataColumnStats;
|
||||
import org.apache.hudi.avro.model.HoodieMetadataFileInfo;
|
||||
@@ -42,6 +35,14 @@ import org.apache.hudi.common.util.hash.PartitionIndexID;
|
||||
import org.apache.hudi.exception.HoodieMetadataException;
|
||||
import org.apache.hudi.io.storage.HoodieHFileReader;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
@@ -239,6 +240,23 @@ public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadata
|
||||
return new HoodieAvroRecord<>(key, payload);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create and return a {@code HoodieMetadataPayload} to save list of partitions.
|
||||
*
|
||||
* @param partitionsAdded The list of added partitions
|
||||
* @param partitionsDeleted The list of deleted partitions
|
||||
*/
|
||||
public static HoodieRecord<HoodieMetadataPayload> createPartitionListRecord(List<String> partitionsAdded, List<String> partitionsDeleted) {
|
||||
Map<String, HoodieMetadataFileInfo> fileInfo = new HashMap<>();
|
||||
partitionsAdded.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, false)));
|
||||
partitionsDeleted.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, true)));
|
||||
|
||||
HoodieKey key = new HoodieKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.getPartitionPath());
|
||||
HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_PARTITION_LIST,
|
||||
fileInfo);
|
||||
return new HoodieAvroRecord<>(key, payload);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create and return a {@code HoodieMetadataPayload} to save list of files within a partition.
|
||||
*
|
||||
|
||||
@@ -18,11 +18,6 @@
|
||||
|
||||
package org.apache.hudi.metadata;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieMetadataColumnStats;
|
||||
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||
@@ -38,7 +33,9 @@ import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieDeltaWriteStat;
|
||||
import org.apache.hudi.common.model.HoodieFileFormat;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
import org.apache.hudi.common.table.HoodieTableConfig;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.TableSchemaResolver;
|
||||
@@ -58,10 +55,17 @@ import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.exception.HoodieMetadataException;
|
||||
import org.apache.hudi.io.storage.HoodieFileReader;
|
||||
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
@@ -195,9 +199,12 @@ public class HoodieTableMetadataUtil {
|
||||
List<HoodieRecord> records = new ArrayList<>(commitMetadata.getPartitionToWriteStats().size());
|
||||
|
||||
// Add record bearing added partitions list
|
||||
ArrayList<String> partitionsAdded = new ArrayList<>(commitMetadata.getPartitionToWriteStats().keySet());
|
||||
List<String> partitionsAdded = new ArrayList<>(commitMetadata.getPartitionToWriteStats().keySet());
|
||||
|
||||
records.add(HoodieMetadataPayload.createPartitionListRecord(partitionsAdded));
|
||||
// Add record bearing deleted partitions list
|
||||
List<String> partitionsDeleted = getPartitionsDeleted(commitMetadata);
|
||||
|
||||
records.add(HoodieMetadataPayload.createPartitionListRecord(partitionsAdded, partitionsDeleted));
|
||||
|
||||
// Update files listing records for each individual partition
|
||||
List<HoodieRecord<HoodieMetadataPayload>> updatedPartitionFilesRecords =
|
||||
@@ -247,6 +254,18 @@ public class HoodieTableMetadataUtil {
|
||||
return records;
|
||||
}
|
||||
|
||||
private static ArrayList<String> getPartitionsDeleted(HoodieCommitMetadata commitMetadata) {
|
||||
if (commitMetadata instanceof HoodieReplaceCommitMetadata
|
||||
&& WriteOperationType.DELETE_PARTITION.equals(commitMetadata.getOperationType())) {
|
||||
Map<String, List<String>> partitionToReplaceFileIds =
|
||||
((HoodieReplaceCommitMetadata) commitMetadata).getPartitionToReplaceFileIds();
|
||||
if (!partitionToReplaceFileIds.isEmpty()) {
|
||||
return new ArrayList<>(partitionToReplaceFileIds.keySet());
|
||||
}
|
||||
}
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert commit action metadata to bloom filter records.
|
||||
*
|
||||
@@ -371,7 +390,7 @@ public class HoodieTableMetadataUtil {
|
||||
records.add(HoodieMetadataPayload.createPartitionListRecord(deletedPartitions, true));
|
||||
}
|
||||
LOG.info("Updating at " + instantTime + " from Clean. #partitions_updated=" + records.size()
|
||||
+ ", #files_deleted=" + fileDeleteCount[0]);
|
||||
+ ", #files_deleted=" + fileDeleteCount[0] + ", #partitions_deleted=" + deletedPartitions.size());
|
||||
return records;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user