[HUDI-2593] Virtual keys support for metadata table (#3968)
- Metadata table today has virtual keys disabled, thereby populating the metafields for each record written out and increasing the overall storage space used. Hereby adding virtual keys support for the metadata table so that metafields are disabled for metadata table records. - Adding a custom KeyGenerator for the metadata table so as to not rely on the default Base/SimpleKeyGenerators, which currently look for the record key and partition field set in the table config. - AbstractHoodieLogRecordReader's version of processing the next data block and createHoodieRecord() will be a generic version, and the derived class HoodieMetadataMergedLogRecordReader takes care of the special creation of records from explicitly passed-in partition names.
This commit is contained in:
committed by
GitHub
parent
eba354e922
commit
459b34240b
@@ -360,7 +360,12 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writeSchemaWithMetaFields.toString());
|
||||
List<HoodieLogBlock> blocks = new ArrayList<>(2);
|
||||
if (recordList.size() > 0) {
|
||||
blocks.add(HoodieDataBlock.getBlock(hoodieTable.getLogDataBlockFormat(), recordList, header));
|
||||
if (config.populateMetaFields()) {
|
||||
blocks.add(HoodieDataBlock.getBlock(hoodieTable.getLogDataBlockFormat(), recordList, header));
|
||||
} else {
|
||||
final String keyField = hoodieTable.getMetaClient().getTableConfig().getRecordKeyFieldProp();
|
||||
blocks.add(HoodieDataBlock.getBlock(hoodieTable.getLogDataBlockFormat(), recordList, header, keyField));
|
||||
}
|
||||
}
|
||||
if (keysToDelete.size() > 0) {
|
||||
blocks.add(new HoodieDeleteBlock(keysToDelete.toArray(new HoodieKey[keysToDelete.size()]), header));
|
||||
|
||||
@@ -41,6 +41,7 @@ import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.WriteConcurrencyMode;
|
||||
import org.apache.hudi.common.table.HoodieTableConfig;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.log.HoodieLogFormat;
|
||||
import org.apache.hudi.common.table.log.block.HoodieDeleteBlock;
|
||||
@@ -76,6 +77,7 @@ import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER;
|
||||
@@ -91,6 +93,10 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(HoodieBackedTableMetadataWriter.class);
|
||||
|
||||
// Virtual keys support for metadata table. This Field is
|
||||
// from the metadata payload schema.
|
||||
private static final String RECORD_KEY_FIELD = HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY;
|
||||
|
||||
protected HoodieWriteConfig metadataWriteConfig;
|
||||
protected HoodieWriteConfig dataWriteConfig;
|
||||
protected String tableName;
|
||||
@@ -202,7 +208,15 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
||||
.withDeleteParallelism(parallelism)
|
||||
.withRollbackParallelism(parallelism)
|
||||
.withFinalizeWriteParallelism(parallelism)
|
||||
.withAllowMultiWriteOnSameInstant(true);
|
||||
.withAllowMultiWriteOnSameInstant(true)
|
||||
.withKeyGenerator(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
|
||||
.withPopulateMetaFields(dataWriteConfig.getMetadataConfig().populateMetaFields());
|
||||
|
||||
// RecordKey properties are needed for the metadata table records
|
||||
final Properties properties = new Properties();
|
||||
properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), RECORD_KEY_FIELD);
|
||||
properties.put("hoodie.datasource.write.recordkey.field", RECORD_KEY_FIELD);
|
||||
builder.withProperties(properties);
|
||||
|
||||
if (writeConfig.isMetricsOn()) {
|
||||
builder.withMetricsConfig(HoodieMetricsConfig.newBuilder()
|
||||
@@ -395,9 +409,12 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
||||
.setTableType(HoodieTableType.MERGE_ON_READ)
|
||||
.setTableName(tableName)
|
||||
.setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue())
|
||||
.setPayloadClassName(HoodieMetadataPayload.class.getName())
|
||||
.setBaseFileFormat(HoodieFileFormat.HFILE.toString())
|
||||
.initTable(hadoopConf.get(), metadataWriteConfig.getBasePath());
|
||||
.setPayloadClassName(HoodieMetadataPayload.class.getName())
|
||||
.setBaseFileFormat(HoodieFileFormat.HFILE.toString())
|
||||
.setRecordKeyFields(RECORD_KEY_FIELD)
|
||||
.setPopulateMetaFields(dataWriteConfig.getMetadataConfig().populateMetaFields())
|
||||
.setKeyGeneratorClassProp(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
|
||||
.initTable(hadoopConf.get(), metadataWriteConfig.getBasePath());
|
||||
|
||||
initTableMetadata();
|
||||
initializeFileGroups(dataMetaClient, MetadataPartitionType.FILES, createInstantTime, 1);
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.metadata;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.keygen.BaseKeyGenerator;
|
||||
import org.apache.hudi.keygen.KeyGenUtils;
|
||||
|
||||
/**
|
||||
* Custom key generator for the Hoodie table metadata. The metadata table record payload
|
||||
* has an internal schema with a known key field HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY.
|
||||
* With or without the virtual keys, getting the key from the metadata table record is always
|
||||
* via the above field and there is no real need for a key generator. But, when a write
|
||||
* client is instantiated for the metadata table, when virtual keys are enabled, and when
|
||||
* key generator class is not configured, the default SimpleKeyGenerator will be used.
|
||||
* To avoid using any other key generators for the metadata table which rely on certain
|
||||
* config properties, we need this custom key generator exclusively for the metadata table.
|
||||
*/
|
||||
public class HoodieTableMetadataKeyGenerator extends BaseKeyGenerator {
|
||||
|
||||
public HoodieTableMetadataKeyGenerator(TypedProperties config) {
|
||||
super(config);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRecordKey(GenericRecord record) {
|
||||
return KeyGenUtils.getRecordKey(record, HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
@@ -346,7 +346,8 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
|
||||
if (records.size() > 0) {
|
||||
Map<HeaderMetadataType, String> header = new HashMap<>();
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, wrapperSchema.toString());
|
||||
HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, header);
|
||||
final String keyField = table.getMetaClient().getTableConfig().getRecordKeyFieldProp();
|
||||
HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, header, keyField);
|
||||
writer.appendBlock(block);
|
||||
records.clear();
|
||||
}
|
||||
|
||||
@@ -180,6 +180,7 @@ public abstract class HoodieCompactor<T extends HoodieRecordPayload, I, K, O> im
|
||||
.withDiskMapType(config.getCommonConfig().getSpillableDiskMapType())
|
||||
.withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
|
||||
.withOperationField(config.allowOperationMetadataField())
|
||||
.withPartition(operation.getPartitionPath())
|
||||
.build();
|
||||
if (!scanner.iterator().hasNext()) {
|
||||
scanner.close();
|
||||
|
||||
Reference in New Issue
Block a user