[HUDI-2593] Virtual keys support for metadata table (#3968)
- Metadata table today has virtual keys disabled, thereby populating the metafields for each record written out and increasing the overall storage space used. Hereby adding virtual keys support for the metadata table so that metafields are disabled for metadata table records. - Adding a custom KeyGenerator for the Metadata table so as to not rely on the default Base/SimpleKeyGenerators, which currently look for the record key and partition field set in the table config. - AbstractHoodieLogRecordReader's version of processing the next data block and createHoodieRecord() will be a generic version, and the derived class HoodieMetadataMergedLogRecordReader takes care of the special creation of records from explicitly passed-in partition names.
This commit is contained in:
committed by
GitHub
parent
eba354e922
commit
459b34240b
@@ -203,6 +203,7 @@ public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPa
|
||||
.withReverseReader(config.getCompactionReverseLogReadEnabled())
|
||||
.withBufferSize(config.getMaxDFSStreamBufferSize())
|
||||
.withSpillableMapBasePath(config.getSpillableMapBasePath())
|
||||
.withPartition(clusteringOp.getPartitionPath())
|
||||
.build();
|
||||
|
||||
Option<HoodieFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
|
||||
|
||||
@@ -47,9 +47,9 @@ public class SimpleKeyGenerator extends BuiltinKeyGenerator {
|
||||
SimpleKeyGenerator(TypedProperties props, String recordKeyField, String partitionPathField) {
|
||||
super(props);
|
||||
this.recordKeyFields = recordKeyField == null
|
||||
? Collections.emptyList()
|
||||
: Collections.singletonList(recordKeyField);
|
||||
this.partitionPathFields = Collections.singletonList(partitionPathField);
|
||||
? Collections.emptyList() : Collections.singletonList(recordKeyField);
|
||||
this.partitionPathFields = partitionPathField == null
|
||||
? Collections.emptyList() : Collections.singletonList(partitionPathField);
|
||||
simpleAvroKeyGenerator = new SimpleAvroKeyGenerator(props, recordKeyField, partitionPathField);
|
||||
}
|
||||
|
||||
|
||||
@@ -88,6 +88,7 @@ import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.Arguments;
|
||||
import org.junit.jupiter.params.provider.EnumSource;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
@@ -358,8 +359,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
|
||||
* Test that manual rollbacks work correctly and enough timeline history is maintained on the metadata table
|
||||
* timeline.
|
||||
*/
|
||||
@Test
|
||||
public void testManualRollbacks() throws Exception {
|
||||
@ParameterizedTest
|
||||
@ValueSource(booleans = {true, false})
|
||||
public void testManualRollbacks(final boolean populateMateFields) throws Exception {
|
||||
HoodieTableType tableType = COPY_ON_WRITE;
|
||||
init(tableType, false);
|
||||
// Setting to archive more aggressively on the Metadata Table than the Dataset
|
||||
@@ -369,7 +371,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
|
||||
writeConfig = getWriteConfigBuilder(true, true, false)
|
||||
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true)
|
||||
.archiveCommitsWith(minArchiveCommitsMetadata, minArchiveCommitsMetadata + 1).retainCommits(1)
|
||||
.withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsBeforeCompaction).build())
|
||||
.withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsBeforeCompaction)
|
||||
.withPopulateMetaFields(populateMateFields)
|
||||
.build())
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(minArchiveCommitsDataset, minArchiveCommitsDataset + 1)
|
||||
.retainCommits(1).retainFileVersions(1).withAutoClean(false).withAsyncClean(true).build())
|
||||
.build();
|
||||
|
||||
@@ -22,6 +22,7 @@ import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.view.TableFileSystemView;
|
||||
import org.apache.hudi.common.testutils.HoodieTestTable;
|
||||
import org.apache.hudi.metadata.HoodieBackedTableMetadata;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadataKeyGenerator;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
@@ -29,6 +30,8 @@ import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.EnumSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@@ -90,4 +93,20 @@ public class TestHoodieBackedTableMetadata extends TestHoodieMetadataBase {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify if the Metadata table is constructed with table properties including
|
||||
* the right key generator class name.
|
||||
*/
|
||||
@ParameterizedTest
|
||||
@EnumSource(HoodieTableType.class)
|
||||
public void testMetadataTableKeyGenerator(final HoodieTableType tableType) throws Exception {
|
||||
init(tableType);
|
||||
|
||||
HoodieBackedTableMetadata tableMetadata = new HoodieBackedTableMetadata(context,
|
||||
writeConfig.getMetadataConfig(), writeConfig.getBasePath(), writeConfig.getSpillableMapBasePath(), false);
|
||||
|
||||
assertEquals(HoodieTableMetadataKeyGenerator.class.getCanonicalName(),
|
||||
tableMetadata.getMetadataMetaClient().getTableConfig().getKeyGeneratorClassName());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -288,7 +288,9 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
|
||||
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
|
||||
.enable(useFileListingMetadata)
|
||||
.enableFullScan(enableFullScan)
|
||||
.enableMetrics(enableMetrics).build())
|
||||
.enableMetrics(enableMetrics)
|
||||
.withPopulateMetaFields(false)
|
||||
.build())
|
||||
.withMetricsConfig(HoodieMetricsConfig.newBuilder().on(enableMetrics)
|
||||
.withExecutorMetrics(true).build())
|
||||
.withMetricsGraphiteConfig(HoodieMetricsGraphiteConfig.newBuilder()
|
||||
|
||||
@@ -283,13 +283,26 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
|
||||
return properties;
|
||||
}
|
||||
|
||||
protected void addConfigsForPopulateMetaFields(HoodieWriteConfig.Builder configBuilder, boolean populateMetaFields) {
|
||||
protected Properties getPropertiesForMetadataTable() {
|
||||
Properties properties = new Properties();
|
||||
properties.put(HoodieTableConfig.POPULATE_META_FIELDS.key(), "false");
|
||||
properties.put("hoodie.datasource.write.recordkey.field", "key");
|
||||
properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), "key");
|
||||
return properties;
|
||||
}
|
||||
|
||||
protected void addConfigsForPopulateMetaFields(HoodieWriteConfig.Builder configBuilder, boolean populateMetaFields,
|
||||
boolean isMetadataTable) {
|
||||
if (!populateMetaFields) {
|
||||
configBuilder.withProperties(getPropertiesForKeyGen())
|
||||
configBuilder.withProperties((isMetadataTable ? getPropertiesForMetadataTable() : getPropertiesForKeyGen()))
|
||||
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.SIMPLE).build());
|
||||
}
|
||||
}
|
||||
|
||||
protected void addConfigsForPopulateMetaFields(HoodieWriteConfig.Builder configBuilder, boolean populateMetaFields) {
|
||||
addConfigsForPopulateMetaFields(configBuilder, populateMetaFields, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanups hoodie clients.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user