1
0

[HUDI-2593] Virtual keys support for metadata table (#3968)

- Metadata table today has virtual keys disabled, thereby populating the metafields
  for each record written out and increasing the overall storage space used. This change
  adds virtual keys support for the metadata table so that metafields are disabled
  for metadata table records.

- Adding a custom KeyGenerator for Metadata table so as to not rely on the
  default Base/SimpleKeyGenerators which currently look for record key
  and partition field set in the table config.

- AbstractHoodieLogRecordReader's version of processing next data block and
  createHoodieRecord() will be a generic version and making the derived class
  HoodieMetadataMergedLogRecordReader take care of the special creation of
  records from explicitly passed-in partition names.
This commit is contained in:
Manoj Govindassamy
2021-11-19 15:11:29 -08:00
committed by GitHub
parent eba354e922
commit 459b34240b
28 changed files with 423 additions and 123 deletions

View File

@@ -203,6 +203,7 @@ public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPa
.withReverseReader(config.getCompactionReverseLogReadEnabled())
.withBufferSize(config.getMaxDFSStreamBufferSize())
.withSpillableMapBasePath(config.getSpillableMapBasePath())
.withPartition(clusteringOp.getPartitionPath())
.build();
Option<HoodieFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())

View File

@@ -47,9 +47,9 @@ public class SimpleKeyGenerator extends BuiltinKeyGenerator {
/**
 * Builds a key generator from explicitly supplied record-key and partition-path fields.
 * Either field may be {@code null} (e.g. for tables using virtual keys), in which case
 * the corresponding field list is left empty rather than containing a null entry.
 *
 * @param props              writer/table properties passed through to the base class
 * @param recordKeyField     record key field name, or null when not configured
 * @param partitionPathField partition path field name, or null when not configured
 */
SimpleKeyGenerator(TypedProperties props, String recordKeyField, String partitionPathField) {
  super(props);
  this.recordKeyFields = recordKeyField == null
      ? Collections.emptyList() : Collections.singletonList(recordKeyField);
  // Mirror the record-key null handling: the previous code unconditionally built
  // singletonList(partitionPathField), which yielded a list containing null when the
  // partition path field was unset.
  this.partitionPathFields = partitionPathField == null
      ? Collections.emptyList() : Collections.singletonList(partitionPathField);
  simpleAvroKeyGenerator = new SimpleAvroKeyGenerator(props, recordKeyField, partitionPathField);
}

View File

@@ -88,6 +88,7 @@ import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;
import java.io.IOException;
import java.nio.file.Files;
@@ -358,8 +359,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
* Test that manual rollbacks work correctly and enough timeline history is maintained on the metadata table
* timeline.
*/
@Test
public void testManualRollbacks() throws Exception {
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testManualRollbacks(final boolean populateMateFields) throws Exception {
HoodieTableType tableType = COPY_ON_WRITE;
init(tableType, false);
// Setting to archive more aggressively on the Metadata Table than the Dataset
@@ -369,7 +371,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true)
.archiveCommitsWith(minArchiveCommitsMetadata, minArchiveCommitsMetadata + 1).retainCommits(1)
.withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsBeforeCompaction).build())
.withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsBeforeCompaction)
.withPopulateMetaFields(populateMateFields)
.build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(minArchiveCommitsDataset, minArchiveCommitsDataset + 1)
.retainCommits(1).retainFileVersions(1).withAutoClean(false).withAsyncClean(true).build())
.build();

View File

@@ -22,6 +22,7 @@ import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.view.TableFileSystemView;
import org.apache.hudi.common.testutils.HoodieTestTable;
import org.apache.hudi.metadata.HoodieBackedTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataKeyGenerator;
import org.apache.hudi.table.HoodieSparkTable;
import org.apache.hudi.table.HoodieTable;
@@ -29,6 +30,8 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
import java.io.IOException;
import java.util.ArrayList;
@@ -90,4 +93,20 @@ public class TestHoodieBackedTableMetadata extends TestHoodieMetadataBase {
});
}
/**
 * Verify if the Metadata table is constructed with table properties including
 * the right key generator class name.
 */
@ParameterizedTest
@EnumSource(HoodieTableType.class)
public void testMetadataTableKeyGenerator(final HoodieTableType tableType) throws Exception {
  init(tableType);
  // Open a metadata-table reader directly against the data table's base path.
  // NOTE(review): the meaning of the trailing 'false' flag isn't visible here —
  // confirm against the HoodieBackedTableMetadata constructor signature.
  HoodieBackedTableMetadata tableMetadata = new HoodieBackedTableMetadata(context,
      writeConfig.getMetadataConfig(), writeConfig.getBasePath(), writeConfig.getSpillableMapBasePath(), false);
  // The metadata table's own table config must advertise the metadata-specific
  // key generator, not the default Simple/Base key generator.
  assertEquals(HoodieTableMetadataKeyGenerator.class.getCanonicalName(),
      tableMetadata.getMetadataMetaClient().getTableConfig().getKeyGeneratorClassName());
}
}

View File

@@ -288,7 +288,9 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(useFileListingMetadata)
.enableFullScan(enableFullScan)
.enableMetrics(enableMetrics).build())
.enableMetrics(enableMetrics)
.withPopulateMetaFields(false)
.build())
.withMetricsConfig(HoodieMetricsConfig.newBuilder().on(enableMetrics)
.withExecutorMetrics(true).build())
.withMetricsGraphiteConfig(HoodieMetricsGraphiteConfig.newBuilder()

View File

@@ -283,13 +283,26 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
return properties;
}
protected void addConfigsForPopulateMetaFields(HoodieWriteConfig.Builder configBuilder, boolean populateMetaFields) {
protected Properties getPropertiesForMetadataTable() {
Properties properties = new Properties();
properties.put(HoodieTableConfig.POPULATE_META_FIELDS.key(), "false");
properties.put("hoodie.datasource.write.recordkey.field", "key");
properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), "key");
return properties;
}
protected void addConfigsForPopulateMetaFields(HoodieWriteConfig.Builder configBuilder, boolean populateMetaFields,
boolean isMetadataTable) {
if (!populateMetaFields) {
configBuilder.withProperties(getPropertiesForKeyGen())
configBuilder.withProperties((isMetadataTable ? getPropertiesForMetadataTable() : getPropertiesForKeyGen()))
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.SIMPLE).build());
}
}
protected void addConfigsForPopulateMetaFields(HoodieWriteConfig.Builder configBuilder, boolean populateMetaFields) {
addConfigsForPopulateMetaFields(configBuilder, populateMetaFields, false);
}
/**
* Cleanups hoodie clients.
*/