1
0

[HUDI-3544] Fixing "populate meta fields" update to metadata table (#4941)

* Fixing populateMeta fields update to metadata table

* Fix checkstyle violations

Co-authored-by: Sagar Sumit <sagarsumit09@gmail.com>
This commit is contained in:
Sivabalan Narayanan
2022-03-03 06:32:25 -05:00
committed by GitHub
parent 51ee5005a6
commit 876a891979
2 changed files with 53 additions and 14 deletions

View File

@@ -18,7 +18,6 @@
package org.apache.hudi.metadata;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieInstantInfo;
import org.apache.hudi.avro.model.HoodieMetadataRecord;
@@ -57,13 +56,14 @@ import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -367,8 +367,14 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
// If the un-synced instants have been archived, then
// the metadata table will need to be bootstrapped again.
if (exists) {
final HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get())
HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get())
.setBasePath(metadataWriteConfig.getBasePath()).build();
if (dataWriteConfig.getMetadataConfig().populateMetaFields() != metadataMetaClient.getTableConfig().populateMetaFields()) {
LOG.info("Re-initiating metadata table properties since populate meta fields have changed");
metadataMetaClient = initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields());
}
final Option<HoodieInstant> latestMetadataInstant =
metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
@@ -504,17 +510,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.getReverseOrderedInstants().findFirst().map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);
LOG.info("Creating a new metadata table in " + metadataWriteConfig.getBasePath() + " at instant " + createInstantTime);
HoodieTableMetaClient.withPropertyBuilder()
.setTableType(HoodieTableType.MERGE_ON_READ)
.setTableName(tableName)
.setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue())
.setPayloadClassName(HoodieMetadataPayload.class.getName())
.setBaseFileFormat(HoodieFileFormat.HFILE.toString())
.setRecordKeyFields(RECORD_KEY_FIELD_NAME)
.setPopulateMetaFields(dataWriteConfig.getMetadataConfig().populateMetaFields())
.setKeyGeneratorClassProp(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
.initTable(hadoopConf.get(), metadataWriteConfig.getBasePath());
initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields());
initTableMetadata();
initializeEnabledFileGroups(dataMetaClient, createInstantTime);
@@ -530,6 +526,19 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
return true;
}
private HoodieTableMetaClient initializeMetaClient(boolean populatMetaFields) throws IOException {
return HoodieTableMetaClient.withPropertyBuilder()
.setTableType(HoodieTableType.MERGE_ON_READ)
.setTableName(tableName)
.setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue())
.setPayloadClassName(HoodieMetadataPayload.class.getName())
.setBaseFileFormat(HoodieFileFormat.HFILE.toString())
.setRecordKeyFields(RECORD_KEY_FIELD_NAME)
.setPopulateMetaFields(populatMetaFields)
.setKeyGeneratorClassProp(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
.initTable(hadoopConf.get(), metadataWriteConfig.getBasePath());
}
/**
* Function to find hoodie partitions and list files in them in parallel.
*

View File

@@ -328,6 +328,36 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
validateMetadata(testTable, emptyList(), true);
}
@Test
public void testUpdationOfPopulateMetaFieldsForMetadataTable() throws Exception {
tableType = COPY_ON_WRITE;
init(tableType, false);
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
.withPopulateMetaFields(true)
.build())
.build();
initWriteConfigAndMetatableWriter(writeConfig, true);
doWriteOperation(testTable, "0000001", INSERT);
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setBasePath(writeConfig.getBasePath() + "/.hoodie/metadata").setConf(hadoopConf).build();
assertTrue(metaClient.getTableConfig().populateMetaFields());
// update populateMeta fields to false.
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
.withPopulateMetaFields(false)
.build())
.build();
initWriteConfigAndMetatableWriter(writeConfig, true);
doWriteOperation(testTable, "0000002", INSERT);
metaClient = HoodieTableMetaClient.builder().setBasePath(writeConfig.getBasePath() + "/.hoodie/metadata").setConf(hadoopConf).build();
assertFalse(metaClient.getTableConfig().populateMetaFields());
}
@Test
public void testMetadataInsertUpsertCleanNonPartitioned() throws Exception {
HoodieTableType tableType = COPY_ON_WRITE;