1
0

[HUDI-3544] Fixing "populate meta fields" update to metadata table (#4941)

* Fixing populateMeta fields update to metadata table

* Fix checkstyle violations

Co-authored-by: Sagar Sumit <sagarsumit09@gmail.com>
This commit is contained in:
Sivabalan Narayanan
2022-03-03 06:32:25 -05:00
committed by GitHub
parent 51ee5005a6
commit 876a891979
2 changed files with 53 additions and 14 deletions

View File

@@ -18,7 +18,6 @@
package org.apache.hudi.metadata;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieInstantInfo;
import org.apache.hudi.avro.model.HoodieMetadataRecord;
@@ -57,13 +56,14 @@ import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -367,8 +367,14 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
// If the un-synced instants have been archived, then
// the metadata table will need to be bootstrapped again.
if (exists) {
final HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get()) HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get())
.setBasePath(metadataWriteConfig.getBasePath()).build();
if (dataWriteConfig.getMetadataConfig().populateMetaFields() != metadataMetaClient.getTableConfig().populateMetaFields()) {
LOG.info("Re-initiating metadata table properties since populate meta fields have changed");
metadataMetaClient = initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields());
}
final Option<HoodieInstant> latestMetadataInstant =
metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
@@ -504,17 +510,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.getReverseOrderedInstants().findFirst().map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);
LOG.info("Creating a new metadata table in " + metadataWriteConfig.getBasePath() + " at instant " + createInstantTime);
HoodieTableMetaClient.withPropertyBuilder() initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields());
.setTableType(HoodieTableType.MERGE_ON_READ)
.setTableName(tableName)
.setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue())
.setPayloadClassName(HoodieMetadataPayload.class.getName())
.setBaseFileFormat(HoodieFileFormat.HFILE.toString())
.setRecordKeyFields(RECORD_KEY_FIELD_NAME)
.setPopulateMetaFields(dataWriteConfig.getMetadataConfig().populateMetaFields())
.setKeyGeneratorClassProp(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
.initTable(hadoopConf.get(), metadataWriteConfig.getBasePath());
initTableMetadata();
initializeEnabledFileGroups(dataMetaClient, createInstantTime);
@@ -530,6 +526,19 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
return true;
}
/**
 * Initializes the backing metadata table with the standard metadata-table properties
 * (MERGE_ON_READ table type, HFile base-file format, metadata payload class, metadata
 * key generator) and returns a meta client for it.
 *
 * @param populateMetaFields whether Hudi meta fields should be populated in the metadata table
 * @return the initialized {@link HoodieTableMetaClient} for the metadata table
 * @throws IOException if initializing the table at the metadata base path fails
 */
private HoodieTableMetaClient initializeMetaClient(boolean populateMetaFields) throws IOException {
return HoodieTableMetaClient.withPropertyBuilder()
.setTableType(HoodieTableType.MERGE_ON_READ)
.setTableName(tableName)
.setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue())
.setPayloadClassName(HoodieMetadataPayload.class.getName())
.setBaseFileFormat(HoodieFileFormat.HFILE.toString())
.setRecordKeyFields(RECORD_KEY_FIELD_NAME)
// fixed typo: parameter was previously misspelled "populatMetaFields"
.setPopulateMetaFields(populateMetaFields)
.setKeyGeneratorClassProp(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
.initTable(hadoopConf.get(), metadataWriteConfig.getBasePath());
}
/**
* Function to find hoodie partitions and list files in them in parallel.
*

View File

@@ -328,6 +328,36 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
validateMetadata(testTable, emptyList(), true);
}
// Verifies that when the "populate meta fields" config for the metadata table changes
// between writes, the metadata table's own table config is re-initialized to match
// (the fix in HoodieBackedTableMetadataWriter for HUDI-3544).
@Test
public void testUpdationOfPopulateMetaFieldsForMetadataTable() throws Exception {
tableType = COPY_ON_WRITE;
init(tableType, false);
// First write with populateMetaFields = true on the metadata table.
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
.withPopulateMetaFields(true)
.build())
.build();
initWriteConfigAndMetatableWriter(writeConfig, true);
doWriteOperation(testTable, "0000001", INSERT);
// Metadata table lives under <basePath>/.hoodie/metadata; its table config should reflect true.
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setBasePath(writeConfig.getBasePath() + "/.hoodie/metadata").setConf(hadoopConf).build();
assertTrue(metaClient.getTableConfig().populateMetaFields());
// update populateMeta fields to false.
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
.withPopulateMetaFields(false)
.build())
.build();
initWriteConfigAndMetatableWriter(writeConfig, true);
doWriteOperation(testTable, "0000002", INSERT);
// After the config flip, the metadata table should have been re-initialized with false.
metaClient = HoodieTableMetaClient.builder().setBasePath(writeConfig.getBasePath() + "/.hoodie/metadata").setConf(hadoopConf).build();
assertFalse(metaClient.getTableConfig().populateMetaFields());
}
@Test
public void testMetadataInsertUpsertCleanNonPartitioned() throws Exception {
HoodieTableType tableType = COPY_ON_WRITE;