[HUDI-1292] Created a config to enable/disable syncing of metadata table. (#3427)
* [HUDI-1292] Created a config to enable/disable syncing of the metadata table.
  - The metadata table should only be synced from a single pipeline to prevent conflicts.
  - Skip syncing the metadata table for clustering and compaction.
  - Renamed useFileListingMetadata to isMetadataTableEnabled.
  Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
@@ -447,6 +447,11 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
|
||||
@Override
|
||||
public void syncTableMetadata() {
|
||||
if (!config.getMetadataConfig().enableSync()) {
|
||||
LOG.info("Metadata table sync is disabled in the config.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Open up the metadata table again, for syncing
|
||||
try (HoodieTableMetadataWriter writer = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context)) {
|
||||
LOG.info("Successfully synced to metadata table");
|
||||
|
||||
@@ -194,6 +194,54 @@ public class TestHoodieBackedMetadata extends HoodieClientTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tests that syncing of the metadata table can be switched on and off through the
 * {@code enableSync} setting of {@code HoodieMetadataConfig}.
 *
 * <p>Three commits are written to a COPY_ON_WRITE table, each followed by an explicit
 * {@code syncTableMetadata()} call:
 * <ol>
 *   <li>first commit, written with {@code getWriteConfig(true, true)}: the metadata table
 *       base path must exist afterwards;</li>
 *   <li>second commit, written with {@code enableSync(false)}: no delta file for it may
 *       appear in the metadata table's meta folder;</li>
 *   <li>third commit, written with {@code getWriteConfig(true, true)} again: delta files
 *       for all three commits must be present.</li>
 * </ol>
 *
 * @throws Exception if test setup, a write, or a file-system check fails
 */
|
||||
@Test
|
||||
public void testSyncConfig() throws Exception {
|
||||
init(HoodieTableType.COPY_ON_WRITE);
|
||||
HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
|
||||
|
||||
// Step 1: write a first commit and sync, which is expected to create the metadata table.
// NOTE(review): getWriteConfig(true, true) presumably enables the metadata table with sync
// left on -- confirm against the helper's definition.
|
||||
String firstCommitTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, getWriteConfig(true, true), true)) {
|
||||
client.startCommitWithTime(firstCommitTime);
|
||||
client.insert(jsc.parallelize(dataGen.generateInserts(firstCommitTime, 2)), firstCommitTime);
|
||||
client.syncTableMetadata();
|
||||
assertTrue(fs.exists(new Path(metadataTableBasePath)));
|
||||
validateMetadata(client);
|
||||
}
|
||||
|
||||
// Step 2: with the metadata table enabled but enableSync(false), a sync call must not
// bring the new commit into the metadata table.
|
||||
HoodieWriteConfig config = getWriteConfigBuilder(true, true, false)
|
||||
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
|
||||
.enable(true).enableMetrics(false).enableSync(false).build()).build();
|
||||
final String metadataTableMetaPath = metadataTableBasePath + Path.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME;
|
||||
String secondCommitTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, config, true)) {
|
||||
client.startCommitWithTime(secondCommitTime);
|
||||
client.insert(jsc.parallelize(dataGen.generateInserts(secondCommitTime, 2)), secondCommitTime);
|
||||
client.syncTableMetadata();
|
||||
|
||||
// Metadata table should not have synced: the first commit's delta file (from step 1)
// is still there, but none exists for the second commit.
|
||||
assertTrue(fs.exists(new Path(metadataTableMetaPath, HoodieTimeline.makeDeltaFileName(firstCommitTime))));
|
||||
assertFalse(fs.exists(new Path(metadataTableMetaPath, HoodieTimeline.makeDeltaFileName(secondCommitTime))));
|
||||
}
|
||||
|
||||
// Step 3: with sync enabled again, a sync call is expected to bring the metadata table
// up to date.
|
||||
String thirdCommitTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, getWriteConfig(true, true), true)) {
|
||||
client.startCommitWithTime(thirdCommitTime);
|
||||
client.insert(jsc.parallelize(dataGen.generateInserts(thirdCommitTime, 2)), thirdCommitTime);
|
||||
client.syncTableMetadata();
|
||||
|
||||
// Metadata table should have synced: delta files are expected for all three commits,
// including the second one that was written while sync was disabled.
|
||||
assertTrue(fs.exists(new Path(metadataTableMetaPath, HoodieTimeline.makeDeltaFileName(firstCommitTime))));
|
||||
assertTrue(fs.exists(new Path(metadataTableMetaPath, HoodieTimeline.makeDeltaFileName(secondCommitTime))));
|
||||
assertTrue(fs.exists(new Path(metadataTableMetaPath, HoodieTimeline.makeDeltaFileName(thirdCommitTime))));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Only valid partition directories are added to the metadata.
|
||||
*/
|
||||
@@ -932,7 +980,7 @@ public class TestHoodieBackedMetadata extends HoodieClientTestHarness {
|
||||
|
||||
HoodieTableMetadata tableMetadata = metadata(client);
|
||||
assertNotNull(tableMetadata, "MetadataReader should have been initialized");
|
||||
if (!config.useFileListingMetadata()) {
|
||||
if (!config.isMetadataTableEnabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1033,7 +1081,7 @@ public class TestHoodieBackedMetadata extends HoodieClientTestHarness {
|
||||
|
||||
// Validate write config for metadata table
|
||||
HoodieWriteConfig metadataWriteConfig = metadataWriter.getWriteConfig();
|
||||
assertFalse(metadataWriteConfig.useFileListingMetadata(), "No metadata table for metadata table");
|
||||
assertFalse(metadataWriteConfig.isMetadataTableEnabled(), "No metadata table for metadata table");
|
||||
assertFalse(metadataWriteConfig.getFileListingMetadataVerify(), "No verify for metadata table");
|
||||
|
||||
// Metadata table should be in sync with the dataset
|
||||
|
||||
Reference in New Issue
Block a user