From 23c9c5c2962e1ac053cb4395fa148b9015272990 Mon Sep 17 00:00:00 2001 From: xi chaomin <36392121+xicm@users.noreply.github.com> Date: Tue, 5 Jul 2022 22:50:17 +0800 Subject: [PATCH] [HUDI-3836] Improve the way of fetching metadata partitions from table (#5286) Co-authored-by: xicm --- .../hudi/index/bloom/HoodieBloomIndex.java | 3 +- .../apache/hudi/io/HoodieKeyLookupHandle.java | 3 +- .../HoodieBackedTableMetadataWriter.java | 7 ++-- .../org/apache/hudi/table/HoodieTable.java | 5 ++- .../action/index/RunIndexActionExecutor.java | 5 ++- .../bloom/SparkHoodieBloomIndexHelper.java | 3 +- .../functional/TestHoodieBackedMetadata.java | 27 ++++++++------- .../client/functional/TestHoodieIndex.java | 3 +- .../hudi/common/table/HoodieTableConfig.java | 10 +++--- .../metadata/HoodieTableMetadataUtil.java | 6 +--- .../org/apache/hudi/HoodieFileIndex.scala | 2 +- .../apache/hudi/utilities/HoodieIndexer.java | 5 ++- .../hudi/utilities/TestHoodieIndexer.java | 33 +++++++++---------- 13 files changed, 49 insertions(+), 63 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java index 6545c642c..aeefe6c30 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java @@ -56,7 +56,6 @@ import static java.util.stream.Collectors.toList; import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty; import static org.apache.hudi.index.HoodieIndexUtils.getLatestBaseFilesForAllPartitions; import static org.apache.hudi.metadata.HoodieMetadataPayload.unwrapStatisticValueWrapper; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS; /** @@ -143,7 +142,7 @@ public class HoodieBloomIndex extends HoodieIndex { if (config.getBloomIndexPruneByRanges()) { // load column ranges from metadata index if column stats index is enabled and column_stats metadata partition is available if (config.getBloomIndexUseMetadata() - && getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig()).contains(COLUMN_STATS.getPartitionPath())) { + && hoodieTable.getMetaClient().getTableConfig().getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())) { fileInfoList = loadColumnRangesFromMetaIndex(affectedPartitionPathList, context, hoodieTable); } // fallback to loading column ranges from files diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java index 36ee7d967..a38ae7f1f 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java @@ -37,7 +37,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS; /** @@ -64,7 +63,7 @@ public class HoodieKeyLookupHandle exten HoodieTimer timer = new HoodieTimer().startTimer(); try { if (config.getBloomIndexUseMetadata() - && getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig()) + && hoodieTable.getMetaClient().getTableConfig().getMetadataPartitions() .contains(BLOOM_FILTERS.getPartitionPath())) { bloomFilter = hoodieTable.getMetadataTable().getBloomFilter(partitionPathFileIDPair.getLeft(), partitionPathFileIDPair.getRight()) .orElseThrow(() -> new HoodieIndexException("BloomFilter missing for " + partitionPathFileIDPair.getRight())); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index f5a96fb67..e36adf6be 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -96,7 +96,6 @@ import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deseri import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING; import static org.apache.hudi.metadata.HoodieTableMetadata.METADATA_TABLE_NAME_SUFFIX; import static org.apache.hudi.metadata.HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions; @@ -579,7 +578,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta } private void updateInitializedPartitionsInTableConfig(List partitionTypes) { - Set completedPartitions = getCompletedMetadataPartitions(dataMetaClient.getTableConfig()); + Set completedPartitions = dataMetaClient.getTableConfig().getMetadataPartitions(); completedPartitions.addAll(partitionTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet())); dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS.key(), String.join(",", completedPartitions)); HoodieTableConfig.update(dataMetaClient.getFs(), new Path(dataMetaClient.getMetaPath()), dataMetaClient.getTableConfig().getProps()); @@ -716,7 +715,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta } public void dropMetadataPartitions(List metadataPartitions) throws IOException { - Set completedIndexes = getCompletedMetadataPartitions(dataMetaClient.getTableConfig()); + Set completedIndexes = dataMetaClient.getTableConfig().getMetadataPartitions(); Set inflightIndexes = getInflightMetadataPartitions(dataMetaClient.getTableConfig()); for (MetadataPartitionType partitionType : metadataPartitions) { @@ -806,7 +805,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta private Set getMetadataPartitionsToUpdate() { // fetch partitions to update from table config - Set partitionsToUpdate = getCompletedMetadataPartitions(dataMetaClient.getTableConfig()); + Set partitionsToUpdate = dataMetaClient.getTableConfig().getMetadataPartitions(); // add inflight indexes as well because the file groups have already been initialized, so writers can log updates // NOTE: Async HoodieIndexer can move some partition to inflight. While that partition is still being built, // the regular ingestion writers should not be blocked. They can go ahead and log updates to the metadata partition. diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java index b6541ac66..57e816619 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java @@ -102,7 +102,6 @@ import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PART import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataTable; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists; /** @@ -900,7 +899,7 @@ public abstract class HoodieTable implem return false; } return metadataIndexDisabled - && getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(partitionType.getPartitionPath()); + && metaClient.getTableConfig().getMetadataPartitions().contains(partitionType.getPartitionPath()); } private boolean shouldExecuteMetadataTableDeletion() { @@ -919,7 +918,7 @@ public abstract class HoodieTable implem * Clears hoodie.table.metadata.partitions in hoodie.properties */ private void clearMetadataTablePartitionsConfig(Option partitionType, boolean clearAll) { - Set partitions = getCompletedMetadataPartitions(metaClient.getTableConfig()); + Set partitions = metaClient.getTableConfig().getMetadataPartitions(); if (clearAll && partitions.size() > 0) { LOG.info("Clear hoodie.table.metadata.partitions in hoodie.properties"); metaClient.getTableConfig().setValue(TABLE_METADATA_PARTITIONS.key(), EMPTY_STRING); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java index 182cf9450..96d46928e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java @@ -74,7 +74,6 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTI import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE; import static org.apache.hudi.metadata.HoodieTableMetadata.getMetadataTableBasePath; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists; @@ -192,7 +191,7 @@ public class RunIndexActionExecutor exte private void abort(HoodieInstant indexInstant, Set requestedPartitions) { Set inflightPartitions = getInflightMetadataPartitions(table.getMetaClient().getTableConfig()); - Set completedPartitions = getCompletedMetadataPartitions(table.getMetaClient().getTableConfig()); + Set completedPartitions = table.getMetaClient().getTableConfig().getMetadataPartitions(); // update table config requestedPartitions.forEach(partition -> { inflightPartitions.remove(partition); @@ -302,7 +301,7 @@ public class RunIndexActionExecutor exte private void updateMetadataPartitionsTableConfig(HoodieTableMetaClient metaClient, Set metadataPartitions) { // remove from inflight and update completed indexes Set inflightPartitions = getInflightMetadataPartitions(metaClient.getTableConfig()); - Set completedPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig()); + Set completedPartitions = metaClient.getTableConfig().getMetadataPartitions(); inflightPartitions.removeAll(metadataPartitions); completedPartitions.addAll(metadataPartitions); // update table config diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java index c9fb895ad..5736024dc 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java @@ -43,7 +43,6 @@ import java.util.stream.Collectors; import scala.Tuple2; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS; /** @@ -81,7 +80,7 @@ public class SparkHoodieBloomIndexHelper extends BaseHoodieBloomIndexHelper { JavaRDD> keyLookupResultRDD; if (config.getBloomIndexUseMetadata() - && getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig()) + && hoodieTable.getMetaClient().getTableConfig().getMetadataPartitions() .contains(BLOOM_FILTERS.getPartitionPath())) { // Step 1: Sort by file id JavaRDD> sortedFileIdAndKeyPairs = diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index 34f470eb1..1a618a01d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -149,7 +149,6 @@ import static org.apache.hudi.common.model.WriteOperationType.DELETE; import static org.apache.hudi.common.model.WriteOperationType.INSERT; import static org.apache.hudi.common.model.WriteOperationType.UPSERT; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS; import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS; import static org.apache.hudi.metadata.MetadataPartitionType.FILES; @@ -240,9 +239,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase { HoodieTableMetaClient.reload(metaClient); HoodieTableConfig tableConfig = metaClient.getTableConfig(); assertFalse(tableConfig.getMetadataPartitions().isEmpty()); - assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath())); - assertFalse(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath())); - assertFalse(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath())); + assertTrue(tableConfig.getMetadataPartitions().contains(FILES.getPartitionPath())); + assertFalse(tableConfig.getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())); + assertFalse(tableConfig.getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); // enable column stats and run 1 upserts HoodieWriteConfig cfgWithColStatsEnabled = HoodieWriteConfig.newBuilder() @@ -265,9 +264,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase { HoodieTableMetaClient.reload(metaClient); tableConfig = metaClient.getTableConfig(); assertFalse(tableConfig.getMetadataPartitions().isEmpty()); - assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath())); - assertTrue(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath())); - assertFalse(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath())); + assertTrue(tableConfig.getMetadataPartitions().contains(FILES.getPartitionPath())); + assertTrue(tableConfig.getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())); + assertFalse(tableConfig.getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); // disable column stats and run 1 upsert HoodieWriteConfig cfgWithColStatsDisabled = HoodieWriteConfig.newBuilder() @@ -291,9 +290,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase { HoodieTableMetaClient.reload(metaClient); tableConfig = metaClient.getTableConfig(); assertFalse(tableConfig.getMetadataPartitions().isEmpty()); - assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath())); - assertFalse(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath())); - assertFalse(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath())); + assertTrue(tableConfig.getMetadataPartitions().contains(FILES.getPartitionPath())); + assertFalse(tableConfig.getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())); + assertFalse(tableConfig.getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); // enable bloom filter as well as column stats and run 1 upsert HoodieWriteConfig cfgWithBloomFilterEnabled = HoodieWriteConfig.newBuilder() @@ -317,9 +316,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase { HoodieTableMetaClient.reload(metaClient); tableConfig = metaClient.getTableConfig(); assertFalse(tableConfig.getMetadataPartitions().isEmpty()); - assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath())); - assertTrue(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath())); - assertTrue(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath())); + assertTrue(tableConfig.getMetadataPartitions().contains(FILES.getPartitionPath())); + assertTrue(tableConfig.getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())); + assertTrue(tableConfig.getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); } @Test @@ -360,7 +359,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase { HoodieTableConfig hoodieTableConfig2 = new HoodieTableConfig(this.fs, metaClient.getMetaPath(), writeConfig2.getPayloadClass()); - assertEquals(Collections.emptyList(), hoodieTableConfig2.getMetadataPartitions()); + assertEquals(Collections.emptySet(), hoodieTableConfig2.getMetadataPartitions()); // Assert metadata table folder is deleted assertFalse(metaClient.getFs().exists( new Path(HoodieTableMetadata.getMetadataTableBasePath(writeConfig2.getBasePath())))); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java index 8cbb74e6f..8c92f8189 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java @@ -79,7 +79,6 @@ import scala.Tuple2; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists; import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -234,7 +233,7 @@ public class TestHoodieIndex extends TestHoodieMetadataBase { // check column_stats partition exists metaClient = HoodieTableMetaClient.reload(metaClient); assertTrue(metadataPartitionExists(metaClient.getBasePath(), context, COLUMN_STATS)); - assertTrue(getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(COLUMN_STATS.getPartitionPath())); + assertTrue(metaClient.getTableConfig().getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())); // delete the column_stats partition deleteMetadataPartition(metaClient.getBasePath(), context, COLUMN_STATS); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java index 886911466..1f52912d1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java @@ -57,6 +57,7 @@ import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; +import java.util.HashSet; import java.util.function.BiConsumer; import java.util.stream.Collectors; @@ -618,11 +619,10 @@ public class HoodieTableConfig extends HoodieConfig { ); } - public List getMetadataPartitions() { - return StringUtils.split( - getStringOrDefault(TABLE_METADATA_PARTITIONS, StringUtils.EMPTY_STRING), - CONFIG_VALUES_DELIMITER - ); + public Set getMetadataPartitions() { + return new HashSet<>( + StringUtils.split(getStringOrDefault(TABLE_METADATA_PARTITIONS, StringUtils.EMPTY_STRING), + CONFIG_VALUES_DELIMITER)); } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index c0e97f330..822fff32e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -1353,13 +1353,9 @@ public class HoodieTableMetadataUtil { return new HashSet<>(tableConfig.getMetadataPartitionsInflight()); } - public static Set getCompletedMetadataPartitions(HoodieTableConfig tableConfig) { - return new HashSet<>(tableConfig.getMetadataPartitions()); - } - public static Set getInflightAndCompletedMetadataPartitions(HoodieTableConfig tableConfig) { Set inflightAndCompletedPartitions = getInflightMetadataPartitions(tableConfig); - inflightAndCompletedPartitions.addAll(getCompletedMetadataPartitions(tableConfig)); + inflightAndCompletedPartitions.addAll(tableConfig.getMetadataPartitions()); return inflightAndCompletedPartitions; } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala index 23c860536..807b31e56 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala @@ -251,7 +251,7 @@ case class HoodieFileIndex(spark: SparkSession, override def sizeInBytes: Long = cachedFileSize private def isColumnStatsIndexAvailable = - HoodieTableMetadataUtil.getCompletedMetadataPartitions(metaClient.getTableConfig) + metaClient.getTableConfig.getMetadataPartitions .contains(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS) private def isDataSkippingEnabled: Boolean = diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java index 6f78487cc..1b1cbca45 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java @@ -53,7 +53,6 @@ import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions; import static org.apache.hudi.utilities.UtilHelpers.EXECUTE; import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE; @@ -243,7 +242,7 @@ public class HoodieIndexer { } private boolean indexExists(List partitionTypes) { - Set indexedMetadataPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig()); + Set indexedMetadataPartitions = metaClient.getTableConfig().getMetadataPartitions(); Set requestedIndexPartitionPaths = partitionTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet()); requestedIndexPartitionPaths.retainAll(indexedMetadataPartitions); if (!requestedIndexPartitionPaths.isEmpty()) { @@ -254,7 +253,7 @@ public class HoodieIndexer { } private boolean isMetadataInitialized() { - Set indexedMetadataPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig()); + Set indexedMetadataPartitions = metaClient.getTableConfig().getMetadataPartitions(); return !indexedMetadataPartitions.isEmpty(); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java index 0f57e3b1b..7117041ad 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java @@ -56,7 +56,6 @@ import java.util.Set; import static org.apache.hudi.common.table.HoodieTableMetaClient.reload; import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists; import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS; import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS; @@ -142,8 +141,8 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP initializeWriteClient(metadataConfigBuilder.build()); // validate table config - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath())); - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); // build indexer config which has only column_stats enabled (files and bloom filter is already enabled) indexMetadataPartitionsAndAssert(COLUMN_STATS, Arrays.asList(new MetadataPartitionType[]{FILES, BLOOM_FILTERS}), Collections.emptyList()); @@ -158,7 +157,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP initializeWriteClient(metadataConfigBuilder.build()); // validate table config - assertFalse(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath())); + assertFalse(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); // build indexer config which has only files enabled indexMetadataPartitionsAndAssert(FILES, Collections.emptyList(), Arrays.asList(new MetadataPartitionType[]{COLUMN_STATS, BLOOM_FILTERS})); @@ -176,7 +175,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP HoodieMetadataConfig.Builder metadataConfigBuilder = getMetadataConfigBuilder(false, false); initializeWriteClient(metadataConfigBuilder.build()); // validate table config - assertFalse(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath())); + assertFalse(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); // build indexer config which has only column stats enabled. expected to throw exception. HoodieIndexer.Config config = new HoodieIndexer.Config(); @@ -192,9 +191,9 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP // validate table config metaClient = reload(metaClient); - assertFalse(getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(FILES.getPartitionPath())); - assertFalse(getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(COLUMN_STATS.getPartitionPath())); - assertFalse(getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath())); + assertFalse(metaClient.getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); + assertFalse(metaClient.getTableConfig().getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())); + assertFalse(metaClient.getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); // validate metadata partitions actually exist assertFalse(metadataPartitionExists(basePath, context, FILES)); @@ -229,7 +228,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP // validate table config metaClient = reload(metaClient); - Set completedPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig()); + Set completedPartitions = metaClient.getTableConfig().getMetadataPartitions(); assertTrue(completedPartitions.contains(partitionTypeToIndex.getPartitionPath())); alreadyCompletedPartitions.forEach(entry -> assertTrue(completedPartitions.contains(entry.getPartitionPath()))); nonExistantPartitions.forEach(entry -> assertFalse(completedPartitions.contains(entry.getPartitionPath()))); @@ -257,9 +256,9 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP assertNoWriteErrors(statuses); // validate partitions built successfully - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); assertTrue(metadataPartitionExists(basePath, context, FILES)); - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); assertTrue(metadataPartitionExists(basePath, context, BLOOM_FILTERS)); // build indexer config which has only column_stats enabled (files is enabled by default) @@ -288,9 +287,9 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP assertFalse(metadataPartitionExists(basePath, context, COLUMN_STATS)); // check other partitions are intact - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); assertTrue(metadataPartitionExists(basePath, context, FILES)); - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); assertTrue(metadataPartitionExists(basePath, context, BLOOM_FILTERS)); } @@ -312,7 +311,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP assertNoWriteErrors(statuses); // validate files partition built successfully - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); assertTrue(metadataPartitionExists(basePath, context, FILES)); // build indexer config which has only bloom_filters enabled @@ -320,7 +319,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP // start the indexer and validate bloom_filters index is also complete HoodieIndexer indexer = new HoodieIndexer(jsc, config); assertEquals(0, indexer.start(0)); - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); assertTrue(metadataPartitionExists(basePath, context, BLOOM_FILTERS)); // completed index timeline for later validation @@ -344,9 +343,9 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP dropIndexAndAssert(COLUMN_STATS, "delta-streamer-config/indexer.properties", Option.empty()); // check other partitions are intact - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath())); assertTrue(metadataPartitionExists(basePath, context, FILES)); - assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath())); + assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath())); assertTrue(metadataPartitionExists(basePath, context, BLOOM_FILTERS)); // drop bloom filter partition. timeline files should not be deleted since the index building is complete.