
[HUDI-3836] Improve the way of fetching metadata partitions from table (#5286)

Co-authored-by: xicm <xicm@asiainfo.com>
xi chaomin
2022-07-05 22:50:17 +08:00
committed by GitHub
parent fbda4ad5bd
commit 23c9c5c296
13 changed files with 49 additions and 63 deletions
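
In short: the static helper HoodieTableMetadataUtil.getCompletedMetadataPartitions(tableConfig) is deleted, and HoodieTableConfig.getMetadataPartitions() now returns a Set<String> directly, so call sites fetch the completed metadata partitions straight from the table config. A minimal before/after sketch of the call-site pattern (the hoodieTable handle is assumed from the hunks below):

    // Before: static wrapper around the table-config lookup.
    Set<String> completed = HoodieTableMetadataUtil.getCompletedMetadataPartitions(
        hoodieTable.getMetaClient().getTableConfig());

    // After: the table config exposes the set directly.
    Set<String> completed = hoodieTable.getMetaClient().getTableConfig().getMetadataPartitions();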

View File

@@ -56,7 +56,6 @@ import static java.util.stream.Collectors.toList;
 import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty;
 import static org.apache.hudi.index.HoodieIndexUtils.getLatestBaseFilesForAllPartitions;
 import static org.apache.hudi.metadata.HoodieMetadataPayload.unwrapStatisticValueWrapper;
-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS;

 /**
@@ -143,7 +142,7 @@ public class HoodieBloomIndex extends HoodieIndex<Object, Object> {
     if (config.getBloomIndexPruneByRanges()) {
       // load column ranges from metadata index if column stats index is enabled and column_stats metadata partition is available
       if (config.getBloomIndexUseMetadata()
-          && getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig()).contains(COLUMN_STATS.getPartitionPath())) {
+          && hoodieTable.getMetaClient().getTableConfig().getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath())) {
         fileInfoList = loadColumnRangesFromMetaIndex(affectedPartitionPathList, context, hoodieTable);
       }
       // fallback to loading column ranges from files

View File

@@ -37,7 +37,6 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;

-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS;

 /**
@@ -64,7 +63,7 @@ public class HoodieKeyLookupHandle<T extends HoodieRecordPayload, I, K, O> exten
     HoodieTimer timer = new HoodieTimer().startTimer();
     try {
       if (config.getBloomIndexUseMetadata()
-          && getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig())
+          && hoodieTable.getMetaClient().getTableConfig().getMetadataPartitions()
              .contains(BLOOM_FILTERS.getPartitionPath())) {
         bloomFilter = hoodieTable.getMetadataTable().getBloomFilter(partitionPathFileIDPair.getLeft(), partitionPathFileIDPair.getRight())
             .orElseThrow(() -> new HoodieIndexException("BloomFilter missing for " + partitionPathFileIDPair.getRight()));

View File

@@ -96,7 +96,6 @@ import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deseri
 import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING;
 import static org.apache.hudi.metadata.HoodieTableMetadata.METADATA_TABLE_NAME_SUFFIX;
 import static org.apache.hudi.metadata.HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP;
-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions;
@@ -579,7 +578,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
   }

   private void updateInitializedPartitionsInTableConfig(List<MetadataPartitionType> partitionTypes) {
-    Set<String> completedPartitions = getCompletedMetadataPartitions(dataMetaClient.getTableConfig());
+    Set<String> completedPartitions = dataMetaClient.getTableConfig().getMetadataPartitions();
     completedPartitions.addAll(partitionTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet()));
     dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS.key(), String.join(",", completedPartitions));
     HoodieTableConfig.update(dataMetaClient.getFs(), new Path(dataMetaClient.getMetaPath()), dataMetaClient.getTableConfig().getProps());
@@ -716,7 +715,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
   }

   public void dropMetadataPartitions(List<MetadataPartitionType> metadataPartitions) throws IOException {
-    Set<String> completedIndexes = getCompletedMetadataPartitions(dataMetaClient.getTableConfig());
+    Set<String> completedIndexes = dataMetaClient.getTableConfig().getMetadataPartitions();
     Set<String> inflightIndexes = getInflightMetadataPartitions(dataMetaClient.getTableConfig());

     for (MetadataPartitionType partitionType : metadataPartitions) {
@@ -806,7 +805,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
   private Set<String> getMetadataPartitionsToUpdate() {
     // fetch partitions to update from table config
-    Set<String> partitionsToUpdate = getCompletedMetadataPartitions(dataMetaClient.getTableConfig());
+    Set<String> partitionsToUpdate = dataMetaClient.getTableConfig().getMetadataPartitions();
     // add inflight indexes as well because the file groups have already been initialized, so writers can log updates
     // NOTE: Async HoodieIndexer can move some partition to inflight. While that partition is still being built,
     //       the regular ingestion writers should not be blocked. They can go ahead and log updates to the metadata partition.
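
The comments in getMetadataPartitionsToUpdate spell out a union of completed and inflight partitions. A hedged sketch of that computation, reusing the names from the hunk above (the addAll step is an assumption based on the comments; it is not shown in the diff):

    // Completed partitions from table config, plus inflight ones, so regular ingestion
    // writers keep logging updates while the async indexer is still building a partition.
    Set<String> partitionsToUpdate = dataMetaClient.getTableConfig().getMetadataPartitions();
    partitionsToUpdate.addAll(getInflightMetadataPartitions(dataMetaClient.getTableConfig()));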

View File

@@ -102,7 +102,6 @@ import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PART
 import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataTable;
-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists;

 /**
@@ -900,7 +899,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
       return false;
     }
     return metadataIndexDisabled
-        && getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(partitionType.getPartitionPath());
+        && metaClient.getTableConfig().getMetadataPartitions().contains(partitionType.getPartitionPath());
   }

   private boolean shouldExecuteMetadataTableDeletion() {
@@ -919,7 +918,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
    * Clears hoodie.table.metadata.partitions in hoodie.properties
    */
   private void clearMetadataTablePartitionsConfig(Option<MetadataPartitionType> partitionType, boolean clearAll) {
-    Set<String> partitions = getCompletedMetadataPartitions(metaClient.getTableConfig());
+    Set<String> partitions = metaClient.getTableConfig().getMetadataPartitions();
     if (clearAll && partitions.size() > 0) {
       LOG.info("Clear hoodie.table.metadata.partitions in hoodie.properties");
       metaClient.getTableConfig().setValue(TABLE_METADATA_PARTITIONS.key(), EMPTY_STRING);

View File

@@ -74,7 +74,6 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTI
 import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE;
 import static org.apache.hudi.metadata.HoodieTableMetadata.getMetadataTableBasePath;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition;
-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists;
@@ -192,7 +191,7 @@ public class RunIndexActionExecutor<T extends HoodieRecordPayload, I, K, O> exte
   private void abort(HoodieInstant indexInstant, Set<String> requestedPartitions) {
     Set<String> inflightPartitions = getInflightMetadataPartitions(table.getMetaClient().getTableConfig());
-    Set<String> completedPartitions = getCompletedMetadataPartitions(table.getMetaClient().getTableConfig());
+    Set<String> completedPartitions = table.getMetaClient().getTableConfig().getMetadataPartitions();
     // update table config
     requestedPartitions.forEach(partition -> {
       inflightPartitions.remove(partition);
@@ -302,7 +301,7 @@ public class RunIndexActionExecutor<T extends HoodieRecordPayload, I, K, O> exte
   private void updateMetadataPartitionsTableConfig(HoodieTableMetaClient metaClient, Set<String> metadataPartitions) {
     // remove from inflight and update completed indexes
     Set<String> inflightPartitions = getInflightMetadataPartitions(metaClient.getTableConfig());
-    Set<String> completedPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig());
+    Set<String> completedPartitions = metaClient.getTableConfig().getMetadataPartitions();
     inflightPartitions.removeAll(metadataPartitions);
     completedPartitions.addAll(metadataPartitions);
     // update table config

View File

@@ -43,7 +43,6 @@ import java.util.stream.Collectors;
 import scala.Tuple2;

-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS;

 /**
@@ -81,7 +80,7 @@ public class SparkHoodieBloomIndexHelper extends BaseHoodieBloomIndexHelper {
     JavaRDD<List<HoodieKeyLookupResult>> keyLookupResultRDD;

     if (config.getBloomIndexUseMetadata()
-        && getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig())
+        && hoodieTable.getMetaClient().getTableConfig().getMetadataPartitions()
            .contains(BLOOM_FILTERS.getPartitionPath())) {
       // Step 1: Sort by file id
       JavaRDD<Tuple2<String, HoodieKey>> sortedFileIdAndKeyPairs =

View File

@@ -149,7 +149,6 @@ import static org.apache.hudi.common.model.WriteOperationType.DELETE;
 import static org.apache.hudi.common.model.WriteOperationType.INSERT;
 import static org.apache.hudi.common.model.WriteOperationType.UPSERT;
 import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS;
 import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS;
 import static org.apache.hudi.metadata.MetadataPartitionType.FILES;
@@ -240,9 +239,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
     HoodieTableMetaClient.reload(metaClient);
     HoodieTableConfig tableConfig = metaClient.getTableConfig();
     assertFalse(tableConfig.getMetadataPartitions().isEmpty());
-    assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath()));
-    assertFalse(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath()));
-    assertFalse(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertTrue(tableConfig.getMetadataPartitions().contains(FILES.getPartitionPath()));
+    assertFalse(tableConfig.getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath()));
+    assertFalse(tableConfig.getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));

     // enable column stats and run 1 upserts
     HoodieWriteConfig cfgWithColStatsEnabled = HoodieWriteConfig.newBuilder()
@@ -265,9 +264,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
     HoodieTableMetaClient.reload(metaClient);
     tableConfig = metaClient.getTableConfig();
     assertFalse(tableConfig.getMetadataPartitions().isEmpty());
-    assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath()));
-    assertTrue(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath()));
-    assertFalse(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertTrue(tableConfig.getMetadataPartitions().contains(FILES.getPartitionPath()));
+    assertTrue(tableConfig.getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath()));
+    assertFalse(tableConfig.getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));

     // disable column stats and run 1 upsert
     HoodieWriteConfig cfgWithColStatsDisabled = HoodieWriteConfig.newBuilder()
@@ -291,9 +290,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
     HoodieTableMetaClient.reload(metaClient);
     tableConfig = metaClient.getTableConfig();
     assertFalse(tableConfig.getMetadataPartitions().isEmpty());
-    assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath()));
-    assertFalse(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath()));
-    assertFalse(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertTrue(tableConfig.getMetadataPartitions().contains(FILES.getPartitionPath()));
+    assertFalse(tableConfig.getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath()));
+    assertFalse(tableConfig.getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));

     // enable bloom filter as well as column stats and run 1 upsert
     HoodieWriteConfig cfgWithBloomFilterEnabled = HoodieWriteConfig.newBuilder()
@@ -317,9 +316,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
     HoodieTableMetaClient.reload(metaClient);
     tableConfig = metaClient.getTableConfig();
     assertFalse(tableConfig.getMetadataPartitions().isEmpty());
-    assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath()));
-    assertTrue(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath()));
-    assertTrue(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertTrue(tableConfig.getMetadataPartitions().contains(FILES.getPartitionPath()));
+    assertTrue(tableConfig.getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath()));
+    assertTrue(tableConfig.getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));
   }

   @Test
@@ -360,7 +359,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
     HoodieTableConfig hoodieTableConfig2 =
         new HoodieTableConfig(this.fs, metaClient.getMetaPath(), writeConfig2.getPayloadClass());
-    assertEquals(Collections.emptyList(), hoodieTableConfig2.getMetadataPartitions());
+    assertEquals(Collections.emptySet(), hoodieTableConfig2.getMetadataPartitions());

     // Assert metadata table folder is deleted
     assertFalse(metaClient.getFs().exists(
         new Path(HoodieTableMetadata.getMetadataTableBasePath(writeConfig2.getBasePath()))));

View File

@@ -79,7 +79,6 @@ import scala.Tuple2;
 import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition;
-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists;
 import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS;
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -234,7 +233,7 @@ public class TestHoodieIndex extends TestHoodieMetadataBase {
     // check column_stats partition exists
     metaClient = HoodieTableMetaClient.reload(metaClient);
     assertTrue(metadataPartitionExists(metaClient.getBasePath(), context, COLUMN_STATS));
-    assertTrue(getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(COLUMN_STATS.getPartitionPath()));
+    assertTrue(metaClient.getTableConfig().getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath()));

     // delete the column_stats partition
     deleteMetadataPartition(metaClient.getBasePath(), context, COLUMN_STATS);

View File

@@ -57,6 +57,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
+import java.util.HashSet;
 import java.util.function.BiConsumer;
 import java.util.stream.Collectors;
@@ -618,11 +619,10 @@ public class HoodieTableConfig extends HoodieConfig {
     );
   }

-  public List<String> getMetadataPartitions() {
-    return StringUtils.split(
-        getStringOrDefault(TABLE_METADATA_PARTITIONS, StringUtils.EMPTY_STRING),
-        CONFIG_VALUES_DELIMITER
-    );
+  public Set<String> getMetadataPartitions() {
+    return new HashSet<>(
+        StringUtils.split(getStringOrDefault(TABLE_METADATA_PARTITIONS, StringUtils.EMPTY_STRING),
+            CONFIG_VALUES_DELIMITER));
   }

   /**
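
Call sites such as updateInitializedPartitionsInTableConfig add elements to the returned collection, which is presumably why the new getter wraps the parsed values in a fresh HashSet rather than handing back the split list. A self-contained sketch of the round-trip (the comma delimiter is assumed from CONFIG_VALUES_DELIMITER; the class name is illustrative):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class MetadataPartitionsRoundTrip {
      public static void main(String[] args) {
        // Parse the serialized config value into a fresh, mutable set,
        // mirroring the new getMetadataPartitions() above.
        String configValue = "files,column_stats";
        Set<String> partitions = new HashSet<>(Arrays.asList(configValue.split(",")));

        // Mutating the result is safe because every call returns a new set.
        partitions.add("bloom_filters");

        // Serialize back with String.join, as the metadata writer does.
        System.out.println(String.join(",", partitions)); // iteration order is unspecified for a HashSet
      }
    }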

View File

@@ -1353,13 +1353,9 @@ public class HoodieTableMetadataUtil {
     return new HashSet<>(tableConfig.getMetadataPartitionsInflight());
   }

-  public static Set<String> getCompletedMetadataPartitions(HoodieTableConfig tableConfig) {
-    return new HashSet<>(tableConfig.getMetadataPartitions());
-  }
-
   public static Set<String> getInflightAndCompletedMetadataPartitions(HoodieTableConfig tableConfig) {
     Set<String> inflightAndCompletedPartitions = getInflightMetadataPartitions(tableConfig);
-    inflightAndCompletedPartitions.addAll(getCompletedMetadataPartitions(tableConfig));
+    inflightAndCompletedPartitions.addAll(tableConfig.getMetadataPartitions());
     return inflightAndCompletedPartitions;
   }
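
With the wrapper gone, checking whether an index partition is usable reduces to a single pattern across the writers; a sketch using the names from the HoodieBloomIndex hunk earlier in this commit:

    // Consult the metadata-table column-stats index only when the feature is enabled
    // and the column_stats partition has actually finished building.
    boolean useColumnStatsIndex = config.getBloomIndexUseMetadata()
        && hoodieTable.getMetaClient().getTableConfig().getMetadataPartitions()
            .contains(COLUMN_STATS.getPartitionPath());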

View File

@@ -251,7 +251,7 @@ case class HoodieFileIndex(spark: SparkSession,
   override def sizeInBytes: Long = cachedFileSize

   private def isColumnStatsIndexAvailable =
-    HoodieTableMetadataUtil.getCompletedMetadataPartitions(metaClient.getTableConfig)
+    metaClient.getTableConfig.getMetadataPartitions
       .contains(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS)

   private def isDataSkippingEnabled: Boolean =

View File

@@ -53,7 +53,6 @@ import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
 import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS;
-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions;
 import static org.apache.hudi.utilities.UtilHelpers.EXECUTE;
 import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE;
@@ -243,7 +242,7 @@ public class HoodieIndexer {
   }

   private boolean indexExists(List<MetadataPartitionType> partitionTypes) {
-    Set<String> indexedMetadataPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig());
+    Set<String> indexedMetadataPartitions = metaClient.getTableConfig().getMetadataPartitions();
     Set<String> requestedIndexPartitionPaths = partitionTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet());
     requestedIndexPartitionPaths.retainAll(indexedMetadataPartitions);
     if (!requestedIndexPartitionPaths.isEmpty()) {
@@ -254,7 +253,7 @@ public class HoodieIndexer {
   }

   private boolean isMetadataInitialized() {
-    Set<String> indexedMetadataPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig());
+    Set<String> indexedMetadataPartitions = metaClient.getTableConfig().getMetadataPartitions();
     return !indexedMetadataPartitions.isEmpty();
   }

View File

@@ -56,7 +56,6 @@ import java.util.Set;
 import static org.apache.hudi.common.table.HoodieTableMetaClient.reload;
 import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED;
-import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists;
 import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS;
 import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS;
@@ -142,8 +141,8 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     initializeWriteClient(metadataConfigBuilder.build());
     // validate table config
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath()));
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));

     // build indexer config which has only column_stats enabled (files and bloom filter is already enabled)
     indexMetadataPartitionsAndAssert(COLUMN_STATS, Arrays.asList(new MetadataPartitionType[]{FILES, BLOOM_FILTERS}), Collections.emptyList());
@@ -158,7 +157,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     initializeWriteClient(metadataConfigBuilder.build());
     // validate table config
-    assertFalse(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath()));
+    assertFalse(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath()));

     // build indexer config which has only files enabled
     indexMetadataPartitionsAndAssert(FILES, Collections.emptyList(), Arrays.asList(new MetadataPartitionType[]{COLUMN_STATS, BLOOM_FILTERS}));
@@ -176,7 +175,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     HoodieMetadataConfig.Builder metadataConfigBuilder = getMetadataConfigBuilder(false, false);
     initializeWriteClient(metadataConfigBuilder.build());
     // validate table config
-    assertFalse(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath()));
+    assertFalse(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath()));

     // build indexer config which has only column stats enabled. expected to throw exception.
     HoodieIndexer.Config config = new HoodieIndexer.Config();
@@ -192,9 +191,9 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     // validate table config
     metaClient = reload(metaClient);
-    assertFalse(getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(FILES.getPartitionPath()));
-    assertFalse(getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(COLUMN_STATS.getPartitionPath()));
-    assertFalse(getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertFalse(metaClient.getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath()));
+    assertFalse(metaClient.getTableConfig().getMetadataPartitions().contains(COLUMN_STATS.getPartitionPath()));
+    assertFalse(metaClient.getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));

     // validate metadata partitions actually exist
     assertFalse(metadataPartitionExists(basePath, context, FILES));
@@ -229,7 +228,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     // validate table config
     metaClient = reload(metaClient);
-    Set<String> completedPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig());
+    Set<String> completedPartitions = metaClient.getTableConfig().getMetadataPartitions();
     assertTrue(completedPartitions.contains(partitionTypeToIndex.getPartitionPath()));
     alreadyCompletedPartitions.forEach(entry -> assertTrue(completedPartitions.contains(entry.getPartitionPath())));
     nonExistantPartitions.forEach(entry -> assertFalse(completedPartitions.contains(entry.getPartitionPath())));
@@ -257,9 +256,9 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     assertNoWriteErrors(statuses);

     // validate partitions built successfully
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath()));
     assertTrue(metadataPartitionExists(basePath, context, FILES));
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));
     assertTrue(metadataPartitionExists(basePath, context, BLOOM_FILTERS));

     // build indexer config which has only column_stats enabled (files is enabled by default)
@@ -288,9 +287,9 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     assertFalse(metadataPartitionExists(basePath, context, COLUMN_STATS));

     // check other partitions are intact
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath()));
     assertTrue(metadataPartitionExists(basePath, context, FILES));
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));
     assertTrue(metadataPartitionExists(basePath, context, BLOOM_FILTERS));
   }
@@ -312,7 +311,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     assertNoWriteErrors(statuses);

     // validate files partition built successfully
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath()));
     assertTrue(metadataPartitionExists(basePath, context, FILES));

     // build indexer config which has only bloom_filters enabled
@@ -320,7 +319,7 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     // start the indexer and validate bloom_filters index is also complete
     HoodieIndexer indexer = new HoodieIndexer(jsc, config);
     assertEquals(0, indexer.start(0));
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));
     assertTrue(metadataPartitionExists(basePath, context, BLOOM_FILTERS));

     // completed index timeline for later validation
@@ -344,9 +343,9 @@ public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkP
     dropIndexAndAssert(COLUMN_STATS, "delta-streamer-config/indexer.properties", Option.empty());

     // check other partitions are intact
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(FILES.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(FILES.getPartitionPath()));
     assertTrue(metadataPartitionExists(basePath, context, FILES));
-    assertTrue(getCompletedMetadataPartitions(reload(metaClient).getTableConfig()).contains(BLOOM_FILTERS.getPartitionPath()));
+    assertTrue(reload(metaClient).getTableConfig().getMetadataPartitions().contains(BLOOM_FILTERS.getPartitionPath()));
     assertTrue(metadataPartitionExists(basePath, context, BLOOM_FILTERS));

     // drop bloom filter partition. timeline files should not be deleted since the index building is complete.