[HUDI-3825] Fixing Column Stats Index updating sequence (#5267)
This commit is contained in:
@@ -1557,11 +1557,11 @@ public class HoodieWriteConfig extends HoodieConfig {
|
|||||||
return isMetadataTableEnabled() && getMetadataConfig().isColumnStatsIndexEnabled();
|
return isMetadataTableEnabled() && getMetadataConfig().isColumnStatsIndexEnabled();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getColumnsEnabledForColumnStatsIndex() {
|
public List<String> getColumnsEnabledForColumnStatsIndex() {
|
||||||
return getMetadataConfig().getColumnsEnabledForColumnStatsIndex();
|
return getMetadataConfig().getColumnsEnabledForColumnStatsIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getColumnsEnabledForBloomFilterIndex() {
|
public List<String> getColumnsEnabledForBloomFilterIndex() {
|
||||||
return getMetadataConfig().getColumnsEnabledForBloomFilterIndex();
|
return getMetadataConfig().getColumnsEnabledForBloomFilterIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -54,7 +54,6 @@ import org.apache.hudi.common.util.DefaultSizeEstimator;
|
|||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.ReflectionUtils;
|
import org.apache.hudi.common.util.ReflectionUtils;
|
||||||
import org.apache.hudi.common.util.SizeEstimator;
|
import org.apache.hudi.common.util.SizeEstimator;
|
||||||
import org.apache.hudi.common.util.StringUtils;
|
|
||||||
import org.apache.hudi.config.HoodieWriteConfig;
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
import org.apache.hudi.exception.HoodieAppendException;
|
import org.apache.hudi.exception.HoodieAppendException;
|
||||||
import org.apache.hudi.exception.HoodieException;
|
import org.apache.hudi.exception.HoodieException;
|
||||||
@@ -66,6 +65,7 @@ import org.apache.log4j.Logger;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@@ -73,7 +73,6 @@ import java.util.Properties;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.collectColumnRangeMetadata;
|
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.collectColumnRangeMetadata;
|
||||||
|
|
||||||
@@ -348,15 +347,16 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
|
|||||||
|
|
||||||
if (config.isMetadataColumnStatsIndexEnabled()) {
|
if (config.isMetadataColumnStatsIndexEnabled()) {
|
||||||
final List<Schema.Field> fieldsToIndex;
|
final List<Schema.Field> fieldsToIndex;
|
||||||
if (StringUtils.isNullOrEmpty(config.getColumnsEnabledForColumnStatsIndex())) {
|
// If column stats index is enabled but columns are not configured, then we assume that
|
||||||
// If column stats index is enabled but columns not configured then we assume that all columns should be indexed
|
// all columns should be indexed
|
||||||
|
if (config.getColumnsEnabledForColumnStatsIndex().isEmpty()) {
|
||||||
fieldsToIndex = writeSchemaWithMetaFields.getFields();
|
fieldsToIndex = writeSchemaWithMetaFields.getFields();
|
||||||
} else {
|
} else {
|
||||||
Set<String> columnsToIndex = Stream.of(config.getColumnsEnabledForColumnStatsIndex().split(","))
|
Set<String> columnsToIndexSet = new HashSet<>(config.getColumnsEnabledForColumnStatsIndex());
|
||||||
.map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toSet());
|
|
||||||
|
|
||||||
fieldsToIndex = writeSchemaWithMetaFields.getFields().stream()
|
fieldsToIndex = writeSchemaWithMetaFields.getFields().stream()
|
||||||
.filter(field -> columnsToIndex.contains(field.name())).collect(Collectors.toList());
|
.filter(field -> columnsToIndexSet.contains(field.name()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, HoodieColumnRangeMetadata<Comparable>> columnRangesMetadataMap =
|
Map<String, HoodieColumnRangeMetadata<Comparable>> columnRangesMetadataMap =
|
||||||
|
|||||||
@@ -18,6 +18,11 @@
|
|||||||
|
|
||||||
package org.apache.hudi.metadata;
|
package org.apache.hudi.metadata;
|
||||||
|
|
||||||
|
import org.apache.avro.specific.SpecificRecordBase;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieIndexPartitionInfo;
|
import org.apache.hudi.avro.model.HoodieIndexPartitionInfo;
|
||||||
import org.apache.hudi.avro.model.HoodieInstantInfo;
|
import org.apache.hudi.avro.model.HoodieInstantInfo;
|
||||||
@@ -55,7 +60,6 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
|||||||
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
|
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
|
||||||
import org.apache.hudi.common.util.HoodieTimer;
|
import org.apache.hudi.common.util.HoodieTimer;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.StringUtils;
|
|
||||||
import org.apache.hudi.common.util.ValidationUtils;
|
import org.apache.hudi.common.util.ValidationUtils;
|
||||||
import org.apache.hudi.common.util.collection.Pair;
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||||
@@ -66,12 +70,6 @@ import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig;
|
|||||||
import org.apache.hudi.exception.HoodieException;
|
import org.apache.hudi.exception.HoodieException;
|
||||||
import org.apache.hudi.exception.HoodieIndexException;
|
import org.apache.hudi.exception.HoodieIndexException;
|
||||||
import org.apache.hudi.exception.HoodieMetadataException;
|
import org.apache.hudi.exception.HoodieMetadataException;
|
||||||
|
|
||||||
import org.apache.avro.specific.SpecificRecordBase;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
@@ -729,12 +727,14 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
|||||||
|
|
||||||
private MetadataRecordsGenerationParams getRecordsGenerationParams() {
|
private MetadataRecordsGenerationParams getRecordsGenerationParams() {
|
||||||
return new MetadataRecordsGenerationParams(
|
return new MetadataRecordsGenerationParams(
|
||||||
dataMetaClient, enabledPartitionTypes, dataWriteConfig.getBloomFilterType(),
|
dataMetaClient,
|
||||||
|
enabledPartitionTypes,
|
||||||
|
dataWriteConfig.getBloomFilterType(),
|
||||||
dataWriteConfig.getMetadataBloomFilterIndexParallelism(),
|
dataWriteConfig.getMetadataBloomFilterIndexParallelism(),
|
||||||
dataWriteConfig.isMetadataColumnStatsIndexEnabled(),
|
dataWriteConfig.isMetadataColumnStatsIndexEnabled(),
|
||||||
dataWriteConfig.getColumnStatsIndexParallelism(),
|
dataWriteConfig.getColumnStatsIndexParallelism(),
|
||||||
StringUtils.toList(dataWriteConfig.getColumnsEnabledForColumnStatsIndex()),
|
dataWriteConfig.getColumnsEnabledForColumnStatsIndex(),
|
||||||
StringUtils.toList(dataWriteConfig.getColumnsEnabledForBloomFilterIndex()));
|
dataWriteConfig.getColumnsEnabledForBloomFilterIndex());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -1021,6 +1021,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
|||||||
})
|
})
|
||||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||||
|
|
||||||
|
int totalDataFilesCount = partitionToFilesMap.values().stream().mapToInt(Map::size).sum();
|
||||||
List<String> partitions = new ArrayList<>(partitionToFilesMap.keySet());
|
List<String> partitions = new ArrayList<>(partitionToFilesMap.keySet());
|
||||||
|
|
||||||
if (partitionTypes.contains(MetadataPartitionType.FILES)) {
|
if (partitionTypes.contains(MetadataPartitionType.FILES)) {
|
||||||
@@ -1031,19 +1032,19 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
|||||||
partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecords);
|
partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecords);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (partitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) {
|
if (partitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS) && totalDataFilesCount > 0) {
|
||||||
final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToBloomFilterRecords(
|
final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToBloomFilterRecords(
|
||||||
engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams(), createInstantTime);
|
engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams(), createInstantTime);
|
||||||
partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, recordsRDD);
|
partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, recordsRDD);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (partitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) {
|
if (partitionTypes.contains(MetadataPartitionType.COLUMN_STATS) && totalDataFilesCount > 0) {
|
||||||
final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToColumnStatsRecords(
|
final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToColumnStatsRecords(
|
||||||
engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams());
|
engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams());
|
||||||
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, recordsRDD);
|
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, recordsRDD);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Committing " + partitions.size() + " partitions and " + partitionToFilesMap.values().size() + " files to metadata");
|
LOG.info("Committing " + partitions.size() + " partitions and " + totalDataFilesCount + " files to metadata");
|
||||||
|
|
||||||
commit(createInstantTime, partitionToRecordsMap, false);
|
commit(createInstantTime, partitionToRecordsMap, false);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,6 +18,11 @@
|
|||||||
|
|
||||||
package org.apache.hudi.table;
|
package org.apache.hudi.table;
|
||||||
|
|
||||||
|
import org.apache.avro.Schema;
|
||||||
|
import org.apache.avro.specific.SpecificRecordBase;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||||
@@ -60,7 +65,6 @@ import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView;
|
|||||||
import org.apache.hudi.common.table.view.TableFileSystemView.SliceView;
|
import org.apache.hudi.common.table.view.TableFileSystemView.SliceView;
|
||||||
import org.apache.hudi.common.util.Functions;
|
import org.apache.hudi.common.util.Functions;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.StringUtils;
|
|
||||||
import org.apache.hudi.common.util.collection.Pair;
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
import org.apache.hudi.config.HoodieWriteConfig;
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
import org.apache.hudi.exception.HoodieException;
|
import org.apache.hudi.exception.HoodieException;
|
||||||
@@ -78,12 +82,6 @@ import org.apache.hudi.table.marker.WriteMarkers;
|
|||||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||||
import org.apache.hudi.table.storage.HoodieLayoutFactory;
|
import org.apache.hudi.table.storage.HoodieLayoutFactory;
|
||||||
import org.apache.hudi.table.storage.HoodieStorageLayout;
|
import org.apache.hudi.table.storage.HoodieStorageLayout;
|
||||||
|
|
||||||
import org.apache.avro.Schema;
|
|
||||||
import org.apache.avro.specific.SpecificRecordBase;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
@@ -888,7 +886,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
|
|||||||
return !HoodieTableMetadata.isMetadataTable(metaClient.getBasePath())
|
return !HoodieTableMetadata.isMetadataTable(metaClient.getBasePath())
|
||||||
&& !config.isMetadataTableEnabled()
|
&& !config.isMetadataTableEnabled()
|
||||||
&& (!metaClient.getTableConfig().contains(TABLE_METADATA_PARTITIONS)
|
&& (!metaClient.getTableConfig().contains(TABLE_METADATA_PARTITIONS)
|
||||||
|| !StringUtils.isNullOrEmpty(metaClient.getTableConfig().getMetadataPartitions()));
|
|| !metaClient.getTableConfig().getMetadataPartitions().isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -18,6 +18,14 @@
|
|||||||
|
|
||||||
package org.apache.hudi.client.functional;
|
package org.apache.hudi.client.functional;
|
||||||
|
|
||||||
|
import org.apache.avro.Schema;
|
||||||
|
import org.apache.avro.generic.GenericRecord;
|
||||||
|
import org.apache.avro.generic.IndexedRecord;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieMetadataColumnStats;
|
import org.apache.hudi.avro.model.HoodieMetadataColumnStats;
|
||||||
@@ -70,7 +78,6 @@ import org.apache.hudi.common.testutils.HoodieTestTable;
|
|||||||
import org.apache.hudi.common.util.ClosableIterator;
|
import org.apache.hudi.common.util.ClosableIterator;
|
||||||
import org.apache.hudi.common.util.HoodieTimer;
|
import org.apache.hudi.common.util.HoodieTimer;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.StringUtils;
|
|
||||||
import org.apache.hudi.common.util.collection.ExternalSpillableMap;
|
import org.apache.hudi.common.util.collection.ExternalSpillableMap;
|
||||||
import org.apache.hudi.common.util.hash.ColumnIndexID;
|
import org.apache.hudi.common.util.hash.ColumnIndexID;
|
||||||
import org.apache.hudi.common.util.hash.PartitionIndexID;
|
import org.apache.hudi.common.util.hash.PartitionIndexID;
|
||||||
@@ -98,15 +105,6 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
|
|||||||
import org.apache.hudi.table.upgrade.SparkUpgradeDowngradeHelper;
|
import org.apache.hudi.table.upgrade.SparkUpgradeDowngradeHelper;
|
||||||
import org.apache.hudi.table.upgrade.UpgradeDowngrade;
|
import org.apache.hudi.table.upgrade.UpgradeDowngrade;
|
||||||
import org.apache.hudi.testutils.MetadataMergeWriteStatus;
|
import org.apache.hudi.testutils.MetadataMergeWriteStatus;
|
||||||
|
|
||||||
import org.apache.avro.Schema;
|
|
||||||
import org.apache.avro.generic.GenericRecord;
|
|
||||||
import org.apache.avro.generic.IndexedRecord;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.parquet.avro.AvroSchemaConverter;
|
import org.apache.parquet.avro.AvroSchemaConverter;
|
||||||
@@ -362,7 +360,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
|
|||||||
|
|
||||||
HoodieTableConfig hoodieTableConfig2 =
|
HoodieTableConfig hoodieTableConfig2 =
|
||||||
new HoodieTableConfig(this.fs, metaClient.getMetaPath(), writeConfig2.getPayloadClass());
|
new HoodieTableConfig(this.fs, metaClient.getMetaPath(), writeConfig2.getPayloadClass());
|
||||||
assertEquals(StringUtils.EMPTY_STRING, hoodieTableConfig2.getMetadataPartitions());
|
assertEquals(Collections.emptyList(), hoodieTableConfig2.getMetadataPartitions());
|
||||||
// Assert metadata table folder is deleted
|
// Assert metadata table folder is deleted
|
||||||
assertFalse(metaClient.getFs().exists(
|
assertFalse(metaClient.getFs().exists(
|
||||||
new Path(HoodieTableMetadata.getMetadataTableBasePath(writeConfig2.getBasePath()))));
|
new Path(HoodieTableMetadata.getMetadataTableBasePath(writeConfig2.getBasePath()))));
|
||||||
@@ -623,7 +621,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
|
|||||||
.withMetadataIndexBloomFilterFileGroups(4)
|
.withMetadataIndexBloomFilterFileGroups(4)
|
||||||
.withMetadataIndexColumnStats(true)
|
.withMetadataIndexColumnStats(true)
|
||||||
.withMetadataIndexBloomFilterFileGroups(2)
|
.withMetadataIndexBloomFilterFileGroups(2)
|
||||||
.withMetadataIndexForAllColumns(true)
|
|
||||||
.build())
|
.build())
|
||||||
.build();
|
.build();
|
||||||
init(tableType, writeConfig);
|
init(tableType, writeConfig);
|
||||||
|
|||||||
@@ -38,6 +38,8 @@ public class HoodieConfig implements Serializable {
|
|||||||
|
|
||||||
private static final Logger LOG = LogManager.getLogger(HoodieConfig.class);
|
private static final Logger LOG = LogManager.getLogger(HoodieConfig.class);
|
||||||
|
|
||||||
|
protected static final String CONFIG_VALUES_DELIMITER = ",";
|
||||||
|
|
||||||
public static HoodieConfig create(FSDataInputStream inputStream) throws IOException {
|
public static HoodieConfig create(FSDataInputStream inputStream) throws IOException {
|
||||||
HoodieConfig config = new HoodieConfig();
|
HoodieConfig config = new HoodieConfig();
|
||||||
config.props.load(inputStream);
|
config.props.load(inputStream);
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
package org.apache.hudi.common.config;
|
package org.apache.hudi.common.config;
|
||||||
|
|
||||||
import org.apache.hudi.common.engine.EngineType;
|
import org.apache.hudi.common.engine.EngineType;
|
||||||
|
import org.apache.hudi.common.util.StringUtils;
|
||||||
import org.apache.hudi.exception.HoodieNotSupportedException;
|
import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||||
|
|
||||||
import javax.annotation.concurrent.Immutable;
|
import javax.annotation.concurrent.Immutable;
|
||||||
@@ -26,6 +27,7 @@ import javax.annotation.concurrent.Immutable;
|
|||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileReader;
|
import java.io.FileReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -173,15 +175,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
|
|||||||
+ "log files and read parallelism in the column stats index partition. The recommendation is to size the "
|
+ "log files and read parallelism in the column stats index partition. The recommendation is to size the "
|
||||||
+ "file group count such that the base files are under 1GB.");
|
+ "file group count such that the base files are under 1GB.");
|
||||||
|
|
||||||
public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS = ConfigProperty
|
|
||||||
.key(METADATA_PREFIX + ".index.column.stats.all_columns.enable")
|
|
||||||
.defaultValue(true)
|
|
||||||
.sinceVersion("0.11.0")
|
|
||||||
.withDocumentation("Enable indexing column ranges of user data files for all columns under "
|
|
||||||
+ "metadata table key lookups. When enabled, metadata table will have a partition to "
|
|
||||||
+ "store the column ranges and will be used for pruning files during the index lookups. "
|
|
||||||
+ "Only applies if " + ENABLE_METADATA_INDEX_COLUMN_STATS.key() + " is enabled.");
|
|
||||||
|
|
||||||
public static final ConfigProperty<Integer> COLUMN_STATS_INDEX_PARALLELISM = ConfigProperty
|
public static final ConfigProperty<Integer> COLUMN_STATS_INDEX_PARALLELISM = ConfigProperty
|
||||||
.key(METADATA_PREFIX + ".index.column.stats.parallelism")
|
.key(METADATA_PREFIX + ".index.column.stats.parallelism")
|
||||||
.defaultValue(10)
|
.defaultValue(10)
|
||||||
@@ -249,16 +242,12 @@ public final class HoodieMetadataConfig extends HoodieConfig {
|
|||||||
return getBooleanOrDefault(ENABLE_METADATA_INDEX_COLUMN_STATS);
|
return getBooleanOrDefault(ENABLE_METADATA_INDEX_COLUMN_STATS);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isMetadataColumnStatsIndexForAllColumnsEnabled() {
|
public List<String> getColumnsEnabledForColumnStatsIndex() {
|
||||||
return getBooleanOrDefault(ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS);
|
return StringUtils.split(getString(COLUMN_STATS_INDEX_FOR_COLUMNS), CONFIG_VALUES_DELIMITER);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getColumnsEnabledForColumnStatsIndex() {
|
public List<String> getColumnsEnabledForBloomFilterIndex() {
|
||||||
return getString(COLUMN_STATS_INDEX_FOR_COLUMNS);
|
return StringUtils.split(getString(BLOOM_FILTER_INDEX_FOR_COLUMNS), CONFIG_VALUES_DELIMITER);
|
||||||
}
|
|
||||||
|
|
||||||
public String getColumnsEnabledForBloomFilterIndex() {
|
|
||||||
return getString(BLOOM_FILTER_INDEX_FOR_COLUMNS);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getBloomFilterIndexFileGroupCount() {
|
public int getBloomFilterIndexFileGroupCount() {
|
||||||
@@ -353,11 +342,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder withMetadataIndexForAllColumns(boolean enable) {
|
|
||||||
metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS, String.valueOf(enable));
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Builder withColumnStatsIndexForColumns(String columns) {
|
public Builder withColumnStatsIndexForColumns(String columns) {
|
||||||
metadataConfig.setValue(COLUMN_STATS_INDEX_FOR_COLUMNS, columns);
|
metadataConfig.setValue(COLUMN_STATS_INDEX_FOR_COLUMNS, columns);
|
||||||
return this;
|
return this;
|
||||||
|
|||||||
@@ -618,12 +618,18 @@ public class HoodieTableConfig extends HoodieConfig {
|
|||||||
return getLong(TABLE_CHECKSUM);
|
return getLong(TABLE_CHECKSUM);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getMetadataPartitionsInflight() {
|
public List<String> getMetadataPartitionsInflight() {
|
||||||
return getStringOrDefault(TABLE_METADATA_PARTITIONS_INFLIGHT, StringUtils.EMPTY_STRING);
|
return StringUtils.split(
|
||||||
|
getStringOrDefault(TABLE_METADATA_PARTITIONS_INFLIGHT, StringUtils.EMPTY_STRING),
|
||||||
|
CONFIG_VALUES_DELIMITER
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getMetadataPartitions() {
|
public List<String> getMetadataPartitions() {
|
||||||
return getStringOrDefault(TABLE_METADATA_PARTITIONS, StringUtils.EMPTY_STRING);
|
return StringUtils.split(
|
||||||
|
getStringOrDefault(TABLE_METADATA_PARTITIONS, StringUtils.EMPTY_STRING),
|
||||||
|
CONFIG_VALUES_DELIMITER
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -19,12 +19,8 @@
|
|||||||
package org.apache.hudi.common.util;
|
package org.apache.hudi.common.util;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.function.Function;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
@@ -34,10 +30,6 @@ import java.util.stream.Stream;
|
|||||||
public class StringUtils {
|
public class StringUtils {
|
||||||
|
|
||||||
public static final String EMPTY_STRING = "";
|
public static final String EMPTY_STRING = "";
|
||||||
private static final Function<String, Set<String>> STRING_TO_SET = (str) -> Stream.of(str.split(","))
|
|
||||||
.map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toSet());
|
|
||||||
private static final Function<String, List<String>> STRING_TO_LIST = (str) -> Stream.of(str.split(","))
|
|
||||||
.map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
@@ -114,22 +106,13 @@ public class StringUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts the input string, delimited by comma, to a set of strings.
|
* Splits the input string, delimited by {@code delimiter}, into a list of non-empty strings
|
||||||
*
|
* (skipping any empty string produced during splitting)
|
||||||
* @param input
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
public static Set<String> toSet(@Nullable String input) {
|
public static List<String> split(@Nullable String input, String delimiter) {
|
||||||
return isNullOrEmpty(input) ? new HashSet<>() : STRING_TO_SET.apply(input);
|
if (isNullOrEmpty(input)) {
|
||||||
}
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
/**
|
return Stream.of(input.split(delimiter)).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
|
||||||
* Converts the input string, delimited by comma, to a list of strings.
|
|
||||||
*
|
|
||||||
* @param input
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static List<String> toList(@Nullable String input) {
|
|
||||||
return isNullOrEmpty(input) ? new ArrayList<>() : STRING_TO_LIST.apply(input);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ import org.apache.hudi.common.util.Option;
|
|||||||
import org.apache.hudi.common.util.io.ByteBufferBackedInputStream;
|
import org.apache.hudi.common.util.io.ByteBufferBackedInputStream;
|
||||||
import org.apache.hudi.exception.HoodieException;
|
import org.apache.hudi.exception.HoodieException;
|
||||||
import org.apache.hudi.exception.HoodieIOException;
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
import org.apache.hudi.util.LazyRef;
|
import org.apache.hudi.util.Lazy;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
@@ -79,7 +79,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
|
|||||||
|
|
||||||
private final Path path;
|
private final Path path;
|
||||||
|
|
||||||
private final LazyRef<Schema> schema;
|
private final Lazy<Schema> schema;
|
||||||
|
|
||||||
// NOTE: Reader is ONLY THREAD-SAFE for {@code Scanner} operating in Positional Read ("pread")
|
// NOTE: Reader is ONLY THREAD-SAFE for {@code Scanner} operating in Positional Read ("pread")
|
||||||
// mode (ie created w/ "pread = true")
|
// mode (ie created w/ "pread = true")
|
||||||
@@ -110,8 +110,8 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
|
|||||||
// For shared scanner, which is primarily used for point-lookups, we're caching blocks
|
// For shared scanner, which is primarily used for point-lookups, we're caching blocks
|
||||||
// by default, to minimize amount of traffic to the underlying storage
|
// by default, to minimize amount of traffic to the underlying storage
|
||||||
this.sharedScanner = getHFileScanner(reader, true);
|
this.sharedScanner = getHFileScanner(reader, true);
|
||||||
this.schema = schemaOpt.map(LazyRef::eager)
|
this.schema = schemaOpt.map(Lazy::eagerly)
|
||||||
.orElseGet(() -> LazyRef.lazy(() -> fetchSchema(reader)));
|
.orElseGet(() -> Lazy.lazily(() -> fetchSchema(reader)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -49,7 +49,6 @@ import org.apache.hudi.common.util.CollectionUtils;
|
|||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.ParquetUtils;
|
import org.apache.hudi.common.util.ParquetUtils;
|
||||||
import org.apache.hudi.common.util.StringUtils;
|
import org.apache.hudi.common.util.StringUtils;
|
||||||
import org.apache.hudi.common.util.ValidationUtils;
|
|
||||||
import org.apache.hudi.common.util.collection.Pair;
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
import org.apache.hudi.exception.HoodieException;
|
import org.apache.hudi.exception.HoodieException;
|
||||||
import org.apache.hudi.exception.HoodieIOException;
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
@@ -64,6 +63,7 @@ import org.apache.avro.generic.GenericRecord;
|
|||||||
import org.apache.avro.generic.IndexedRecord;
|
import org.apache.avro.generic.IndexedRecord;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hudi.util.Lazy;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
@@ -74,11 +74,11 @@ import java.math.BigDecimal;
|
|||||||
import java.math.RoundingMode;
|
import java.math.RoundingMode;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@@ -94,6 +94,7 @@ import static org.apache.hudi.avro.HoodieAvroUtils.convertValueForSpecificDataTy
|
|||||||
import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldSchemaFromWriteSchema;
|
import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldSchemaFromWriteSchema;
|
||||||
import static org.apache.hudi.avro.HoodieAvroUtils.resolveNullableSchema;
|
import static org.apache.hudi.avro.HoodieAvroUtils.resolveNullableSchema;
|
||||||
import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
|
import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
|
||||||
|
import static org.apache.hudi.common.util.ValidationUtils.checkState;
|
||||||
import static org.apache.hudi.metadata.HoodieMetadataPayload.unwrapStatisticValueWrapper;
|
import static org.apache.hudi.metadata.HoodieMetadataPayload.unwrapStatisticValueWrapper;
|
||||||
import static org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME;
|
import static org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME;
|
||||||
import static org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME;
|
import static org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME;
|
||||||
@@ -445,21 +446,24 @@ public class HoodieTableMetadataUtil {
|
|||||||
/**
|
/**
|
||||||
* Convert the clean action to metadata records.
|
* Convert the clean action to metadata records.
|
||||||
*/
|
*/
|
||||||
public static Map<MetadataPartitionType, HoodieData<HoodieRecord>> convertMetadataToRecords(
|
public static Map<MetadataPartitionType, HoodieData<HoodieRecord>> convertMetadataToRecords(HoodieEngineContext engineContext,
|
||||||
HoodieEngineContext engineContext, HoodieCleanMetadata cleanMetadata,
|
HoodieCleanMetadata cleanMetadata,
|
||||||
MetadataRecordsGenerationParams recordsGenerationParams, String instantTime) {
|
MetadataRecordsGenerationParams recordsGenerationParams,
|
||||||
|
String instantTime) {
|
||||||
final Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionToRecordsMap = new HashMap<>();
|
final Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionToRecordsMap = new HashMap<>();
|
||||||
final HoodieData<HoodieRecord> filesPartitionRecordsRDD = engineContext.parallelize(
|
final HoodieData<HoodieRecord> filesPartitionRecordsRDD = engineContext.parallelize(
|
||||||
convertMetadataToFilesPartitionRecords(cleanMetadata, instantTime), 1);
|
convertMetadataToFilesPartitionRecords(cleanMetadata, instantTime), 1);
|
||||||
partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecordsRDD);
|
partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecordsRDD);
|
||||||
|
|
||||||
if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.BLOOM_FILTERS)) {
|
if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.BLOOM_FILTERS)) {
|
||||||
final HoodieData<HoodieRecord> metadataBloomFilterRecordsRDD = convertMetadataToBloomFilterRecords(cleanMetadata, engineContext, instantTime, recordsGenerationParams);
|
final HoodieData<HoodieRecord> metadataBloomFilterRecordsRDD =
|
||||||
|
convertMetadataToBloomFilterRecords(cleanMetadata, engineContext, instantTime, recordsGenerationParams);
|
||||||
partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, metadataBloomFilterRecordsRDD);
|
partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, metadataBloomFilterRecordsRDD);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.COLUMN_STATS)) {
|
if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.COLUMN_STATS)) {
|
||||||
final HoodieData<HoodieRecord> metadataColumnStatsRDD = convertMetadataToColumnStatsRecords(cleanMetadata, engineContext, recordsGenerationParams);
|
final HoodieData<HoodieRecord> metadataColumnStatsRDD =
|
||||||
|
convertMetadataToColumnStatsRecords(cleanMetadata, engineContext, recordsGenerationParams);
|
||||||
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD);
|
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -554,8 +558,9 @@ public class HoodieTableMetadataUtil {
|
|||||||
|
|
||||||
HoodieTableMetaClient dataTableMetaClient = recordsGenerationParams.getDataMetaClient();
|
HoodieTableMetaClient dataTableMetaClient = recordsGenerationParams.getDataMetaClient();
|
||||||
|
|
||||||
List<String> columnsToIndex = getColumnsToIndex(recordsGenerationParams,
|
List<String> columnsToIndex =
|
||||||
dataTableMetaClient.getTableConfig(), tryResolveSchemaForTable(dataTableMetaClient));
|
getColumnsToIndex(recordsGenerationParams,
|
||||||
|
Lazy.lazily(() -> tryResolveSchemaForTable(dataTableMetaClient)));
|
||||||
|
|
||||||
if (columnsToIndex.isEmpty()) {
|
if (columnsToIndex.isEmpty()) {
|
||||||
// In case there are no columns to index, bail
|
// In case there are no columns to index, bail
|
||||||
@@ -597,7 +602,8 @@ public class HoodieTableMetadataUtil {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.COLUMN_STATS)) {
|
if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.COLUMN_STATS)) {
|
||||||
final HoodieData<HoodieRecord> metadataColumnStatsRDD = convertFilesToColumnStatsRecords(engineContext, partitionToDeletedFiles, partitionToAppendedFiles, recordsGenerationParams);
|
final HoodieData<HoodieRecord> metadataColumnStatsRDD =
|
||||||
|
convertFilesToColumnStatsRecords(engineContext, partitionToDeletedFiles, partitionToAppendedFiles, recordsGenerationParams);
|
||||||
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD);
|
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD);
|
||||||
}
|
}
|
||||||
return partitionToRecordsMap;
|
return partitionToRecordsMap;
|
||||||
@@ -803,7 +809,7 @@ public class HoodieTableMetadataUtil {
|
|||||||
fileChangeCount[1] += appendedFileMap.size();
|
fileChangeCount[1] += appendedFileMap.size();
|
||||||
|
|
||||||
// Validate that no appended file has been deleted
|
// Validate that no appended file has been deleted
|
||||||
ValidationUtils.checkState(
|
checkState(
|
||||||
!appendedFileMap.keySet().removeAll(partitionToDeletedFiles.getOrDefault(partition, Collections.emptyList())),
|
!appendedFileMap.keySet().removeAll(partitionToDeletedFiles.getOrDefault(partition, Collections.emptyList())),
|
||||||
"Rollback file cannot both be appended and deleted");
|
"Rollback file cannot both be appended and deleted");
|
||||||
|
|
||||||
@@ -904,39 +910,47 @@ public class HoodieTableMetadataUtil {
|
|||||||
HoodieData<HoodieRecord> allRecordsRDD = engineContext.emptyHoodieData();
|
HoodieData<HoodieRecord> allRecordsRDD = engineContext.emptyHoodieData();
|
||||||
HoodieTableMetaClient dataTableMetaClient = recordsGenerationParams.getDataMetaClient();
|
HoodieTableMetaClient dataTableMetaClient = recordsGenerationParams.getDataMetaClient();
|
||||||
|
|
||||||
final List<String> columnsToIndex = getColumnsToIndex(recordsGenerationParams,
|
final List<String> columnsToIndex =
|
||||||
dataTableMetaClient.getTableConfig(), tryResolveSchemaForTable(dataTableMetaClient));
|
getColumnsToIndex(recordsGenerationParams,
|
||||||
|
Lazy.lazily(() -> tryResolveSchemaForTable(dataTableMetaClient)));
|
||||||
|
|
||||||
if (columnsToIndex.isEmpty()) {
|
if (columnsToIndex.isEmpty()) {
|
||||||
// In case there are no columns to index, bail
|
// In case there are no columns to index, bail
|
||||||
return engineContext.emptyHoodieData();
|
return engineContext.emptyHoodieData();
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<Pair<String, List<String>>> partitionToDeletedFilesList = partitionToDeletedFiles.entrySet()
|
final List<Pair<String, List<String>>> partitionToDeletedFilesList = partitionToDeletedFiles.entrySet().stream()
|
||||||
.stream().map(e -> Pair.of(e.getKey(), e.getValue())).collect(Collectors.toList());
|
.map(e -> Pair.of(e.getKey(), e.getValue()))
|
||||||
int parallelism = Math.max(Math.min(partitionToDeletedFilesList.size(), recordsGenerationParams.getColumnStatsIndexParallelism()), 1);
|
.collect(Collectors.toList());
|
||||||
final HoodieData<Pair<String, List<String>>> partitionToDeletedFilesRDD = engineContext.parallelize(partitionToDeletedFilesList, parallelism);
|
|
||||||
|
int deletedFilesTargetParallelism = Math.max(Math.min(partitionToDeletedFilesList.size(), recordsGenerationParams.getColumnStatsIndexParallelism()), 1);
|
||||||
|
final HoodieData<Pair<String, List<String>>> partitionToDeletedFilesRDD =
|
||||||
|
engineContext.parallelize(partitionToDeletedFilesList, deletedFilesTargetParallelism);
|
||||||
|
|
||||||
HoodieData<HoodieRecord> deletedFilesRecordsRDD = partitionToDeletedFilesRDD.flatMap(partitionToDeletedFilesPair -> {
|
HoodieData<HoodieRecord> deletedFilesRecordsRDD = partitionToDeletedFilesRDD.flatMap(partitionToDeletedFilesPair -> {
|
||||||
final String partitionName = partitionToDeletedFilesPair.getLeft();
|
final String partitionPath = partitionToDeletedFilesPair.getLeft();
|
||||||
final String partition = getPartitionIdentifier(partitionName);
|
final String partitionId = getPartitionIdentifier(partitionPath);
|
||||||
final List<String> deletedFileList = partitionToDeletedFilesPair.getRight();
|
final List<String> deletedFileList = partitionToDeletedFilesPair.getRight();
|
||||||
|
|
||||||
return deletedFileList.stream().flatMap(deletedFile -> {
|
return deletedFileList.stream().flatMap(deletedFile -> {
|
||||||
final String filePathWithPartition = partitionName + "/" + deletedFile;
|
final String filePathWithPartition = partitionPath + "/" + deletedFile;
|
||||||
return getColumnStatsRecords(partition, filePathWithPartition, dataTableMetaClient, columnsToIndex, true);
|
return getColumnStatsRecords(partitionId, filePathWithPartition, dataTableMetaClient, columnsToIndex, true);
|
||||||
}).iterator();
|
}).iterator();
|
||||||
});
|
});
|
||||||
|
|
||||||
allRecordsRDD = allRecordsRDD.union(deletedFilesRecordsRDD);
|
allRecordsRDD = allRecordsRDD.union(deletedFilesRecordsRDD);
|
||||||
|
|
||||||
final List<Pair<String, Map<String, Long>>> partitionToAppendedFilesList = partitionToAppendedFiles.entrySet()
|
final List<Pair<String, Map<String, Long>>> partitionToAppendedFilesList = partitionToAppendedFiles.entrySet().stream()
|
||||||
.stream().map(entry -> Pair.of(entry.getKey(), entry.getValue())).collect(Collectors.toList());
|
.map(entry -> Pair.of(entry.getKey(), entry.getValue()))
|
||||||
parallelism = Math.max(Math.min(partitionToAppendedFilesList.size(), recordsGenerationParams.getColumnStatsIndexParallelism()), 1);
|
.collect(Collectors.toList());
|
||||||
final HoodieData<Pair<String, Map<String, Long>>> partitionToAppendedFilesRDD = engineContext.parallelize(partitionToAppendedFilesList, parallelism);
|
|
||||||
|
int appendedFilesTargetParallelism = Math.max(Math.min(partitionToAppendedFilesList.size(), recordsGenerationParams.getColumnStatsIndexParallelism()), 1);
|
||||||
|
final HoodieData<Pair<String, Map<String, Long>>> partitionToAppendedFilesRDD =
|
||||||
|
engineContext.parallelize(partitionToAppendedFilesList, appendedFilesTargetParallelism);
|
||||||
|
|
||||||
HoodieData<HoodieRecord> appendedFilesRecordsRDD = partitionToAppendedFilesRDD.flatMap(partitionToAppendedFilesPair -> {
|
HoodieData<HoodieRecord> appendedFilesRecordsRDD = partitionToAppendedFilesRDD.flatMap(partitionToAppendedFilesPair -> {
|
||||||
final String partitionName = partitionToAppendedFilesPair.getLeft();
|
final String partitionPath = partitionToAppendedFilesPair.getLeft();
|
||||||
final String partition = getPartitionIdentifier(partitionName);
|
final String partitionId = getPartitionIdentifier(partitionPath);
|
||||||
final Map<String, Long> appendedFileMap = partitionToAppendedFilesPair.getRight();
|
final Map<String, Long> appendedFileMap = partitionToAppendedFilesPair.getRight();
|
||||||
|
|
||||||
return appendedFileMap.entrySet().stream().flatMap(appendedFileNameLengthEntry -> {
|
return appendedFileMap.entrySet().stream().flatMap(appendedFileNameLengthEntry -> {
|
||||||
@@ -944,11 +958,11 @@ public class HoodieTableMetadataUtil {
|
|||||||
|| !appendedFileNameLengthEntry.getKey().endsWith(HoodieFileFormat.PARQUET.getFileExtension())) {
|
|| !appendedFileNameLengthEntry.getKey().endsWith(HoodieFileFormat.PARQUET.getFileExtension())) {
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
}
|
}
|
||||||
final String filePathWithPartition = partitionName + "/" + appendedFileNameLengthEntry.getKey();
|
final String filePathWithPartition = partitionPath + "/" + appendedFileNameLengthEntry.getKey();
|
||||||
return getColumnStatsRecords(partition, filePathWithPartition, dataTableMetaClient, columnsToIndex, false);
|
return getColumnStatsRecords(partitionId, filePathWithPartition, dataTableMetaClient, columnsToIndex, false);
|
||||||
}).iterator();
|
}).iterator();
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
allRecordsRDD = allRecordsRDD.union(appendedFilesRecordsRDD);
|
allRecordsRDD = allRecordsRDD.union(appendedFilesRecordsRDD);
|
||||||
|
|
||||||
return allRecordsRDD;
|
return allRecordsRDD;
|
||||||
@@ -1091,7 +1105,7 @@ public class HoodieTableMetadataUtil {
|
|||||||
tableConfig.populateMetaFields() ? addMetadataFields(schema) : schema);
|
tableConfig.populateMetaFields() ? addMetadataFields(schema) : schema);
|
||||||
|
|
||||||
List<String> columnsToIndex = getColumnsToIndex(recordsGenerationParams,
|
List<String> columnsToIndex = getColumnsToIndex(recordsGenerationParams,
|
||||||
tableConfig, tableSchema);
|
Lazy.eagerly(tableSchema));
|
||||||
|
|
||||||
if (columnsToIndex.isEmpty()) {
|
if (columnsToIndex.isEmpty()) {
|
||||||
// In case there are no columns to index, bail
|
// In case there are no columns to index, bail
|
||||||
@@ -1108,19 +1122,24 @@ public class HoodieTableMetadataUtil {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the latest columns for the table for column stats indexing.
|
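* Get the list of columns to index in the Column Stats Index: the explicitly configured target columns when present, otherwise every field of the writer schema (empty if the schema cannot be resolved)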
|
||||||
*/
|
*/
|
||||||
private static List<String> getColumnsToIndex(MetadataRecordsGenerationParams recordsGenParams,
|
private static List<String> getColumnsToIndex(MetadataRecordsGenerationParams recordsGenParams,
|
||||||
HoodieTableConfig tableConfig,
|
Lazy<Option<Schema>> lazyWriterSchemaOpt) {
|
||||||
Option<Schema> writerSchemaOpt) {
|
checkState(recordsGenParams.isColumnStatsIndexEnabled());
|
||||||
if (recordsGenParams.isAllColumnStatsIndexEnabled() && writerSchemaOpt.isPresent()) {
|
|
||||||
return writerSchemaOpt.get().getFields().stream()
|
List<String> targetColumns = recordsGenParams.getTargetColumnsForColumnStatsIndex();
|
||||||
.map(Schema.Field::name).collect(Collectors.toList());
|
if (!targetColumns.isEmpty()) {
|
||||||
|
return targetColumns;
|
||||||
}
|
}
|
||||||
|
|
||||||
// In case no writer schema could be obtained we fall back to only index primary key
|
Option<Schema> writerSchemaOpt = lazyWriterSchemaOpt.get();
|
||||||
// columns
|
return writerSchemaOpt
|
||||||
return Arrays.asList(tableConfig.getRecordKeyFields().get());
|
.map(writerSchema ->
|
||||||
|
writerSchema.getFields().stream()
|
||||||
|
.map(Schema.Field::name)
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(Collections.emptyList());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Stream<HoodieRecord> translateWriteStatToColumnStats(HoodieWriteStat writeStat,
|
private static Stream<HoodieRecord> translateWriteStatToColumnStats(HoodieWriteStat writeStat,
|
||||||
@@ -1331,11 +1350,11 @@ public class HoodieTableMetadataUtil {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static Set<String> getInflightMetadataPartitions(HoodieTableConfig tableConfig) {
|
public static Set<String> getInflightMetadataPartitions(HoodieTableConfig tableConfig) {
|
||||||
return StringUtils.toSet(tableConfig.getMetadataPartitionsInflight());
|
return new HashSet<>(tableConfig.getMetadataPartitionsInflight());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Set<String> getCompletedMetadataPartitions(HoodieTableConfig tableConfig) {
|
public static Set<String> getCompletedMetadataPartitions(HoodieTableConfig tableConfig) {
|
||||||
return StringUtils.toSet(tableConfig.getMetadataPartitions());
|
return new HashSet<>(tableConfig.getMetadataPartitions());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Set<String> getInflightAndCompletedMetadataPartitions(HoodieTableConfig tableConfig) {
|
public static Set<String> getInflightAndCompletedMetadataPartitions(HoodieTableConfig tableConfig) {
|
||||||
|
|||||||
@@ -26,28 +26,33 @@ import java.util.List;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Encapsulates all parameters required to generate metadata index for enabled index types.
|
* Encapsulates all parameters required to generate metadata index for enabled index types.
|
||||||
|
*
|
||||||
|
* @deprecated this component currently duplicates configuration coming from the {@code HoodieWriteConfig}
|
||||||
|
* which is problematic; instead we should break this component down and use the source of truth
|
||||||
|
* for each respective data-point directly ({@code HoodieWriteConfig}, {@code HoodieTableMetaClient}, etc)
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public class MetadataRecordsGenerationParams implements Serializable {
|
public class MetadataRecordsGenerationParams implements Serializable {
|
||||||
|
|
||||||
private final HoodieTableMetaClient dataMetaClient;
|
private final HoodieTableMetaClient dataMetaClient;
|
||||||
private final List<MetadataPartitionType> enabledPartitionTypes;
|
private final List<MetadataPartitionType> enabledPartitionTypes;
|
||||||
private final String bloomFilterType;
|
private final String bloomFilterType;
|
||||||
private final int bloomIndexParallelism;
|
private final int bloomIndexParallelism;
|
||||||
private final boolean isAllColumnStatsIndexEnabled;
|
private final boolean isColumnStatsIndexEnabled;
|
||||||
private final int columnStatsIndexParallelism;
|
private final int columnStatsIndexParallelism;
|
||||||
private final List<String> columnsToIndex;
|
private final List<String> targetColumnsForColumnStatsIndex;
|
||||||
private final List<String> bloomSecondaryKeys;
|
private final List<String> targetColumnsForBloomFilterIndex;
|
||||||
|
|
||||||
MetadataRecordsGenerationParams(HoodieTableMetaClient dataMetaClient, List<MetadataPartitionType> enabledPartitionTypes, String bloomFilterType, int bloomIndexParallelism,
|
MetadataRecordsGenerationParams(HoodieTableMetaClient dataMetaClient, List<MetadataPartitionType> enabledPartitionTypes, String bloomFilterType, int bloomIndexParallelism,
|
||||||
boolean isAllColumnStatsIndexEnabled, int columnStatsIndexParallelism, List<String> columnsToIndex, List<String> bloomSecondaryKeys) {
|
boolean isColumnStatsIndexEnabled, int columnStatsIndexParallelism, List<String> targetColumnsForColumnStatsIndex, List<String> targetColumnsForBloomFilterIndex) {
|
||||||
this.dataMetaClient = dataMetaClient;
|
this.dataMetaClient = dataMetaClient;
|
||||||
this.enabledPartitionTypes = enabledPartitionTypes;
|
this.enabledPartitionTypes = enabledPartitionTypes;
|
||||||
this.bloomFilterType = bloomFilterType;
|
this.bloomFilterType = bloomFilterType;
|
||||||
this.bloomIndexParallelism = bloomIndexParallelism;
|
this.bloomIndexParallelism = bloomIndexParallelism;
|
||||||
this.isAllColumnStatsIndexEnabled = isAllColumnStatsIndexEnabled;
|
this.isColumnStatsIndexEnabled = isColumnStatsIndexEnabled;
|
||||||
this.columnStatsIndexParallelism = columnStatsIndexParallelism;
|
this.columnStatsIndexParallelism = columnStatsIndexParallelism;
|
||||||
this.columnsToIndex = columnsToIndex;
|
this.targetColumnsForColumnStatsIndex = targetColumnsForColumnStatsIndex;
|
||||||
this.bloomSecondaryKeys = bloomSecondaryKeys;
|
this.targetColumnsForBloomFilterIndex = targetColumnsForBloomFilterIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
public HoodieTableMetaClient getDataMetaClient() {
|
public HoodieTableMetaClient getDataMetaClient() {
|
||||||
@@ -62,8 +67,8 @@ public class MetadataRecordsGenerationParams implements Serializable {
|
|||||||
return bloomFilterType;
|
return bloomFilterType;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isAllColumnStatsIndexEnabled() {
|
public boolean isColumnStatsIndexEnabled() {
|
||||||
return isAllColumnStatsIndexEnabled;
|
return isColumnStatsIndexEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getBloomIndexParallelism() {
|
public int getBloomIndexParallelism() {
|
||||||
@@ -74,11 +79,11 @@ public class MetadataRecordsGenerationParams implements Serializable {
|
|||||||
return columnStatsIndexParallelism;
|
return columnStatsIndexParallelism;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getColumnsToIndex() {
|
public List<String> getTargetColumnsForColumnStatsIndex() {
|
||||||
return columnsToIndex;
|
return targetColumnsForColumnStatsIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getBloomSecondaryKeys() {
|
public List<String> getSecondaryKeysForBloomFilterIndex() {
|
||||||
return bloomSecondaryKeys;
|
return targetColumnsForBloomFilterIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,21 +20,25 @@ package org.apache.hudi.util;
|
|||||||
|
|
||||||
import java.util.function.Supplier;
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
// TODO java-doc
|
/**
|
||||||
public class LazyRef<T> {
|
* Utility implementing lazy semantics in Java
|
||||||
|
*
|
||||||
|
* @param <T> type of the object being held by {@link Lazy}
|
||||||
|
*/
|
||||||
|
public class Lazy<T> {
|
||||||
|
|
||||||
private volatile boolean initialized;
|
private volatile boolean initialized;
|
||||||
|
|
||||||
private Supplier<T> initializer;
|
private Supplier<T> initializer;
|
||||||
private T ref;
|
private T ref;
|
||||||
|
|
||||||
private LazyRef(Supplier<T> initializer) {
|
private Lazy(Supplier<T> initializer) {
|
||||||
this.initializer = initializer;
|
this.initializer = initializer;
|
||||||
this.ref = null;
|
this.ref = null;
|
||||||
this.initialized = false;
|
this.initialized = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private LazyRef(T ref) {
|
private Lazy(T ref) {
|
||||||
this.initializer = null;
|
this.initializer = null;
|
||||||
this.ref = ref;
|
this.ref = ref;
|
||||||
this.initialized = true;
|
this.initialized = true;
|
||||||
@@ -54,11 +58,20 @@ public class LazyRef<T> {
|
|||||||
return ref;
|
return ref;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T> LazyRef<T> lazy(Supplier<T> initializer) {
|
/**
|
||||||
return new LazyRef<>(initializer);
|
* Executes the provided {@code initializer} lazily, with "exactly once" semantics,
|
||||||
|
* to instantiate the value of type {@code T} that is subsequently held by the returned instance of
|
||||||
|
* {@link Lazy}
|
||||||
|
*/
|
||||||
|
public static <T> Lazy<T> lazily(Supplier<T> initializer) {
|
||||||
|
return new Lazy<>(initializer);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T> LazyRef<T> eager(T ref) {
|
/**
|
||||||
return new LazyRef<>(ref);
|
* Instantiates {@link Lazy} eagerly, setting it with the provided value of
|
||||||
|
* type {@code T} directly, bypassing the lazy initialization sequence
|
||||||
|
*/
|
||||||
|
public static <T> Lazy<T> eagerly(T ref) {
|
||||||
|
return new Lazy<>(ref);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -22,9 +22,6 @@ import org.junit.jupiter.api.Test;
|
|||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotEquals;
|
import static org.junit.jupiter.api.Assertions.assertNotEquals;
|
||||||
@@ -69,18 +66,10 @@ public class TestStringUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStringToSet() {
|
public void testSplit() {
|
||||||
assertEquals(new HashSet<>(), StringUtils.toSet(null));
|
assertEquals(new ArrayList<>(), StringUtils.split(null, ","));
|
||||||
assertEquals(new HashSet<>(), StringUtils.toSet(""));
|
assertEquals(new ArrayList<>(), StringUtils.split("", ","));
|
||||||
Set<String> expected = new HashSet<>(Arrays.asList("a", "b", "c"));
|
assertEquals(Arrays.asList("a", "b", "c"), StringUtils.split("a,b, c", ","));
|
||||||
assertEquals(expected, StringUtils.toSet("a,b, c"));
|
assertEquals(Arrays.asList("a", "b", "c"), StringUtils.split("a,b,, c ", ","));
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testStringToList() {
|
|
||||||
assertEquals(new ArrayList<>(), StringUtils.toList(null));
|
|
||||||
assertEquals(new ArrayList<>(), StringUtils.toList(""));
|
|
||||||
List<String> expected = Arrays.asList("a", "b", "c");
|
|
||||||
assertEquals(expected, StringUtils.toList("a,b, c"));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -351,7 +351,6 @@ class TestHoodieFileIndex extends HoodieClientTestBase {
|
|||||||
PRECOMBINE_FIELD.key -> "id",
|
PRECOMBINE_FIELD.key -> "id",
|
||||||
HoodieMetadataConfig.ENABLE.key -> "true",
|
HoodieMetadataConfig.ENABLE.key -> "true",
|
||||||
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true",
|
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true",
|
||||||
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS.key -> "true",
|
|
||||||
HoodieTableConfig.POPULATE_META_FIELDS.key -> "true"
|
HoodieTableConfig.POPULATE_META_FIELDS.key -> "true"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -375,8 +374,7 @@ class TestHoodieFileIndex extends HoodieClientTestBase {
|
|||||||
DataSourceReadOptions.ENABLE_DATA_SKIPPING.key -> "true",
|
DataSourceReadOptions.ENABLE_DATA_SKIPPING.key -> "true",
|
||||||
// NOTE: Metadata Table has to be enabled on the read path as well
|
// NOTE: Metadata Table has to be enabled on the read path as well
|
||||||
HoodieMetadataConfig.ENABLE.key -> "true",
|
HoodieMetadataConfig.ENABLE.key -> "true",
|
||||||
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true",
|
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true"
|
||||||
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS.key -> "true"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
val fileIndex = HoodieFileIndex(spark, metaClient, Option.empty, props, NoopCache)
|
val fileIndex = HoodieFileIndex(spark, metaClient, Option.empty, props, NoopCache)
|
||||||
|
|||||||
@@ -82,7 +82,6 @@ class TestColumnStatsIndex extends HoodieClientTestBase with ColumnStatsIndexSup
|
|||||||
PRECOMBINE_FIELD.key -> "c1",
|
PRECOMBINE_FIELD.key -> "c1",
|
||||||
HoodieMetadataConfig.ENABLE.key -> "true",
|
HoodieMetadataConfig.ENABLE.key -> "true",
|
||||||
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true",
|
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true",
|
||||||
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS.key -> "true",
|
|
||||||
HoodieMetadataConfig.ENABLE_FULL_SCAN_LOG_FILES.key -> forceFullLogScan.toString,
|
HoodieMetadataConfig.ENABLE_FULL_SCAN_LOG_FILES.key -> forceFullLogScan.toString,
|
||||||
HoodieTableConfig.POPULATE_META_FIELDS.key -> "true"
|
HoodieTableConfig.POPULATE_META_FIELDS.key -> "true"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -51,8 +51,7 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn
|
|||||||
|
|
||||||
val metadataOpts: Map[String, String] = Map(
|
val metadataOpts: Map[String, String] = Map(
|
||||||
HoodieMetadataConfig.ENABLE.key -> "true",
|
HoodieMetadataConfig.ENABLE.key -> "true",
|
||||||
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true",
|
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true"
|
||||||
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS.key -> "true"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
val combinedOpts: Map[String, String] = commonOpts ++ metadataOpts ++
|
val combinedOpts: Map[String, String] = commonOpts ++ metadataOpts ++
|
||||||
|
|||||||
@@ -926,7 +926,6 @@ public class HoodieMetadataTableValidator implements Serializable {
|
|||||||
.enable(enableMetadataTable)
|
.enable(enableMetadataTable)
|
||||||
.withMetadataIndexBloomFilter(enableMetadataTable)
|
.withMetadataIndexBloomFilter(enableMetadataTable)
|
||||||
.withMetadataIndexColumnStats(enableMetadataTable)
|
.withMetadataIndexColumnStats(enableMetadataTable)
|
||||||
.withMetadataIndexForAllColumns(enableMetadataTable)
|
|
||||||
.withAssumeDatePartitioning(cfg.assumeDatePartitioning)
|
.withAssumeDatePartitioning(cfg.assumeDatePartitioning)
|
||||||
.build();
|
.build();
|
||||||
this.fileSystemView = FileSystemViewManager.createInMemoryFileSystemView(engineContext,
|
this.fileSystemView = FileSystemViewManager.createInMemoryFileSystemView(engineContext,
|
||||||
|
|||||||
Reference in New Issue
Block a user