1
0

[HUDI-2488][HUDI-3175] Implement async metadata indexing (#4693)

- Add a new action called INDEX, whose state transition is described in the RFC.
- Changes in timeline to support the new action.
- Add an index planner in ScheduleIndexActionExecutor.
- Add index plan executor in RunIndexActionExecutor.
- Add 3 APIs in HoodieTableMetadataWriter; a) scheduleIndex: will generate an index plan based on latest completed instant, initialize file groups and add a requested INDEX instant, b) index: executes the index plan and also takes care of writes that happened after indexing was requested, c) dropIndex: will drop index by removing the given metadata partition.
- Add 2 new table configs to serve as the source of truth for inflight and completed indexes.
- Support upgrade/downgrade taking care of the newly added configs.
- Add tool to trigger indexing in HoodieIndexer.
- Handle corner cases related to partial failures.
- Abort gracefully after deleting partition and instant.
- Handle other actions in the timeline that need to be considered before catching up.
This commit is contained in:
Sagar Sumit
2022-04-01 01:33:12 +05:30
committed by GitHub
parent 1da196c1e8
commit 28dafa774e
44 changed files with 2123 additions and 150 deletions

View File

@@ -25,6 +25,8 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieIndexCommitMetadata;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
@@ -63,12 +65,14 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieCommitException;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieIndexException;
import org.apache.hudi.exception.HoodieRestoreException;
import org.apache.hudi.exception.HoodieRollbackException;
import org.apache.hudi.exception.HoodieSavepointException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.metrics.HoodieMetrics;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.HoodieTable;
@@ -405,7 +409,6 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
public abstract O bulkInsert(I records, final String instantTime,
Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner);
/**
* Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie
* table for the very first time (e.g: converting an existing table to Hoodie). The input records should contain no
@@ -956,6 +959,53 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
return scheduleTableService(instantTime, extraMetadata, TableServiceType.COMPACT).isPresent();
}
/**
 * Schedules INDEX action for async metadata indexing.
 *
 * @param partitionTypes - list of {@link MetadataPartitionType} which needs to be indexed
 * @return instant time for the requested INDEX action if a plan was generated, empty otherwise
 */
public Option<String> scheduleIndexing(List<MetadataPartitionType> partitionTypes) {
// generate a fresh instant time under which the requested INDEX instant is created
String instantTime = HoodieActiveTimeline.createNewInstantTime();
Option<HoodieIndexPlan> indexPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled())
.scheduleIndexing(context, instantTime, partitionTypes);
// only surface the instant time when the table actually produced an index plan
return indexPlan.isPresent() ? Option.of(instantTime) : Option.empty();
}
/**
 * Runs INDEX action to build out the metadata partitions as planned for the given instant time.
 *
 * @param indexInstantTime - instant time for the requested INDEX action
 * @return {@link Option<HoodieIndexCommitMetadata>} after successful indexing.
 */
public Option<HoodieIndexCommitMetadata> index(String indexInstantTime) {
// delegate plan execution (including catch-up of writes made after indexing was requested) to the table
return createTable(config, hadoopConf, config.isMetadataTableEnabled()).index(context, indexInstantTime);
}
/**
 * Drops the index and removes the metadata partitions.
 *
 * @param partitionTypes - list of {@link MetadataPartitionType} whose index needs to be dropped
 */
public void dropIndex(List<MetadataPartitionType> partitionTypes) {
HoodieTable table = createTable(config, hadoopConf);
String dropInstant = HoodieActiveTimeline.createNewInstantTime();
// guard the drop with a transaction so concurrent writers do not race on the metadata table / table config
this.txnManager.beginTransaction();
try {
context.setJobStatus(this.getClass().getSimpleName(), "Dropping partitions from metadata table");
// metadata writer is only present when the metadata table exists for this table
table.getMetadataWriter(dropInstant).ifPresent(w -> {
try {
((HoodieTableMetadataWriter) w).dropMetadataPartitions(partitionTypes);
} catch (IOException e) {
throw new HoodieIndexException("Failed to drop metadata index. ", e);
}
});
} finally {
// always release the transaction, even if the drop failed
this.txnManager.endTransaction();
}
}
/**
* Performs Compaction for the workload stored in instant-time.
*

View File

@@ -1507,8 +1507,20 @@ public class HoodieWriteConfig extends HoodieConfig {
return isMetadataTableEnabled() && getMetadataConfig().isBloomFilterIndexEnabled();
}
public boolean isMetadataIndexColumnStatsForAllColumnsEnabled() {
return isMetadataTableEnabled() && getMetadataConfig().isMetadataColumnStatsIndexForAllColumnsEnabled();
/**
 * @return true only when the metadata table itself is enabled AND the column stats index is enabled in metadata config.
 */
public boolean isMetadataColumnStatsIndexEnabled() {
return isMetadataTableEnabled() && getMetadataConfig().isColumnStatsIndexEnabled();
}
/**
 * @return the configured list of columns for which the column stats index is built (comma-separated; empty means all columns).
 */
public String getColumnsEnabledForColumnStatsIndex() {
return getMetadataConfig().getColumnsEnabledForColumnStatsIndex();
}
/**
 * @return the configured list of columns for which the bloom filter index is built.
 */
public String getColumnsEnabledForBloomFilterIndex() {
return getMetadataConfig().getColumnsEnabledForBloomFilterIndex();
}
/**
 * @return timeout in seconds for the indexing check, sourced from metadata config.
 */
public int getIndexingCheckTimeoutSeconds() {
return getMetadataConfig().getIndexingCheckTimeoutSeconds();
}
public int getColumnStatsIndexParallelism() {
@@ -1892,6 +1904,10 @@ public class HoodieWriteConfig extends HoodieConfig {
return getBoolean(HoodieMetadataConfig.ASYNC_CLEAN_ENABLE);
}
/**
 * @return whether async metadata indexing is enabled (defaults from {@code ASYNC_INDEX_ENABLE} when unset).
 */
public boolean isMetadataAsyncIndex() {
return getBooleanOrDefault(HoodieMetadataConfig.ASYNC_INDEX_ENABLE);
}
public int getMetadataMaxCommitsToKeep() {
return getInt(HoodieMetadataConfig.MAX_COMMITS_TO_KEEP);
}

View File

@@ -120,7 +120,7 @@ public class HoodieBloomIndex extends HoodieIndex<Object, Object> {
// Step 2: Load all involved files as <Partition, filename> pairs
List<Pair<String, BloomIndexFileInfo>> fileInfoList;
if (config.getBloomIndexPruneByRanges()) {
fileInfoList = (config.getMetadataConfig().isColumnStatsIndexEnabled()
fileInfoList = (config.isMetadataColumnStatsIndexEnabled()
? loadColumnRangesFromMetaIndex(affectedPartitionPathList, context, hoodieTable)
: loadColumnRangesFromFiles(affectedPartitionPathList, context, hoodieTable));
} else {

View File

@@ -50,12 +50,14 @@ import org.apache.hudi.common.util.DefaultSizeEstimator;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.common.util.SizeEstimator;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieAppendException;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.table.HoodieTable;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
@@ -69,8 +71,10 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.accumulateColumnRanges;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.aggregateColumnStats;
@@ -343,16 +347,27 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
updateWriteStatus(stat, result);
}
if (config.isMetadataIndexColumnStatsForAllColumnsEnabled()) {
if (config.isMetadataColumnStatsIndexEnabled()) {
final List<Schema.Field> fieldsToIndex;
if (!StringUtils.isNullOrEmpty(config.getColumnsEnabledForColumnStatsIndex())) {
Set<String> columnsToIndex = Stream.of(config.getColumnsEnabledForColumnStatsIndex().split(","))
.map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toSet());
fieldsToIndex = writeSchemaWithMetaFields.getFields().stream()
.filter(field -> columnsToIndex.contains(field.name())).collect(Collectors.toList());
} else {
// if column stats index is enabled but columns not configured then we assume that all columns should be indexed
fieldsToIndex = writeSchemaWithMetaFields.getFields();
}
Map<String, HoodieColumnRangeMetadata<Comparable>> columnRangeMap = stat.getRecordsStats().isPresent()
? stat.getRecordsStats().get().getStats() : new HashMap<>();
final String filePath = stat.getPath();
// initialize map of column name to map of stats name to stats value
Map<String, Map<String, Object>> columnToStats = new HashMap<>();
writeSchemaWithMetaFields.getFields().forEach(field -> columnToStats.putIfAbsent(field.name(), new HashMap<>()));
fieldsToIndex.forEach(field -> columnToStats.putIfAbsent(field.name(), new HashMap<>()));
// collect stats for columns at once per record and keep iterating through every record to eventually find col stats for all fields.
recordList.forEach(record -> aggregateColumnStats(record, writeSchemaWithMetaFields, columnToStats, config.isConsistentLogicalTimestampEnabled()));
writeSchemaWithMetaFields.getFields().forEach(field -> accumulateColumnRanges(field, filePath, columnRangeMap, columnToStats));
recordList.forEach(record -> aggregateColumnStats(record, fieldsToIndex, columnToStats, config.isConsistentLogicalTimestampEnabled()));
fieldsToIndex.forEach(field -> accumulateColumnRanges(field, filePath, columnRangeMap, columnToStats));
stat.setRecordsStats(new HoodieDeltaWriteStat.RecordsStats<>(columnRangeMap));
}

View File

@@ -19,6 +19,7 @@
package org.apache.hudi.metadata;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieIndexPartitionInfo;
import org.apache.hudi.avro.model.HoodieInstantInfo;
import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
@@ -54,6 +55,7 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieCompactionConfig;
@@ -62,6 +64,7 @@ import org.apache.hudi.config.metrics.HoodieMetricsConfig;
import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIndexException;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.avro.specific.SpecificRecordBase;
@@ -80,14 +83,19 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER;
import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING;
import static org.apache.hudi.metadata.HoodieTableMetadata.METADATA_TABLE_NAME_SUFFIX;
import static org.apache.hudi.metadata.HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions;
/**
* Writer implementation backed by an internal hudi table. Partition and file listing are saved within an internal MOR table
@@ -113,7 +121,6 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
protected boolean enabled;
protected SerializableConfiguration hadoopConf;
protected final transient HoodieEngineContext engineContext;
// TODO: HUDI-3258 Support secondary key via multiple partitions within a single type
protected final List<MetadataPartitionType> enabledPartitionTypes;
/**
@@ -363,6 +370,18 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
Option<String> inflightInstantTimestamp) throws IOException {
HoodieTimer timer = new HoodieTimer().startTimer();
boolean exists = metadataTableExists(dataMetaClient, actionMetadata);
if (!exists) {
// Initialize for the first time by listing partitions and files directly from the file system
if (initializeFromFilesystem(dataMetaClient, inflightInstantTimestamp)) {
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.INITIALIZE_STR, timer.endTimer()));
}
}
}
private <T extends SpecificRecordBase> boolean metadataTableExists(HoodieTableMetaClient dataMetaClient,
Option<T> actionMetadata) throws IOException {
boolean exists = dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(),
HoodieTableMetaClient.METAFOLDER_NAME));
boolean reInitialize = false;
@@ -391,12 +410,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
exists = false;
}
if (!exists) {
// Initialize for the first time by listing partitions and files directly from the file system
if (initializeFromFilesystem(dataMetaClient, inflightInstantTimestamp)) {
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.INITIALIZE_STR, timer.endTimer()));
}
}
return exists;
}
/**
@@ -451,7 +465,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
final String INSTANT_ACTION = (actionMetadata.get() instanceof HoodieRollbackMetadata
? HoodieTimeline.ROLLBACK_ACTION
: (actionMetadata.get() instanceof HoodieRestoreMetadata ? HoodieTimeline.RESTORE_ACTION : ""));
: (actionMetadata.get() instanceof HoodieRestoreMetadata ? HoodieTimeline.RESTORE_ACTION : EMPTY_STRING));
List<String> affectedInstantTimestamps;
switch (INSTANT_ACTION) {
@@ -511,16 +525,33 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields());
initTableMetadata();
initializeEnabledFileGroups(dataMetaClient, createInstantTime);
// if async metadata indexing is enabled,
// then only initialize files partition as other partitions will be built using HoodieIndexer
List<MetadataPartitionType> enabledPartitionTypes = new ArrayList<>();
if (dataWriteConfig.isMetadataAsyncIndex()) {
enabledPartitionTypes.add(MetadataPartitionType.FILES);
} else {
// all enabled ones should be initialized
enabledPartitionTypes = this.enabledPartitionTypes;
}
initializeEnabledFileGroups(dataMetaClient, createInstantTime, enabledPartitionTypes);
// During cold startup, the list of files to be committed can be huge. So creating a HoodieCommitMetadata out
// of these large number of files and calling the existing update(HoodieCommitMetadata) function does not scale
// well. Hence, we have a special commit just for the initialization scenario.
initialCommit(createInstantTime);
initialCommit(createInstantTime, enabledPartitionTypes);
updateInitializedPartitionsInTableConfig(enabledPartitionTypes);
return true;
}
private HoodieTableMetaClient initializeMetaClient(boolean populatMetaFields) throws IOException {
/**
 * Records the given partition types as completed metadata partitions in the data table's config
 * and persists the updated config to the meta path.
 *
 * @param partitionTypes - metadata partition types that have just been initialized
 */
private void updateInitializedPartitionsInTableConfig(List<MetadataPartitionType> partitionTypes) {
// merge newly initialized partitions into the already-completed set from table config
Set<String> completedPartitions = getCompletedMetadataPartitions(dataMetaClient.getTableConfig());
completedPartitions.addAll(partitionTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet()));
dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS.key(), String.join(",", completedPartitions));
// persist the change so other writers/readers see the new source of truth
HoodieTableConfig.update(dataMetaClient.getFs(), new Path(dataMetaClient.getMetaPath()), dataMetaClient.getTableConfig().getProps());
}
private HoodieTableMetaClient initializeMetaClient(boolean populateMetaFields) throws IOException {
return HoodieTableMetaClient.withPropertyBuilder()
.setTableType(HoodieTableType.MERGE_ON_READ)
.setTableName(tableName)
@@ -528,7 +559,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.setPayloadClassName(HoodieMetadataPayload.class.getName())
.setBaseFileFormat(HoodieFileFormat.HFILE.toString())
.setRecordKeyFields(RECORD_KEY_FIELD_NAME)
.setPopulateMetaFields(populatMetaFields)
.setPopulateMetaFields(populateMetaFields)
.setKeyGeneratorClassProp(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
.initTable(hadoopConf.get(), metadataWriteConfig.getBasePath());
}
@@ -553,7 +584,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
// In each round we will list a section of directories
int numDirsToList = Math.min(fileListingParallelism, pathsToList.size());
// List all directories in parallel
List<DirectoryInfo> processedDirectories = engineContext.map(pathsToList.subList(0, numDirsToList), path -> {
List<DirectoryInfo> processedDirectories = engineContext.map(pathsToList.subList(0, numDirsToList), path -> {
FileSystem fs = path.getFileSystem(conf.get());
String relativeDirPath = FSUtils.getRelativePartitionPath(new Path(datasetBasePath), path);
return new DirectoryInfo(relativeDirPath, fs.listStatus(path));
@@ -595,13 +626,19 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
* @param createInstantTime - Metadata table create instant time
* @throws IOException
*/
private void initializeEnabledFileGroups(HoodieTableMetaClient dataMetaClient, String createInstantTime) throws IOException {
for (MetadataPartitionType enabledPartitionType : this.enabledPartitionTypes) {
// Initializes file groups for each of the given partition types (not necessarily all enabled ones:
// with async indexing only FILES is passed here; the rest are built later by the indexer).
private void initializeEnabledFileGroups(HoodieTableMetaClient dataMetaClient, String createInstantTime, List<MetadataPartitionType> partitionTypes) throws IOException {
for (MetadataPartitionType enabledPartitionType : partitionTypes) {
initializeFileGroups(dataMetaClient, enabledPartitionType, createInstantTime,
enabledPartitionType.getFileGroupCount());
}
}
/**
 * Initializes file groups for the given metadata partitions when indexing is requested.
 *
 * @param dataMetaClient - meta client for the data table
 * @param metadataPartitions - metadata partitions for which file groups need to be initialized
 * @param instantTime - instant time of the index action
 * @throws IOException on failure to initialize the file groups
 */
public void initializeMetadataPartitions(HoodieTableMetaClient dataMetaClient, List<MetadataPartitionType> metadataPartitions, String instantTime) throws IOException {
for (MetadataPartitionType partitionType : metadataPartitions) {
initializeFileGroups(dataMetaClient, partitionType, instantTime, partitionType.getFileGroupCount());
}
}
/**
* Initialize file groups for a partition. For file listing, we just have one file group.
*
@@ -614,7 +651,6 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
*/
private void initializeFileGroups(HoodieTableMetaClient dataMetaClient, MetadataPartitionType metadataPartition, String instantTime,
int fileGroupCount) throws IOException {
final HashMap<HeaderMetadataType, String> blockHeader = new HashMap<>();
blockHeader.put(HeaderMetadataType.INSTANT_TIME, instantTime);
// Archival of data table has a dependency on compaction(base files) in metadata table.
@@ -645,12 +681,34 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
}
}
/**
 * Drops the given metadata partitions: removes them from the inflight/completed sets in table
 * config first, then deletes the partition directory from the metadata table.
 *
 * @param metadataPartitions - metadata partition types to drop
 * @throws IOException if the partition directory cannot be deleted
 */
public void dropMetadataPartitions(List<MetadataPartitionType> metadataPartitions) throws IOException {
Set<String> completedIndexes = getCompletedMetadataPartitions(dataMetaClient.getTableConfig());
Set<String> inflightIndexes = getInflightMetadataPartitions(dataMetaClient.getTableConfig());
for (MetadataPartitionType partitionType : metadataPartitions) {
String partitionPath = partitionType.getPartitionPath();
// first update table config
// (config is updated before deletion so a partial failure leaves the partition unreferenced rather than half-deleted)
if (inflightIndexes.contains(partitionPath)) {
inflightIndexes.remove(partitionPath);
dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS_INFLIGHT.key(), String.join(",", inflightIndexes));
} else if (completedIndexes.contains(partitionPath)) {
completedIndexes.remove(partitionPath);
dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS.key(), String.join(",", completedIndexes));
}
HoodieTableConfig.update(dataMetaClient.getFs(), new Path(dataMetaClient.getMetaPath()), dataMetaClient.getTableConfig().getProps());
LOG.warn("Deleting Metadata Table partitions: " + partitionPath);
// recursive delete of the partition directory under the metadata table base path
dataMetaClient.getFs().delete(new Path(metadataWriteConfig.getBasePath(), partitionPath), true);
}
}
private MetadataRecordsGenerationParams getRecordsGenerationParams() {
return new MetadataRecordsGenerationParams(
dataMetaClient, enabledPartitionTypes, dataWriteConfig.getBloomFilterType(),
dataWriteConfig.getBloomIndexParallelism(),
dataWriteConfig.isMetadataIndexColumnStatsForAllColumnsEnabled(),
dataWriteConfig.getColumnStatsIndexParallelism());
dataWriteConfig.isMetadataColumnStatsIndexEnabled(),
dataWriteConfig.getColumnStatsIndexParallelism(),
StringUtils.toList(dataWriteConfig.getColumnsEnabledForColumnStatsIndex()),
StringUtils.toList(dataWriteConfig.getColumnsEnabledForBloomFilterIndex()));
}
/**
@@ -663,20 +721,82 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
/**
* Processes commit metadata from data table and commits to metadata table.
*
* @param instantTime instant time of interest.
* @param convertMetadataFunction converter function to convert the respective metadata to List of HoodieRecords to be written to metadata table.
* @param <T> type of commit metadata.
* @param canTriggerTableService true if table services can be triggered. false otherwise.
*/
private <T> void processAndCommit(String instantTime, ConvertMetadataFunction convertMetadataFunction, boolean canTriggerTableService) {
if (enabled && metadata != null) {
Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionRecordsMap = convertMetadataFunction.convertMetadata();
commit(instantTime, partitionRecordsMap, canTriggerTableService);
if (!dataWriteConfig.isMetadataTableEnabled()) {
return;
}
Set<String> partitionsToUpdate = getMetadataPartitionsToUpdate();
Set<String> inflightIndexes = getInflightMetadataPartitions(dataMetaClient.getTableConfig());
// if indexing is inflight then do not trigger table service
boolean doNotTriggerTableService = partitionsToUpdate.stream().anyMatch(inflightIndexes::contains);
if (enabled && metadata != null) {
// convert metadata and filter only the entries whose partition path are in partitionsToUpdate
Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionRecordsMap = convertMetadataFunction.convertMetadata().entrySet().stream()
.filter(entry -> partitionsToUpdate.contains(entry.getKey().getPartitionPath())).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
commit(instantTime, partitionRecordsMap, !doNotTriggerTableService && canTriggerTableService);
}
}
/**
 * Determines which metadata partitions should receive updates for an incoming commit:
 * completed plus inflight partitions from table config, falling back to all enabled
 * partition types when the config lists none.
 *
 * @return set of metadata partition paths to update
 */
private Set<String> getMetadataPartitionsToUpdate() {
// fetch partitions to update from table config
Set<String> partitionsToUpdate = getCompletedMetadataPartitions(dataMetaClient.getTableConfig());
// add inflight indexes as well because the file groups have already been initialized, so writers can log updates
partitionsToUpdate.addAll(getInflightMetadataPartitions(dataMetaClient.getTableConfig()));
if (!partitionsToUpdate.isEmpty()) {
return partitionsToUpdate;
}
// fallback to all enabled partitions if table config returned no partitions
return getEnabledPartitionTypes().stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet());
}
/**
 * Builds the given metadata partitions to create index.
 *
 * <p>Validates that each planned partition's file groups were already initialized during index
 * scheduling and that the partition type is enabled for this writer, marks the partitions as
 * inflight in the data table's config, and then bootstraps them up to the planned instant.
 *
 * @param engineContext       engine context
 * @param indexPartitionInfos information about partitions to build such as partition type and base instant time
 */
@Override
public void buildMetadataPartitions(HoodieEngineContext engineContext, List<HoodieIndexPartitionInfo> indexPartitionInfos) {
  if (indexPartitionInfos.isEmpty()) {
    LOG.warn("No partition to index in the plan");
    return;
  }
  // all partitions in a single plan share the same index-upto instant
  String indexUptoInstantTime = indexPartitionInfos.get(0).getIndexUptoInstant();
  List<MetadataPartitionType> partitionTypes = new ArrayList<>();
  indexPartitionInfos.forEach(indexPartitionInfo -> {
    String relativePartitionPath = indexPartitionInfo.getMetadataPartitionPath();
    LOG.info(String.format("Creating a new metadata index for partition '%s' under path %s upto instant %s",
        relativePartitionPath, metadataWriteConfig.getBasePath(), indexUptoInstantTime));
    try {
      // file group should have already been initialized while scheduling index for this partition
      if (!dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), relativePartitionPath))) {
        throw new HoodieIndexException(String.format("File group not initialized for metadata partition: %s, indexUptoInstant: %s. Looks like index scheduling failed!",
            relativePartitionPath, indexUptoInstantTime));
      }
    } catch (IOException e) {
      // propagate with the underlying cause so the filesystem failure is not lost
      throw new HoodieIndexException(String.format("Unable to check whether file group is initialized for metadata partition: %s, indexUptoInstant: %s",
          relativePartitionPath, indexUptoInstantTime), e);
    }
    // fail fast when the planned partition type is not enabled for this writer (check in initialCommit)
    MetadataPartitionType partitionType = MetadataPartitionType.valueOf(relativePartitionPath.toUpperCase(Locale.ROOT));
    if (!enabledPartitionTypes.contains(partitionType)) {
      throw new HoodieIndexException(String.format("Indexing for metadata partition: %s is not enabled", partitionType));
    }
    partitionTypes.add(partitionType);
  });
  // before initial commit update inflight indexes in table config
  Set<String> inflightIndexes = getInflightMetadataPartitions(dataMetaClient.getTableConfig());
  inflightIndexes.addAll(indexPartitionInfos.stream().map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toSet()));
  dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS_INFLIGHT.key(), String.join(",", inflightIndexes));
  HoodieTableConfig.update(dataMetaClient.getFs(), new Path(dataMetaClient.getMetaPath()), dataMetaClient.getTableConfig().getProps());
  initialCommit(indexUptoInstantTime, partitionTypes);
}
/**
* Update from {@code HoodieCommitMetadata}.
*
* @param commitMetadata {@code HoodieCommitMetadata}
* @param instantTime Timestamp at which the commit was performed
* @param isTableServiceAction {@code true} if commit metadata is pertaining to a table service. {@code false} otherwise.
@@ -776,12 +896,18 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
List<FileSlice> fileSlices =
HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, Option.ofNullable(fsView), partitionName);
if (fileSlices.isEmpty()) {
// scheduling of INDEX only initializes the file group and not add commit
// so if there are no committed file slices, look for inflight slices
fileSlices = HoodieTableMetadataUtil.getPartitionLatestFileSlicesIncludingInflight(metadataMetaClient, Option.ofNullable(fsView), partitionName);
}
ValidationUtils.checkArgument(fileSlices.size() == fileGroupCount,
String.format("Invalid number of file groups for partition:%s, found=%d, required=%d",
partitionName, fileSlices.size(), fileGroupCount));
List<FileSlice> finalFileSlices = fileSlices;
HoodieData<HoodieRecord> rddSinglePartitionRecords = records.map(r -> {
FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(),
FileSlice slice = finalFileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(),
fileGroupCount));
r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId()));
return r;
@@ -850,7 +976,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
* This is invoked to initialize metadata table for a dataset. Bootstrap Commit has special handling mechanism due to its scale compared to
* other regular commits.
*/
private void initialCommit(String createInstantTime) {
private void initialCommit(String createInstantTime, List<MetadataPartitionType> partitionTypes) {
// List all partitions in the basePath of the containing dataset
LOG.info("Initializing metadata table by using file listings in " + dataWriteConfig.getBasePath());
engineContext.setJobStatus(this.getClass().getSimpleName(), "Initializing metadata table by listing files and partitions");
@@ -877,6 +1003,29 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
return;
}
if (partitionTypes.contains(MetadataPartitionType.FILES)) {
HoodieData<HoodieRecord> filesPartitionRecords = getFilesPartitionRecords(createInstantTime, partitionInfoList, allPartitionRecord);
ValidationUtils.checkState(filesPartitionRecords.count() == (partitions.size() + 1));
partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecords);
}
if (partitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) {
final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToBloomFilterRecords(
engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams(), createInstantTime);
partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, recordsRDD);
}
if (partitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) {
final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToColumnStatsRecords(
engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams());
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, recordsRDD);
}
LOG.info("Committing " + partitions.size() + " partitions and " + totalFiles + " files to metadata");
commit(createInstantTime, partitionToRecordsMap, false);
}
private HoodieData<HoodieRecord> getFilesPartitionRecords(String createInstantTime, List<DirectoryInfo> partitionInfoList, HoodieRecord allPartitionRecord) {
HoodieData<HoodieRecord> filesPartitionRecords = engineContext.parallelize(Arrays.asList(allPartitionRecord), 1);
if (!partitionInfoList.isEmpty()) {
HoodieData<HoodieRecord> fileListRecords = engineContext.parallelize(partitionInfoList, partitionInfoList.size()).map(partitionInfo -> {
@@ -893,23 +1042,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
});
filesPartitionRecords = filesPartitionRecords.union(fileListRecords);
}
ValidationUtils.checkState(filesPartitionRecords.count() == (partitions.size() + 1));
partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecords);
if (enabledPartitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) {
final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToBloomFilterRecords(
engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams(), createInstantTime);
partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, recordsRDD);
}
if (enabledPartitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) {
final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToColumnStatsRecords(
engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams());
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, recordsRDD);
}
LOG.info("Committing " + partitions.size() + " partitions and " + totalFiles + " files to metadata");
commit(createInstantTime, partitionToRecordsMap, false);
return filesPartitionRecords;
}
/**

View File

@@ -19,45 +19,79 @@
package org.apache.hudi.metadata;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieIndexPartitionInfo;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
/**
* Interface that supports updating metadata for a given table, as actions complete.
*/
public interface HoodieTableMetadataWriter extends Serializable, AutoCloseable {

  /**
   * Builds the given metadata partitions to create index.
   *
   * @param engineContext       engine context used to parallelize the index build
   * @param indexPartitionInfos information about partitions to build, such as partition type and base instant time
   */
  void buildMetadataPartitions(HoodieEngineContext engineContext, List<HoodieIndexPartitionInfo> indexPartitionInfos);

  /**
   * Initialize file groups for the given metadata partitions when indexing is requested.
   *
   * @param dataMetaClient     meta client for the data table
   * @param metadataPartitions metadata partitions for which file groups need to be initialized
   * @param instantTime        instant time of the index action
   * @throws IOException if file group initialization fails
   */
  void initializeMetadataPartitions(HoodieTableMetaClient dataMetaClient, List<MetadataPartitionType> metadataPartitions, String instantTime) throws IOException;

  /**
   * Drop the given metadata partitions.
   *
   * @param metadataPartitions metadata partitions to be dropped
   * @throws IOException if the partition removal fails
   */
  void dropMetadataPartitions(List<MetadataPartitionType> metadataPartitions) throws IOException;

  /**
   * Update the metadata table due to a COMMIT operation.
   *
   * @param commitMetadata       commit metadata of the operation of interest.
   * @param instantTime          instant time of the commit.
   * @param isTableServiceAction true if caller is a table service. false otherwise. Only regular write operations can trigger metadata table services and this argument
   *                             will assist in this.
   */
  void update(HoodieCommitMetadata commitMetadata, String instantTime, boolean isTableServiceAction);

  /**
   * Update the metadata table due to a CLEAN operation.
   *
   * @param cleanMetadata clean metadata of the operation of interest.
   * @param instantTime   instant time of the commit.
   */
  void update(HoodieCleanMetadata cleanMetadata, String instantTime);

  /**
   * Update the metadata table due to a RESTORE operation.
   *
   * @param restoreMetadata restore metadata of the operation of interest.
   * @param instantTime     instant time of the commit.
   */
  void update(HoodieRestoreMetadata restoreMetadata, String instantTime);

  /**
   * Update the metadata table due to a ROLLBACK operation.
   *
   * @param rollbackMetadata rollback metadata of the operation of interest.
   * @param instantTime      instant time of the commit.
   */
  void update(HoodieRollbackMetadata rollbackMetadata, String instantTime);
}

View File

@@ -28,6 +28,8 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieIndexCommitMetadata;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
@@ -71,6 +73,7 @@ import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
import org.apache.hudi.table.marker.WriteMarkers;
@@ -431,7 +434,6 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
*/
public abstract void rollbackBootstrap(HoodieEngineContext context, String instantTime);
/**
* Schedule cleaning for the instant time.
*
@@ -481,6 +483,25 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
boolean deleteInstants,
boolean skipLocking);
/**
* Schedules Indexing for the table to the given instant.
*
* @param context HoodieEngineContext
* @param indexInstantTime Instant time for scheduling index action.
* @param partitionsToIndex List of {@link MetadataPartitionType} that should be indexed.
* @return HoodieIndexPlan containing metadata partitions and instant upto which they should be indexed.
*/
public abstract Option<HoodieIndexPlan> scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List<MetadataPartitionType> partitionsToIndex);
/**
* Execute requested index action.
*
* @param context HoodieEngineContext
* @param indexInstantTime Instant time for which index action was scheduled.
* @return HoodieIndexCommitMetadata containing write stats for each metadata partition.
*/
public abstract Option<HoodieIndexCommitMetadata> index(HoodieEngineContext context, String indexInstantTime);
/**
* Create a savepoint at the specified instant, so that the table can be restored
* to this point-in-timeline later if needed.
@@ -748,7 +769,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
* Get Table metadata writer.
*
* @param triggeringInstantTimestamp - The instant that is triggering this metadata write
* @return instance of {@link HoodieTableMetadataWriter
* @return instance of {@link HoodieTableMetadataWriter}
*/
public final Option<HoodieTableMetadataWriter> getMetadataWriter(String triggeringInstantTimestamp) {
return getMetadataWriter(triggeringInstantTimestamp, Option.empty());

View File

@@ -0,0 +1,390 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.table.action.index;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieIndexCommitMetadata;
import org.apache.hudi.avro.model.HoodieIndexPartitionInfo;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIndexException;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.BaseActionExecutor;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import static org.apache.hudi.common.model.WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL;
import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS;
import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS_INFLIGHT;
import static org.apache.hudi.common.table.timeline.HoodieInstant.State.COMPLETED;
import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.CLEAN_ACTION;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.INDEXING_ACTION;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.RESTORE_ACTION;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTION;
import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE;
import static org.apache.hudi.metadata.HoodieTableMetadata.getMetadataTableBasePath;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists;
/**
* Reads the index plan and executes the plan.
* It also reconciles updates on data timeline while indexing was in progress.
*/
public class RunIndexActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieIndexCommitMetadata>> {

  private static final Logger LOG = LogManager.getLogger(RunIndexActionExecutor.class);
  private static final Integer INDEX_COMMIT_METADATA_VERSION_1 = 1;
  private static final Integer LATEST_INDEX_COMMIT_METADATA_VERSION = INDEX_COMMIT_METADATA_VERSION_1;
  private static final int MAX_CONCURRENT_INDEXING = 1;
  private static final int TIMELINE_RELOAD_INTERVAL_MILLIS = 5000;

  // we use this to update the latest instant in data timeline that has been indexed in metadata table
  // this needs to be volatile as it can be updated in the IndexingCatchupTask spawned by this executor
  // assumption is that only one indexer can execute at a time
  private volatile String currentCaughtupInstant;

  private final TransactionManager txnManager;

  public RunIndexActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable<T, I, K, O> table, String instantTime) {
    super(context, config, table, instantTime);
    this.txnManager = new TransactionManager(config, table.getMetaClient().getFs());
  }

  /**
   * Executes the previously scheduled index plan: builds the requested metadata partitions
   * up to the plan's base instant, catches up with writes that happened while indexing was
   * in progress, and finally commits the index metadata and updates the table config.
   *
   * @return {@link HoodieIndexCommitMetadata} containing the final indexed partitions,
   *         wrapped in an {@link Option}.
   * @throws HoodieIndexException if the plan cannot be read, partitions are already
   *                              inflight/completed, or indexing/catchup fails.
   */
  @Override
  public Option<HoodieIndexCommitMetadata> execute() {
    HoodieTimer indexTimer = new HoodieTimer();
    indexTimer.startTimer();

    HoodieInstant indexInstant = validateAndGetIndexInstant();
    // read HoodieIndexPlan
    HoodieIndexPlan indexPlan;
    try {
      indexPlan = TimelineMetadataUtils.deserializeIndexPlan(table.getActiveTimeline().readIndexPlanAsBytes(indexInstant).get());
    } catch (IOException e) {
      // preserve the cause so deserialization failures remain diagnosable
      throw new HoodieIndexException("Failed to read the index plan for instant: " + indexInstant, e);
    }
    List<HoodieIndexPartitionInfo> indexPartitionInfos = indexPlan.getIndexPartitionInfos();
    try {
      if (indexPartitionInfos == null || indexPartitionInfos.isEmpty()) {
        throw new HoodieIndexException(String.format("No partitions to index for instant: %s", instantTime));
      }
      // ensure the metadata partitions for the requested indexes are not already available (or inflight)
      Set<String> indexesInflightOrCompleted = getInflightAndCompletedMetadataPartitions(table.getMetaClient().getTableConfig());
      Set<String> requestedPartitions = indexPartitionInfos.stream()
          .map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toSet());
      requestedPartitions.retainAll(indexesInflightOrCompleted);
      if (!requestedPartitions.isEmpty()) {
        throw new HoodieIndexException(String.format("Following partitions already exist or inflight: %s", requestedPartitions));
      }
      // transition requested indexInstant to inflight
      table.getActiveTimeline().transitionIndexRequestedToInflight(indexInstant, Option.empty());
      // start indexing for each partition
      HoodieTableMetadataWriter metadataWriter = table.getMetadataWriter(instantTime)
          .orElseThrow(() -> new HoodieIndexException(String.format("Could not get metadata writer to run index action for instant: %s", instantTime)));
      // this will only build index upto base instant as generated by the plan, we will be doing catchup later
      String indexUptoInstant = indexPartitionInfos.get(0).getIndexUptoInstant();
      LOG.info("Starting Index Building with base instant: " + indexUptoInstant);
      metadataWriter.buildMetadataPartitions(context, indexPartitionInfos);

      // get remaining instants to catchup
      List<HoodieInstant> instantsToCatchup = getInstantsToCatchup(indexUptoInstant);
      LOG.info("Total remaining instants to index: " + instantsToCatchup.size());

      // reconcile with metadata table timeline
      String metadataBasePath = getMetadataTableBasePath(table.getMetaClient().getBasePath());
      HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataBasePath).build();
      Set<String> metadataCompletedTimestamps = getCompletedArchivedAndActiveInstantsAfter(indexUptoInstant, metadataMetaClient).stream()
          .map(HoodieInstant::getTimestamp).collect(Collectors.toSet());

      // index catchup for all remaining instants with a timeout
      currentCaughtupInstant = indexUptoInstant;
      catchupWithInflightWriters(metadataWriter, instantsToCatchup, metadataMetaClient, metadataCompletedTimestamps);
      // save index commit metadata and update table config
      List<HoodieIndexPartitionInfo> finalIndexPartitionInfos = indexPartitionInfos.stream()
          .map(info -> new HoodieIndexPartitionInfo(
              info.getVersion(),
              info.getMetadataPartitionPath(),
              currentCaughtupInstant))
          .collect(Collectors.toList());
      HoodieIndexCommitMetadata indexCommitMetadata = HoodieIndexCommitMetadata.newBuilder()
          .setVersion(LATEST_INDEX_COMMIT_METADATA_VERSION).setIndexPartitionInfos(finalIndexPartitionInfos).build();
      updateTableConfigAndTimeline(indexInstant, finalIndexPartitionInfos, indexCommitMetadata);
      return Option.of(indexCommitMetadata);
    } catch (IOException e) {
      // abort gracefully: roll back table config, drop partially built partitions, remove inflight instant
      abort(indexInstant, indexPartitionInfos.stream().map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toSet()));
      // chain the cause so the original failure is not lost
      throw new HoodieIndexException(String.format("Unable to index instant: %s", indexInstant), e);
    }
  }

  /**
   * Cleans up after a failed index run: removes the given partitions from the
   * inflight/completed table configs, deletes any partially built metadata
   * partitions, and deletes the inflight index instant file.
   */
  private void abort(HoodieInstant indexInstant, Set<String> requestedPartitions) {
    Set<String> inflightPartitions = getInflightMetadataPartitions(table.getMetaClient().getTableConfig());
    Set<String> completedPartitions = getCompletedMetadataPartitions(table.getMetaClient().getTableConfig());
    // update table config
    requestedPartitions.forEach(partition -> {
      inflightPartitions.remove(partition);
      completedPartitions.remove(partition);
    });
    table.getMetaClient().getTableConfig().setValue(TABLE_METADATA_PARTITIONS_INFLIGHT.key(), String.join(",", inflightPartitions));
    table.getMetaClient().getTableConfig().setValue(TABLE_METADATA_PARTITIONS.key(), String.join(",", completedPartitions));
    HoodieTableConfig.update(table.getMetaClient().getFs(), new Path(table.getMetaClient().getMetaPath()), table.getMetaClient().getTableConfig().getProps());

    // delete metadata partition
    requestedPartitions.forEach(partition -> {
      MetadataPartitionType partitionType = MetadataPartitionType.valueOf(partition.toUpperCase(Locale.ROOT));
      if (metadataPartitionExists(table.getMetaClient().getBasePath(), context, partitionType)) {
        deleteMetadataPartition(table.getMetaClient().getBasePath(), context, partitionType);
      }
    });

    // delete inflight instant
    table.getMetaClient().reloadActiveTimeline().deleteInstantFileIfExists(HoodieTimeline.getIndexInflightInstant(indexInstant.getTimestamp()));
  }

  /**
   * Determines the instants that completed after the plan's base instant and still need
   * to be applied to the metadata table.
   */
  private List<HoodieInstant> getInstantsToCatchup(String indexUptoInstant) {
    // since only write timeline was considered while scheduling index, which gives us the indexUpto instant
    // here we consider other valid actions to pick catchupStart instant
    Set<String> validActions = CollectionUtils.createSet(CLEAN_ACTION, RESTORE_ACTION, ROLLBACK_ACTION);
    Option<HoodieInstant> catchupStartInstant = table.getMetaClient().reloadActiveTimeline()
        .getTimelineOfActions(validActions)
        .filterInflightsAndRequested()
        .findInstantsBefore(indexUptoInstant)
        .firstInstant();
    // get all instants since the plan completed (both from active timeline and archived timeline)
    List<HoodieInstant> instantsToIndex;
    if (catchupStartInstant.isPresent()) {
      instantsToIndex = getRemainingArchivedAndActiveInstantsSince(catchupStartInstant.get().getTimestamp(), table.getMetaClient());
    } else {
      instantsToIndex = getRemainingArchivedAndActiveInstantsSince(indexUptoInstant, table.getMetaClient());
    }
    return instantsToIndex;
  }

  /**
   * Validates that multi-writer preconditions hold and returns the REQUESTED index
   * instant matching {@code instantTime}.
   *
   * @throws HoodieIndexException if optimistic concurrency control or a lock provider
   *                              is not configured, or no requested index instant exists.
   */
  private HoodieInstant validateAndGetIndexInstant() {
    // ensure lock provider configured
    if (!config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl() || StringUtils.isNullOrEmpty(config.getLockProviderClass())) {
      throw new HoodieIndexException(String.format("Need to set %s as %s and configure lock provider class",
          WRITE_CONCURRENCY_MODE.key(), OPTIMISTIC_CONCURRENCY_CONTROL.name()));
    }

    return table.getActiveTimeline()
        .filterPendingIndexTimeline()
        .filter(instant -> instant.getTimestamp().equals(instantTime) && REQUESTED.equals(instant.getState()))
        .lastInstant()
        .orElseThrow(() -> new HoodieIndexException(String.format("No requested index instant found: %s", instantTime)));
  }

  /**
   * Persists the index commit metadata and moves the index instant to COMPLETED,
   * updating the table config under a transaction since another indexer may be running.
   */
  private void updateTableConfigAndTimeline(HoodieInstant indexInstant,
                                            List<HoodieIndexPartitionInfo> finalIndexPartitionInfos,
                                            HoodieIndexCommitMetadata indexCommitMetadata) throws IOException {
    try {
      // update the table config and timeline in a lock as there could be another indexer running
      txnManager.beginTransaction();
      updateMetadataPartitionsTableConfig(table.getMetaClient(),
          finalIndexPartitionInfos.stream().map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toSet()));
      table.getActiveTimeline().saveAsComplete(
          new HoodieInstant(true, INDEXING_ACTION, indexInstant.getTimestamp()),
          TimelineMetadataUtils.serializeIndexCommitMetadata(indexCommitMetadata));
    } finally {
      txnManager.endTransaction();
    }
  }

  /**
   * Runs the catchup task on a single-threaded executor bounded by the configured
   * indexing-check timeout; cancels the task and aborts on timeout or failure.
   */
  private void catchupWithInflightWriters(HoodieTableMetadataWriter metadataWriter, List<HoodieInstant> instantsToIndex,
                                          HoodieTableMetaClient metadataMetaClient, Set<String> metadataCompletedTimestamps) {
    ExecutorService executorService = Executors.newFixedThreadPool(MAX_CONCURRENT_INDEXING);
    Future<?> indexingCatchupTaskFuture = executorService.submit(
        new IndexingCatchupTask(metadataWriter, instantsToIndex, metadataCompletedTimestamps, table.getMetaClient(), metadataMetaClient));
    try {
      LOG.info("Starting index catchup task");
      indexingCatchupTaskFuture.get(config.getIndexingCheckTimeoutSeconds(), TimeUnit.SECONDS);
    } catch (Exception e) {
      indexingCatchupTaskFuture.cancel(true);
      throw new HoodieIndexException(String.format("Index catchup failed. Current indexed instant = %s. Aborting!", currentCaughtupInstant), e);
    } finally {
      executorService.shutdownNow();
    }
  }

  private static List<HoodieInstant> getRemainingArchivedAndActiveInstantsSince(String instant, HoodieTableMetaClient metaClient) {
    List<HoodieInstant> remainingInstantsToIndex = metaClient.getArchivedTimeline().getInstants()
        .filter(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), GREATER_THAN_OR_EQUALS, instant))
        .filter(i -> !INDEXING_ACTION.equals(i.getAction()))
        .collect(Collectors.toList());
    remainingInstantsToIndex.addAll(metaClient.getActiveTimeline().findInstantsAfter(instant).getInstants()
        .filter(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), GREATER_THAN_OR_EQUALS, instant))
        .filter(i -> !INDEXING_ACTION.equals(i.getAction()))
        .collect(Collectors.toList()));
    return remainingInstantsToIndex;
  }

  private static List<HoodieInstant> getCompletedArchivedAndActiveInstantsAfter(String instant, HoodieTableMetaClient metaClient) {
    List<HoodieInstant> completedInstants = metaClient.getArchivedTimeline().filterCompletedInstants().findInstantsAfter(instant)
        .getInstants().filter(i -> !INDEXING_ACTION.equals(i.getAction())).collect(Collectors.toList());
    completedInstants.addAll(metaClient.reloadActiveTimeline().filterCompletedInstants().findInstantsAfter(instant)
        .getInstants().filter(i -> !INDEXING_ACTION.equals(i.getAction())).collect(Collectors.toList()));
    return completedInstants;
  }

  /**
   * Moves the given partitions from the inflight table config to the completed one
   * and persists the updated table config.
   */
  private void updateMetadataPartitionsTableConfig(HoodieTableMetaClient metaClient, Set<String> metadataPartitions) {
    // remove from inflight and update completed indexes
    Set<String> inflightPartitions = getInflightMetadataPartitions(metaClient.getTableConfig());
    Set<String> completedPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig());
    inflightPartitions.removeAll(metadataPartitions);
    completedPartitions.addAll(metadataPartitions);
    // update table config
    metaClient.getTableConfig().setValue(TABLE_METADATA_PARTITIONS_INFLIGHT.key(), String.join(",", inflightPartitions));
    metaClient.getTableConfig().setValue(TABLE_METADATA_PARTITIONS.key(), String.join(",", completedPartitions));
    HoodieTableConfig.update(metaClient.getFs(), new Path(metaClient.getMetaPath()), metaClient.getTableConfig().getProps());
  }

  /**
   * Indexing check runs for instants that completed after the base instant (in the index plan).
   * It will check if these later instants have logged updates to metadata table or not.
   * If not, then it will do the update. If a later instant is inflight, it will wait until it is completed or the task times out.
   */
  class IndexingCatchupTask implements Runnable {

    private final HoodieTableMetadataWriter metadataWriter;
    private final List<HoodieInstant> instantsToIndex;
    private final Set<String> metadataCompletedInstants;
    private final HoodieTableMetaClient metaClient;
    private final HoodieTableMetaClient metadataMetaClient;

    IndexingCatchupTask(HoodieTableMetadataWriter metadataWriter,
                        List<HoodieInstant> instantsToIndex,
                        Set<String> metadataCompletedInstants,
                        HoodieTableMetaClient metaClient,
                        HoodieTableMetaClient metadataMetaClient) {
      this.metadataWriter = metadataWriter;
      this.instantsToIndex = instantsToIndex;
      this.metadataCompletedInstants = metadataCompletedInstants;
      this.metaClient = metaClient;
      this.metadataMetaClient = metadataMetaClient;
    }

    @Override
    public void run() {
      for (HoodieInstant instant : instantsToIndex) {
        // metadata index already updated for this instant
        if (!metadataCompletedInstants.isEmpty() && metadataCompletedInstants.contains(instant.getTimestamp())) {
          currentCaughtupInstant = instant.getTimestamp();
          continue;
        }
        while (!instant.isCompleted()) {
          try {
            LOG.warn("instant not completed, reloading timeline " + instant);
            // reload timeline and fetch instant details again wait until timeout
            String instantTime = instant.getTimestamp();
            Option<HoodieInstant> currentInstant = metaClient.reloadActiveTimeline()
                .filterCompletedInstants().filter(i -> i.getTimestamp().equals(instantTime)).firstInstant();
            instant = currentInstant.orElse(instant);
            // so that timeline is not reloaded very frequently
            Thread.sleep(TIMELINE_RELOAD_INTERVAL_MILLIS);
          } catch (InterruptedException e) {
            // restore the interrupt status so the owning executor's cancel(true) is observed
            Thread.currentThread().interrupt();
            throw new HoodieIndexException(String.format("Thread interrupted while running indexing check for instant: %s", instant), e);
          }
        }
        // if instant completed, ensure that there was metadata commit, else update metadata for this completed instant
        if (COMPLETED.equals(instant.getState())) {
          String instantTime = instant.getTimestamp();
          Option<HoodieInstant> metadataInstant = metadataMetaClient.reloadActiveTimeline()
              .filterCompletedInstants().filter(i -> i.getTimestamp().equals(instantTime)).firstInstant();
          if (metadataInstant.isPresent()) {
            currentCaughtupInstant = instantTime;
            continue;
          }
          try {
            // we need take a lock here as inflight writer could also try to update the timeline
            txnManager.beginTransaction(Option.of(instant), Option.empty());
            LOG.info("Updating metadata table for instant: " + instant);
            switch (instant.getAction()) {
              // TODO: see if this can be moved to metadata writer itself
              case HoodieTimeline.COMMIT_ACTION:
              case HoodieTimeline.DELTA_COMMIT_ACTION:
              case HoodieTimeline.REPLACE_COMMIT_ACTION:
                HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
                    table.getActiveTimeline().getInstantDetails(instant).get(), HoodieCommitMetadata.class);
                // do not trigger any table service as partition is not fully built out yet
                metadataWriter.update(commitMetadata, instant.getTimestamp(), false);
                break;
              case CLEAN_ACTION:
                HoodieCleanMetadata cleanMetadata = CleanerUtils.getCleanerMetadata(table.getMetaClient(), instant);
                metadataWriter.update(cleanMetadata, instant.getTimestamp());
                break;
              case RESTORE_ACTION:
                HoodieRestoreMetadata restoreMetadata = TimelineMetadataUtils.deserializeHoodieRestoreMetadata(
                    table.getActiveTimeline().getInstantDetails(instant).get());
                metadataWriter.update(restoreMetadata, instant.getTimestamp());
                break;
              case ROLLBACK_ACTION:
                HoodieRollbackMetadata rollbackMetadata = TimelineMetadataUtils.deserializeHoodieRollbackMetadata(
                    table.getActiveTimeline().getInstantDetails(instant).get());
                metadataWriter.update(rollbackMetadata, instant.getTimestamp());
                break;
              default:
                throw new IllegalStateException("Unexpected value: " + instant.getAction());
            }
          } catch (IOException e) {
            throw new HoodieIndexException(String.format("Could not update metadata partition for instant: %s", instant), e);
          } finally {
            txnManager.endTransaction(Option.of(instant));
          }
        }
      }
    }
  }
}

View File

@@ -0,0 +1,152 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.table.action.index;
import org.apache.hudi.avro.model.HoodieIndexPartitionInfo;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieIndexException;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.BaseActionExecutor;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import static org.apache.hudi.common.model.WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL;
import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists;
/**
* Schedules INDEX action.
* <li>
* 1. Fetch last completed instant on data timeline.
* 2. Write the index plan to the <instant>.index.requested.
* 3. Initialize file groups for the enabled partition types within a transaction.
* </li>
*/
public class ScheduleIndexActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieIndexPlan>> {
private static final Logger LOG = LogManager.getLogger(ScheduleIndexActionExecutor.class);
private static final Integer INDEX_PLAN_VERSION_1 = 1;
private static final Integer LATEST_INDEX_PLAN_VERSION = INDEX_PLAN_VERSION_1;
private final List<MetadataPartitionType> partitionIndexTypes;
private final TransactionManager txnManager;
public ScheduleIndexActionExecutor(HoodieEngineContext context,
HoodieWriteConfig config,
HoodieTable<T, I, K, O> table,
String instantTime,
List<MetadataPartitionType> partitionIndexTypes) {
super(context, config, table, instantTime);
this.partitionIndexTypes = partitionIndexTypes;
this.txnManager = new TransactionManager(config, table.getMetaClient().getFs());
}
@Override
public Option<HoodieIndexPlan> execute() {
validateBeforeScheduling();
// make sure that it is idempotent, check with previously pending index operations.
Set<String> indexesInflightOrCompleted = getInflightAndCompletedMetadataPartitions(table.getMetaClient().getTableConfig());
Set<String> requestedPartitions = partitionIndexTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet());
requestedPartitions.removeAll(indexesInflightOrCompleted);
if (!requestedPartitions.isEmpty()) {
LOG.warn(String.format("Following partitions already exist or inflight: %s. Going to index only these partitions: %s",
indexesInflightOrCompleted, requestedPartitions));
} else {
LOG.error("All requested index types are inflight or completed: " + partitionIndexTypes);
return Option.empty();
}
List<MetadataPartitionType> finalPartitionsToIndex = partitionIndexTypes.stream()
.filter(p -> requestedPartitions.contains(p.getPartitionPath())).collect(Collectors.toList());
final HoodieInstant indexInstant = HoodieTimeline.getIndexRequestedInstant(instantTime);
try {
this.txnManager.beginTransaction(Option.of(indexInstant), Option.empty());
// get last completed instant
Option<HoodieInstant> indexUptoInstant = table.getActiveTimeline().getContiguousCompletedWriteTimeline().lastInstant();
if (indexUptoInstant.isPresent()) {
// start initializing file groups
// in case FILES partition itself was not initialized before (i.e. metadata was never enabled), this will initialize synchronously
HoodieTableMetadataWriter metadataWriter = table.getMetadataWriter(instantTime)
.orElseThrow(() -> new HoodieIndexException(String.format("Could not get metadata writer to initialize filegroups for indexing for instant: %s", instantTime)));
metadataWriter.initializeMetadataPartitions(table.getMetaClient(), finalPartitionsToIndex, indexInstant.getTimestamp());
// for each partitionToIndex add that time to the plan
List<HoodieIndexPartitionInfo> indexPartitionInfos = finalPartitionsToIndex.stream()
.map(p -> new HoodieIndexPartitionInfo(LATEST_INDEX_PLAN_VERSION, p.getPartitionPath(), indexUptoInstant.get().getTimestamp()))
.collect(Collectors.toList());
HoodieIndexPlan indexPlan = new HoodieIndexPlan(LATEST_INDEX_PLAN_VERSION, indexPartitionInfos);
// update data timeline with requested instant
table.getActiveTimeline().saveToPendingIndexAction(indexInstant, TimelineMetadataUtils.serializeIndexPlan(indexPlan));
return Option.of(indexPlan);
}
} catch (IOException e) {
LOG.error("Could not initialize file groups", e);
// abort gracefully
abort(indexInstant);
throw new HoodieIOException(e.getMessage(), e);
} finally {
this.txnManager.endTransaction(Option.of(indexInstant));
}
return Option.empty();
}
/**
 * Validates preconditions before an index plan may be scheduled.
 *
 * @throws HoodieIndexException if an unknown partition type was requested, or if
 *                              optimistic concurrency control / a lock provider is not configured.
 */
private void validateBeforeScheduling() {
  // Every requested partition type must be a known MetadataPartitionType.
  boolean allTypesKnown = EnumSet.allOf(MetadataPartitionType.class).containsAll(partitionIndexTypes);
  if (!allTypesKnown) {
    throw new HoodieIndexException("Not all index types are valid: " + partitionIndexTypes);
  }
  // Async indexing runs alongside writers, so OCC with a lock provider is mandatory.
  boolean occSupported = config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl();
  boolean lockProviderConfigured = !StringUtils.isNullOrEmpty(config.getLockProviderClass());
  if (!occSupported || !lockProviderConfigured) {
    throw new HoodieIndexException(String.format("Need to set %s as %s and configure lock provider class",
        WRITE_CONCURRENCY_MODE.key(), OPTIMISTIC_CONCURRENCY_CONTROL.name()));
  }
}
/**
 * Best-effort cleanup after a failed scheduling attempt: removes any metadata
 * partitions that were (partially) initialized and deletes the requested INDEX
 * instant file so the timeline is left without a dangling plan.
 *
 * @param indexInstant the requested INDEX instant to remove from the data timeline
 */
private void abort(HoodieInstant indexInstant) {
  // delete metadata partition
  // NOTE(review): this iterates over ALL requested partitionIndexTypes, not only the
  // subset actually scheduled in this run — confirm it cannot remove a partition that
  // was already completed by an earlier indexing run.
  partitionIndexTypes.forEach(partitionType -> {
    if (metadataPartitionExists(table.getMetaClient().getBasePath(), context, partitionType)) {
      deleteMetadataPartition(table.getMetaClient().getBasePath(), context, partitionType);
    }
  });
  // delete requested instant; reload first so the timeline reflects any concurrent changes
  table.getMetaClient().reloadActiveTimeline().deleteInstantFileIfExists(indexInstant);
}
}

View File

@@ -22,6 +22,7 @@ package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import java.util.Collections;
import java.util.Map;
@@ -33,6 +34,11 @@ public class FourToThreeDowngradeHandler implements DowngradeHandler {
@Override
public Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) {
  if (config.isMetadataTableEnabled()) {
    // Metadata Table in version 4 has a schema that is not forward compatible.
    // Hence, it is safe to delete the metadata table, which will be re-initialized in subsequent commit.
    HoodieTableMetadataUtil.deleteMetadataTable(config.getBasePath(), context);
  }
  // No table properties need to be added or rewritten for this downgrade.
  return Collections.emptyMap();
}
}

View File

@@ -23,10 +23,15 @@ import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.metadata.MetadataPartitionType;
import java.util.Hashtable;
import java.util.Map;
import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_CHECKSUM;
import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists;
/**
* UpgradeHandler to assist in upgrading {@link org.apache.hudi.table.HoodieTable} from version 3 to 4.
*/
@@ -35,7 +40,12 @@ public class ThreeToFourUpgradeHandler implements UpgradeHandler {
/**
 * Upgrades table properties from version 3 to 4: refreshes the table checksum and,
 * when the metadata table's FILES partition already exists, records it in
 * {@code TABLE_METADATA_PARTITIONS}.
 *
 * @return the table properties to add as part of the upgrade
 */
@Override
public Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) {
  Map<ConfigProperty, String> tablePropsToAdd = new Hashtable<>();
  // Single checksum write (the duplicate put of the same TABLE_CHECKSUM key was redundant).
  tablePropsToAdd.put(TABLE_CHECKSUM, String.valueOf(HoodieTableConfig.generateChecksum(config.getProps())));
  // if metadata is enabled and files partition exists then update TABLE_METADATA_PARTITIONS;
  // schema for the files partition is same between the two versions
  if (config.isMetadataTableEnabled() && metadataPartitionExists(config.getBasePath(), context, MetadataPartitionType.FILES)) {
    tablePropsToAdd.put(TABLE_METADATA_PARTITIONS, MetadataPartitionType.FILES.getPartitionPath());
  }
  return tablePropsToAdd;
}
}

View File

@@ -22,6 +22,8 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieIndexCommitMetadata;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
@@ -48,6 +50,7 @@ import org.apache.hudi.io.HoodieSortedMergeHandle;
import org.apache.hudi.io.HoodieWriteHandle;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.keygen.factory.HoodieAvroKeyGeneratorFactory;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
import org.apache.hudi.table.action.clean.CleanActionExecutor;
@@ -330,6 +333,16 @@ public class HoodieFlinkCopyOnWriteTable<T extends HoodieRecordPayload>
return new CopyOnWriteRollbackActionExecutor(context, config, this, rollbackInstantTime, commitInstant, deleteInstants, skipLocking).execute();
}
@Override
public Option<HoodieIndexPlan> scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List<MetadataPartitionType> partitionsToIndex) {
  // Async metadata indexing is not implemented for the Flink engine; fail fast.
  throw new HoodieNotSupportedException("Metadata indexing is not supported for a Flink table yet.");
}
@Override
public Option<HoodieIndexCommitMetadata> index(HoodieEngineContext context, String indexInstantTime) {
  // Async metadata indexing is not implemented for the Flink engine; fail fast.
  throw new HoodieNotSupportedException("Metadata indexing is not supported for a Flink table yet.");
}
@Override
public HoodieSavepointMetadata savepoint(HoodieEngineContext context, String instantToSavepoint, String user, String comment) {
throw new HoodieNotSupportedException("Savepoint is not supported yet");

View File

@@ -22,6 +22,8 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieIndexCommitMetadata;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
@@ -44,6 +46,7 @@ import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.io.HoodieCreateHandle;
import org.apache.hudi.io.HoodieMergeHandle;
import org.apache.hudi.io.HoodieSortedMergeHandle;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
import org.apache.hudi.table.action.clean.CleanActionExecutor;
@@ -60,6 +63,8 @@ import org.apache.hudi.table.action.commit.JavaInsertPreppedCommitActionExecutor
import org.apache.hudi.table.action.commit.JavaMergeHelper;
import org.apache.hudi.table.action.commit.JavaUpsertCommitActionExecutor;
import org.apache.hudi.table.action.commit.JavaUpsertPreppedCommitActionExecutor;
import org.apache.hudi.table.action.index.RunIndexActionExecutor;
import org.apache.hudi.table.action.index.ScheduleIndexActionExecutor;
import org.apache.hudi.table.action.restore.CopyOnWriteRestoreActionExecutor;
import org.apache.hudi.table.action.rollback.BaseRollbackPlanActionExecutor;
import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor;
@@ -232,6 +237,16 @@ public class HoodieJavaCopyOnWriteTable<T extends HoodieRecordPayload>
context, config, this, rollbackInstantTime, commitInstant, deleteInstants, skipLocking).execute();
}
@Override
public Option<HoodieIndexPlan> scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List<MetadataPartitionType> partitionsToIndex) {
  // Delegates index-plan generation (and the requested INDEX instant) to the schedule executor.
  return new ScheduleIndexActionExecutor<>(context, config, this, indexInstantTime, partitionsToIndex).execute();
}
@Override
public Option<HoodieIndexCommitMetadata> index(HoodieEngineContext context, String indexInstantTime) {
  // Delegates execution of a previously scheduled index plan to the run executor.
  return new RunIndexActionExecutor<>(context, config, this, indexInstantTime).execute();
}
@Override
public HoodieSavepointMetadata savepoint(HoodieEngineContext context,
String instantToSavepoint,

View File

@@ -22,6 +22,8 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieIndexCommitMetadata;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
@@ -49,6 +51,7 @@ import org.apache.hudi.io.HoodieMergeHandle;
import org.apache.hudi.io.HoodieSortedMergeHandle;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
import org.apache.hudi.table.action.bootstrap.SparkBootstrapCommitActionExecutor;
@@ -67,6 +70,8 @@ import org.apache.hudi.table.action.commit.SparkInsertOverwriteTableCommitAction
import org.apache.hudi.table.action.commit.SparkInsertPreppedCommitActionExecutor;
import org.apache.hudi.table.action.commit.SparkUpsertCommitActionExecutor;
import org.apache.hudi.table.action.commit.SparkUpsertPreppedCommitActionExecutor;
import org.apache.hudi.table.action.index.RunIndexActionExecutor;
import org.apache.hudi.table.action.index.ScheduleIndexActionExecutor;
import org.apache.hudi.table.action.restore.CopyOnWriteRestoreActionExecutor;
import org.apache.hudi.table.action.rollback.BaseRollbackPlanActionExecutor;
import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor;
@@ -276,6 +281,16 @@ public class HoodieSparkCopyOnWriteTable<T extends HoodieRecordPayload>
deleteInstants, skipLocking).execute();
}
@Override
public Option<HoodieIndexPlan> scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List<MetadataPartitionType> partitionsToIndex) {
  // Delegates index-plan generation (and the requested INDEX instant) to the schedule executor.
  return new ScheduleIndexActionExecutor<>(context, config, this, indexInstantTime, partitionsToIndex).execute();
}
@Override
public Option<HoodieIndexCommitMetadata> index(HoodieEngineContext context, String indexInstantTime) {
  // Delegates execution of a previously scheduled index plan to the run executor.
  return new RunIndexActionExecutor<>(context, config, this, indexInstantTime).execute();
}
@Override
public HoodieSavepointMetadata savepoint(HoodieEngineContext context, String instantToSavepoint, String user, String comment) {
return new SavepointActionExecutor<>(context, config, this, instantToSavepoint, user, comment).execute();