1
0

[HUDI-2285][HUDI-2476] Metadata table synchronous design. Rebased and Squashed from pull/3426 (#3590)

* [HUDI-2285] Adding Synchronous updates to metadata before completion of commits in data timeline.

- This patch adds synchronous updates to metadata table. In other words, every write is first committed to metadata table followed by data table. While reading metadata table, we ignore any delta commits that are present only in metadata table and not in data table timeline.
- Compaction of metadata table is fenced by the condition that we trigger compaction only when there are no inflight requests in the data table. This ensures that all base files in the metadata table are always in sync with the data table (w/o any holes), and there could only be some extra invalid commits among the delta log files in the metadata table.
- Due to this, archival of data table also fences itself up until compacted instant in metadata table.
All writes to the metadata table happen within the data table lock. So, the metadata table works in single-writer mode only. This might be tough to loosen since all writers write to the same FILES partition and so will result in conflicts anyway.
- As part of this, have added acquiring locks in data table for those operations which were not before while committing (rollback, clean, compaction, cluster). To note, we were not doing any conflict resolution. All we are doing here is to commit by taking a lock, so that all writes to the metadata table always come from a single writer.
- Also added building block to add buckets for partitions, which will be leveraged by other indexes like record level index, etc. For now, FILES partition has only one bucket. In general, any number of buckets per partition is allowed and each partition has a fixed fileId prefix with incremental suffix for each bucket within each partition.
Have fixed [HUDI-2476]. This fix is about retrying a failed compaction if it succeeded in the metadata table the first time, but failed with the data table.
- Enabling metadata table by default.
- Adding more tests for metadata table

Co-authored-by: Prashant Wason <pwason@uber.com>
This commit is contained in:
Sivabalan Narayanan
2021-10-06 00:17:52 -04:00
committed by GitHub
parent 46808dcb1f
commit 5f32162a2f
101 changed files with 3329 additions and 2069 deletions

View File

@@ -61,6 +61,7 @@ import org.apache.hudi.exception.HoodieRestoreException;
import org.apache.hudi.exception.HoodieRollbackException;
import org.apache.hudi.exception.HoodieSavepointException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metrics.HoodieMetrics;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.HoodieTable;
@@ -241,13 +242,16 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
}
}
/**
* Any pre-commit actions like conflict resolution or updating metadata table goes here.
* @param instantTime commit instant time.
* @param metadata commit metadata for which pre commit is being invoked.
*/
protected void preCommit(String instantTime, HoodieCommitMetadata metadata) {
// no-op
// TODO : Conflict resolution is not supported for Flink & Java engines
}
protected void syncTableMetadata() {
// no-op
// Create a Hoodie table after starting the transaction which encapsulated the commits and files visible.
// Important to create this after the lock to ensure latest commits show up in the timeline without need for reload
HoodieTable table = createTable(config, hadoopConf);
table.getMetadataWriter().ifPresent(w -> ((HoodieTableMetadataWriter)w).update(metadata, instantTime));
}
/**
@@ -404,16 +408,6 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
HoodieTableMetaClient metaClient) {
setOperationType(writeOperationType);
this.lastCompletedTxnAndMetadata = TransactionUtils.getLastCompletedTxnInstantAndMetadata(metaClient);
this.txnManager.beginTransaction(Option.of(new HoodieInstant(State.INFLIGHT, metaClient.getCommitActionType(), instantTime)), lastCompletedTxnAndMetadata
.isPresent()
? Option.of(lastCompletedTxnAndMetadata.get().getLeft()) : Option.empty());
try {
if (writeOperationType != WriteOperationType.CLUSTER && writeOperationType != WriteOperationType.COMPACT) {
syncTableMetadata();
}
} finally {
this.txnManager.endTransaction();
}
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
}
@@ -443,9 +437,6 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
// We cannot have unbounded commit files. Archive commits if we have to archive
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(config, table);
archiveLog.archiveIfRequired(context);
if (operationType != null && operationType != WriteOperationType.CLUSTER && operationType != WriteOperationType.COMPACT) {
syncTableMetadata();
}
} catch (IOException ioe) {
throw new HoodieIOException(ioe.getMessage(), ioe);
} finally {

View File

@@ -58,7 +58,7 @@ public class TransactionUtils {
if (config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl()) {
ConflictResolutionStrategy resolutionStrategy = config.getWriteConflictResolutionStrategy();
Stream<HoodieInstant> instantStream = resolutionStrategy.getCandidateInstants(table.getActiveTimeline(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant);
final ConcurrentOperation thisOperation = new ConcurrentOperation(currentTxnOwnerInstant.get(), thisCommitMetadata.get());
final ConcurrentOperation thisOperation = new ConcurrentOperation(currentTxnOwnerInstant.get(), thisCommitMetadata.orElse(new HoodieCommitMetadata()));
instantStream.forEach(instant -> {
try {
ConcurrentOperation otherOperation = new ConcurrentOperation(instant, table.getMetaClient());

View File

@@ -1679,10 +1679,6 @@ public class HoodieWriteConfig extends HoodieConfig {
return metadataConfig.enabled();
}
public boolean getFileListingMetadataVerify() {
return metadataConfig.validateFileListingMetadata();
}
public int getMetadataInsertParallelism() {
return getInt(HoodieMetadataConfig.INSERT_PARALLELISM_VALUE);
}

View File

@@ -19,7 +19,6 @@
package org.apache.hudi.metadata;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
@@ -32,12 +31,17 @@ import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteConcurrencyMode;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieDeleteBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
@@ -51,7 +55,6 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.hadoop.conf.Configuration;
@@ -83,11 +86,12 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
private static final Logger LOG = LogManager.getLogger(HoodieBackedTableMetadataWriter.class);
protected HoodieWriteConfig metadataWriteConfig;
protected HoodieWriteConfig datasetWriteConfig;
protected HoodieWriteConfig dataWriteConfig;
protected String tableName;
protected HoodieBackedTableMetadata metadata;
protected HoodieTableMetaClient metaClient;
protected HoodieTableMetaClient metadataMetaClient;
protected HoodieTableMetaClient dataMetaClient;
protected Option<HoodieMetadataMetrics> metrics;
protected boolean enabled;
protected SerializableConfiguration hadoopConf;
@@ -95,7 +99,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
protected HoodieBackedTableMetadataWriter(Configuration hadoopConf, HoodieWriteConfig writeConfig,
HoodieEngineContext engineContext) {
this.datasetWriteConfig = writeConfig;
this.dataWriteConfig = writeConfig;
this.engineContext = engineContext;
this.hadoopConf = new SerializableConfiguration(hadoopConf);
@@ -112,17 +116,9 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
ValidationUtils.checkArgument(!this.metadataWriteConfig.isMetadataTableEnabled(), "File listing cannot be used for Metadata Table");
initRegistry();
HoodieTableMetaClient datasetMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(datasetWriteConfig.getBasePath()).build();
initialize(engineContext, datasetMetaClient);
if (enabled) {
// This is always called even in case the table was created for the first time. This is because
// initFromFilesystem() does file listing and hence may take a long time during which some new updates
// may have occurred on the table. Hence, calling this always ensures that the metadata is brought in sync
// with the active timeline.
HoodieTimer timer = new HoodieTimer().startTimer();
syncFromInstants(datasetMetaClient);
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.SYNC_STR, timer.endTimer()));
}
this.dataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(dataWriteConfig.getBasePath()).build();
initialize(engineContext);
initTableMetadata();
} else {
enabled = false;
this.metrics = Option.empty();
@@ -165,7 +161,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.withAutoClean(false)
.withCleanerParallelism(parallelism)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
.retainCommits(writeConfig.getMetadataCleanerCommitsRetained())
.archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep)
// we will trigger compaction manually, to control the instant times
@@ -174,7 +170,8 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.withParallelism(parallelism, parallelism)
.withDeleteParallelism(parallelism)
.withRollbackParallelism(parallelism)
.withFinalizeWriteParallelism(parallelism);
.withFinalizeWriteParallelism(parallelism)
.withAllowMultiWriteOnSameInstant(true);
if (writeConfig.isMetricsOn()) {
builder.withMetricsConfig(HoodieMetricsConfig.newBuilder()
@@ -216,48 +213,43 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
}
/**
* Initialize the metadata table if it does not exist. Update the metadata to bring it in sync with the file system.
* Initialize the metadata table if it does not exist.
*
* This can happen in two ways:
* 1. If the metadata table did not exist, then file and partition listing is used
* 2. If the metadata table exists, the instants from active timeline are read in order and changes applied
*
* The above logic has been chosen because it is faster to perform #1 at scale rather than read all the Instants
* which are large in size (AVRO or JSON encoded and not compressed) and incur considerable IO for de-serialization
* and decoding.
* If the metadata table did not exist, then file and partition listing is used to bootstrap the table.
*/
protected abstract void initialize(HoodieEngineContext engineContext, HoodieTableMetaClient datasetMetaClient);
protected abstract void initialize(HoodieEngineContext engineContext);
protected void initTableMetadata() {
try {
if (this.metadata != null) {
this.metadata.close();
}
this.metadata = new HoodieBackedTableMetadata(engineContext, datasetWriteConfig.getMetadataConfig(),
datasetWriteConfig.getBasePath(), datasetWriteConfig.getSpillableMapBasePath());
this.metaClient = metadata.getMetaClient();
this.metadata = new HoodieBackedTableMetadata(engineContext, dataWriteConfig.getMetadataConfig(),
dataWriteConfig.getBasePath(), dataWriteConfig.getSpillableMapBasePath());
this.metadataMetaClient = metadata.getMetadataMetaClient();
} catch (Exception e) {
throw new HoodieException("Error initializing metadata table for reads", e);
}
}
protected void bootstrapIfNeeded(HoodieEngineContext engineContext, HoodieTableMetaClient datasetMetaClient) throws IOException {
protected void bootstrapIfNeeded(HoodieEngineContext engineContext, HoodieTableMetaClient dataMetaClient) throws IOException {
HoodieTimer timer = new HoodieTimer().startTimer();
boolean exists = datasetMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), HoodieTableMetaClient.METAFOLDER_NAME));
boolean exists = dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), HoodieTableMetaClient.METAFOLDER_NAME));
boolean rebootstrap = false;
if (exists) {
// If the un-synched instants have been archived then the metadata table will need to be bootstrapped again
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get())
HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get())
.setBasePath(metadataWriteConfig.getBasePath()).build();
Option<HoodieInstant> latestMetadataInstant = metaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
Option<HoodieInstant> latestMetadataInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
if (!latestMetadataInstant.isPresent()) {
LOG.warn("Metadata Table will need to be re-bootstrapped as no instants were found");
rebootstrap = true;
} else if (!latestMetadataInstant.get().getTimestamp().equals(SOLO_COMMIT_TIMESTAMP)
&& datasetMetaClient.getActiveTimeline().isBeforeTimelineStarts(latestMetadataInstant.get().getTimestamp())) {
&& dataMetaClient.getActiveTimeline().getAllCommitsTimeline().isBeforeTimelineStarts(latestMetadataInstant.get().getTimestamp())) {
// TODO: Revisit this logic and validate that filtering for all commits timeline is the right thing to do
LOG.warn("Metadata Table will need to be re-bootstrapped as un-synced instants have been archived."
+ " latestMetadataInstant=" + latestMetadataInstant.get().getTimestamp()
+ ", latestDatasetInstant=" + datasetMetaClient.getActiveTimeline().firstInstant().get().getTimestamp());
+ ", latestDataInstant=" + dataMetaClient.getActiveTimeline().firstInstant().get().getTimestamp());
rebootstrap = true;
}
}
@@ -265,13 +257,13 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
if (rebootstrap) {
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.REBOOTSTRAP_STR, 1));
LOG.info("Deleting Metadata Table directory so that it can be re-bootstrapped");
datasetMetaClient.getFs().delete(new Path(metadataWriteConfig.getBasePath()), true);
dataMetaClient.getFs().delete(new Path(metadataWriteConfig.getBasePath()), true);
exists = false;
}
if (!exists) {
// Initialize for the first time by listing partitions and files directly from the file system
if (bootstrapFromFilesystem(engineContext, datasetMetaClient)) {
if (bootstrapFromFilesystem(engineContext, dataMetaClient)) {
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.INITIALIZE_STR, timer.endTimer()));
}
}
@@ -280,23 +272,23 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
/**
* Initialize the Metadata Table by listing files and partitions from the file system.
*
* @param datasetMetaClient {@code HoodieTableMetaClient} for the dataset
* @param dataMetaClient {@code HoodieTableMetaClient} for the dataset.
*/
private boolean bootstrapFromFilesystem(HoodieEngineContext engineContext, HoodieTableMetaClient datasetMetaClient) throws IOException {
private boolean bootstrapFromFilesystem(HoodieEngineContext engineContext, HoodieTableMetaClient dataMetaClient) throws IOException {
ValidationUtils.checkState(enabled, "Metadata table cannot be initialized as it is not enabled");
// We can only bootstrap if there are no pending operations on the dataset
Option<HoodieInstant> pendingInstantOption = Option.fromJavaOptional(datasetMetaClient.getActiveTimeline()
Option<HoodieInstant> pendingDataInstant = Option.fromJavaOptional(dataMetaClient.getActiveTimeline()
.getReverseOrderedInstants().filter(i -> !i.isCompleted()).findFirst());
if (pendingInstantOption.isPresent()) {
if (pendingDataInstant.isPresent()) {
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BOOTSTRAP_ERR_STR, 1));
LOG.warn("Cannot bootstrap metadata table as operation is in progress: " + pendingInstantOption.get());
LOG.warn("Cannot bootstrap metadata table as operation is in progress in dataset: " + pendingDataInstant.get());
return false;
}
// If there is no commit on the dataset yet, use the SOLO_COMMIT_TIMESTAMP as the instant time for initial commit
// Otherwise, we use the latest commit timestamp.
String createInstantTime = datasetMetaClient.getActiveTimeline().getReverseOrderedInstants().findFirst()
String createInstantTime = dataMetaClient.getActiveTimeline().getReverseOrderedInstants().findFirst()
.map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);
LOG.info("Creating a new metadata table in " + metadataWriteConfig.getBasePath() + " at instant " + createInstantTime);
@@ -309,10 +301,11 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.initTable(hadoopConf.get(), metadataWriteConfig.getBasePath());
initTableMetadata();
initializeFileGroups(dataMetaClient, MetadataPartitionType.FILES, createInstantTime, 1);
// List all partitions in the basePath of the containing dataset
LOG.info("Initializing metadata table by using file listings in " + datasetWriteConfig.getBasePath());
Map<String, List<FileStatus>> partitionToFileStatus = getPartitionsToFilesMapping(datasetMetaClient);
LOG.info("Initializing metadata table by using file listings in " + dataWriteConfig.getBasePath());
Map<String, List<FileStatus>> partitionToFileStatus = getPartitionsToFilesMapping(dataMetaClient);
// Create a HoodieCommitMetadata with writeStats for all discovered files
int[] stats = {0};
@@ -349,17 +342,17 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
/**
* Function to find hoodie partitions and list files in them in parallel.
*
* @param datasetMetaClient
* @param dataMetaClient
* @return Map of partition names to a list of FileStatus for all the files in the partition
*/
private Map<String, List<FileStatus>> getPartitionsToFilesMapping(HoodieTableMetaClient datasetMetaClient) {
private Map<String, List<FileStatus>> getPartitionsToFilesMapping(HoodieTableMetaClient dataMetaClient) {
List<Path> pathsToList = new LinkedList<>();
pathsToList.add(new Path(datasetWriteConfig.getBasePath()));
pathsToList.add(new Path(dataWriteConfig.getBasePath()));
Map<String, List<FileStatus>> partitionToFileStatus = new HashMap<>();
final int fileListingParallelism = metadataWriteConfig.getFileListingParallelism();
SerializableConfiguration conf = new SerializableConfiguration(datasetMetaClient.getHadoopConf());
final String dirFilterRegex = datasetWriteConfig.getMetadataConfig().getDirectoryFilterRegex();
SerializableConfiguration conf = new SerializableConfiguration(dataMetaClient.getHadoopConf());
final String dirFilterRegex = dataWriteConfig.getMetadataConfig().getDirectoryFilterRegex();
while (!pathsToList.isEmpty()) {
int listingParallelism = Math.min(fileListingParallelism, pathsToList.size());
@@ -383,7 +376,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.collect(Collectors.toList());
if (p.getRight().length > filesInDir.size()) {
String partitionName = FSUtils.getRelativePartitionPath(new Path(datasetMetaClient.getBasePath()), p.getLeft());
String partitionName = FSUtils.getRelativePartitionPath(new Path(dataMetaClient.getBasePath()), p.getLeft());
// deal with Non-partition table, we should exclude .hoodie
partitionToFileStatus.put(partitionName, filesInDir.stream()
.filter(f -> !f.getPath().getName().equals(HoodieTableMetaClient.METAFOLDER_NAME)).collect(Collectors.toList()));
@@ -401,35 +394,66 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
}
/**
* Sync the Metadata Table from the instants created on the dataset.
* Initialize file groups for a partition. For file listing, we just have one file group.
*
* @param datasetMetaClient {@code HoodieTableMetaClient} for the dataset
* All FileGroups for a given metadata partition has a fixed prefix as per the {@link MetadataPartitionType#getFileIdPrefix()}.
* Each file group is suffixed with 4 digits with increments of 1 starting with 0000.
*
* Lets say we configure 10 file groups for record level index partition, and prefix as "record-index-bucket-"
* File groups will be named as :
* record-index-bucket-0000, .... -> ..., record-index-bucket-0009
*/
private void syncFromInstants(HoodieTableMetaClient datasetMetaClient) {
ValidationUtils.checkState(enabled, "Metadata table cannot be synced as it is not enabled");
// (re) init the metadata for reading.
initTableMetadata();
try {
List<HoodieInstant> instantsToSync = metadata.findInstantsToSyncForWriter();
if (instantsToSync.isEmpty()) {
return;
private void initializeFileGroups(HoodieTableMetaClient dataMetaClient, MetadataPartitionType metadataPartition, String instantTime,
int fileGroupCount) throws IOException {
final HashMap<HeaderMetadataType, String> blockHeader = new HashMap<>();
blockHeader.put(HeaderMetadataType.INSTANT_TIME, instantTime);
// Archival of data table has a dependency on compaction(base files) in metadata table.
// It is assumed that as of time Tx of base instant (/compaction time) in metadata table,
// all commits in data table is in sync with metadata table. So, we always start with log file for any fileGroup.
final HoodieDeleteBlock block = new HoodieDeleteBlock(new HoodieKey[0], blockHeader);
LOG.info(String.format("Creating %d file groups for partition %s with base fileId %s at instant time %s",
fileGroupCount, metadataPartition.partitionPath(), metadataPartition.getFileIdPrefix(), instantTime));
for (int i = 0; i < fileGroupCount; ++i) {
final String fileGroupFileId = String.format("%s%04d", metadataPartition.getFileIdPrefix(), i);
try {
HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder()
.onParentPath(FSUtils.getPartitionPath(metadataWriteConfig.getBasePath(), metadataPartition.partitionPath()))
.withFileId(fileGroupFileId).overBaseCommit(instantTime)
.withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION)
.withFileSize(0L)
.withSizeThreshold(metadataWriteConfig.getLogFileMaxSize())
.withFs(dataMetaClient.getFs())
.withRolloverLogWriteToken(HoodieLogFormat.DEFAULT_WRITE_TOKEN)
.withLogWriteToken(HoodieLogFormat.DEFAULT_WRITE_TOKEN)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
writer.appendBlock(block);
writer.close();
} catch (InterruptedException e) {
throw new HoodieException("Failed to created fileGroup " + fileGroupFileId + " for partition " + metadataPartition.partitionPath(), e);
}
}
}
LOG.info("Syncing " + instantsToSync.size() + " instants to metadata table: " + instantsToSync);
/**
* Interface to assist in converting commit metadata to List of HoodieRecords to be written to metadata table.
* Updates of different commit metadata uses the same method to convert to HoodieRecords and hence.
*/
private interface ConvertMetadataFunction {
List<HoodieRecord> convertMetadata();
}
// Read each instant in order and sync it to metadata table
for (HoodieInstant instant : instantsToSync) {
LOG.info("Syncing instant " + instant + " to metadata table");
Option<List<HoodieRecord>> records = HoodieTableMetadataUtil.convertInstantToMetaRecords(datasetMetaClient,
metaClient.getActiveTimeline(), instant, metadata.getUpdateTime());
if (records.isPresent()) {
commit(records.get(), MetadataPartitionType.FILES.partitionPath(), instant.getTimestamp());
}
}
initTableMetadata();
} catch (IOException ioe) {
throw new HoodieIOException("Unable to sync instants from data to metadata table.", ioe);
/**
* Processes commit metadata from data table and commits to metadata table.
* @param instantTime instant time of interest.
* @param convertMetadataFunction converter function to convert the respective metadata to List of HoodieRecords to be written to metadata table.
* @param <T> type of commit metadata.
*/
private <T> void processAndCommit(String instantTime, ConvertMetadataFunction convertMetadataFunction) {
if (enabled && metadata != null) {
List<HoodieRecord> records = convertMetadataFunction.convertMetadata();
commit(records, MetadataPartitionType.FILES.partitionPath(), instantTime);
}
}
@@ -441,24 +465,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
*/
@Override
public void update(HoodieCommitMetadata commitMetadata, String instantTime) {
if (enabled) {
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(commitMetadata, instantTime);
commit(records, MetadataPartitionType.FILES.partitionPath(), instantTime);
}
}
/**
* Update from {@code HoodieCleanerPlan}.
*
* @param cleanerPlan {@code HoodieCleanerPlan}
* @param instantTime Timestamp at which the clean plan was generated
*/
@Override
public void update(HoodieCleanerPlan cleanerPlan, String instantTime) {
if (enabled) {
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(cleanerPlan, instantTime);
commit(records, MetadataPartitionType.FILES.partitionPath(), instantTime);
}
processAndCommit(instantTime, () -> HoodieTableMetadataUtil.convertMetadataToRecords(commitMetadata, instantTime));
}
/**
@@ -469,10 +476,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
*/
@Override
public void update(HoodieCleanMetadata cleanMetadata, String instantTime) {
if (enabled) {
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(cleanMetadata, instantTime);
commit(records, MetadataPartitionType.FILES.partitionPath(), instantTime);
}
processAndCommit(instantTime, () -> HoodieTableMetadataUtil.convertMetadataToRecords(cleanMetadata, instantTime));
}
/**
@@ -483,11 +487,8 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
*/
@Override
public void update(HoodieRestoreMetadata restoreMetadata, String instantTime) {
if (enabled) {
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(metaClient.getActiveTimeline(),
restoreMetadata, instantTime, metadata.getUpdateTime());
commit(records, MetadataPartitionType.FILES.partitionPath(), instantTime);
}
processAndCommit(instantTime, () -> HoodieTableMetadataUtil.convertMetadataToRecords(metadataMetaClient.getActiveTimeline(),
restoreMetadata, instantTime, metadata.getSyncedInstantTime()));
}
/**
@@ -498,9 +499,21 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
*/
@Override
public void update(HoodieRollbackMetadata rollbackMetadata, String instantTime) {
if (enabled) {
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(metaClient.getActiveTimeline(),
rollbackMetadata, instantTime, metadata.getUpdateTime());
if (enabled && metadata != null) {
// Is this rollback of an instant that has been synced to the metadata table?
String rollbackInstant = rollbackMetadata.getCommitsRollback().get(0);
boolean wasSynced = metadataMetaClient.getActiveTimeline().containsInstant(new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, rollbackInstant));
if (!wasSynced) {
// A compaction may have taken place on metadata table which would have included this instant being rolled back.
// Revisit this logic to relax the compaction fencing : https://issues.apache.org/jira/browse/HUDI-2458
Option<String> latestCompaction = metadata.getLatestCompactionTime();
if (latestCompaction.isPresent()) {
wasSynced = HoodieTimeline.compareTimestamps(rollbackInstant, HoodieTimeline.LESSER_THAN_OR_EQUALS, latestCompaction.get());
}
}
List<HoodieRecord> records = HoodieTableMetadataUtil.convertMetadataToRecords(metadataMetaClient.getActiveTimeline(), rollbackMetadata, instantTime,
metadata.getSyncedInstantTime(), wasSynced);
commit(records, MetadataPartitionType.FILES.partitionPath(), instantTime);
}
}
@@ -512,13 +525,12 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
}
}
public HoodieBackedTableMetadata getMetadataReader() {
return metadata;
}
/**
* Commit the {@code HoodieRecord}s to Metadata Table as a new delta-commit.
*
* @param records The list of records to be written.
* @param partitionName The partition to which the records are to be written.
* @param instantTime The timestamp to use for the deltacommit.
*/
protected abstract void commit(List<HoodieRecord> records, String partitionName, String instantTime);
}

View File

@@ -19,7 +19,6 @@
package org.apache.hudi.metadata;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.model.HoodieCommitMetadata;
@@ -31,13 +30,32 @@ import java.io.Serializable;
*/
public interface HoodieTableMetadataWriter extends Serializable, AutoCloseable {
/**
* Update the metadata table due to a COMMIT operation.
* @param commitMetadata commit metadata of the operation of interest.
* @param instantTime instant time of the commit.
*/
void update(HoodieCommitMetadata commitMetadata, String instantTime);
void update(HoodieCleanerPlan cleanerPlan, String instantTime);
/**
* Update the metadata table due to a CLEAN operation.
* @param cleanMetadata clean metadata of the operation of interest.
* @param instantTime instant time of the commit.
*/
void update(HoodieCleanMetadata cleanMetadata, String instantTime);
/**
* Update the metadata table due to a RESTORE operation.
* @param restoreMetadata restore metadata of the operation of interest.
* @param instantTime instant time of the commit.
*/
void update(HoodieRestoreMetadata restoreMetadata, String instantTime);
/**
* Update the metadata table due to a ROLLBACK operation.
* @param rollbackMetadata rollback metadata of the operation of interest.
* @param instantTime instant time of the commit.
*/
void update(HoodieRollbackMetadata rollbackMetadata, String instantTime);
}

View File

@@ -55,6 +55,7 @@ import org.apache.hudi.common.table.view.TableFileSystemView;
import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView;
import org.apache.hudi.common.table.view.TableFileSystemView.SliceView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
@@ -63,6 +64,7 @@ import org.apache.hudi.exception.HoodieInsertException;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
import org.apache.hudi.table.marker.WriteMarkers;
@@ -703,4 +705,13 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
// to engine context, and it ends up being null (as its not serializable and marked transient here).
return context == null ? new HoodieLocalEngineContext(hadoopConfiguration.get()) : context;
}
/**
 * Fetch instance of {@link HoodieTableMetadataWriter}.
 *
 * <p>This base implementation does not support writing to the metadata table: it validates
 * that the metadata table is disabled and returns an empty Option. Engine-specific table
 * implementations that support the metadata table must override this method.
 *
 * @return instance of {@link HoodieTableMetadataWriter}; empty when unsupported.
 */
public Option<HoodieTableMetadataWriter> getMetadataWriter() {
  // The default implementation is only valid when the metadata table is disabled; if it is
  // enabled, reaching here means the concrete table failed to override this method.
  ValidationUtils.checkArgument(!config.isMetadataTableEnabled(),
      "Metadata table is enabled but this table implementation does not provide a metadata writer");
  return Option.empty();
}
}

View File

@@ -200,20 +200,19 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
.collect(Collectors.groupingBy(i -> Pair.of(i.getTimestamp(),
HoodieInstant.getComparableAction(i.getAction()))));
// If metadata table is enabled, do not archive instants which are more recent that the latest synced
// instant on the metadata table. This is required for metadata table sync.
// If metadata table is enabled, do not archive instants which are more recent that the last compaction on the
// metadata table.
if (config.isMetadataTableEnabled()) {
try (HoodieTableMetadata tableMetadata = HoodieTableMetadata.create(table.getContext(), config.getMetadataConfig(),
config.getBasePath(), FileSystemViewStorageConfig.SPILLABLE_DIR.defaultValue())) {
Option<String> lastSyncedInstantTime = tableMetadata.getUpdateTime();
if (lastSyncedInstantTime.isPresent()) {
LOG.info("Limiting archiving of instants to last synced instant on metadata table at " + lastSyncedInstantTime.get());
instants = instants.filter(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), HoodieTimeline.LESSER_THAN,
lastSyncedInstantTime.get()));
} else {
LOG.info("Not archiving as there is no instants yet on the metadata table");
Option<String> latestCompactionTime = tableMetadata.getLatestCompactionTime();
if (!latestCompactionTime.isPresent()) {
LOG.info("Not archiving as there is no compaction yet on the metadata table");
instants = Stream.empty();
} else {
LOG.info("Limiting archiving of instants to latest compaction on metadata table at " + latestCompactionTime.get());
instants = instants.filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN,
latestCompactionTime.get()));
}
} catch (Exception e) {
throw new HoodieException("Error limiting instant archival based on metadata table", e);

View File

@@ -21,7 +21,11 @@ package org.apache.hudi.table.action;
import java.io.Serializable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -46,4 +50,36 @@ public abstract class BaseActionExecutor<T extends HoodieRecordPayload, I, K, O,
}
/**
 * Executes this action against the table and returns the result of the operation.
 *
 * @return result produced by executing this action.
 */
public abstract R execute();
/**
 * Applies the given commit metadata to the metadata table, when a metadata writer
 * is available for this table; a no-op otherwise.
 *
 * @param metadata commit metadata of interest.
 */
protected final void writeTableMetadata(HoodieCommitMetadata metadata) {
  table.getMetadataWriter().ifPresent(metadataWriter -> metadataWriter.update(metadata, instantTime));
}
/**
 * Applies the given clean metadata to the metadata table, when a metadata writer
 * is available for this table; a no-op otherwise.
 *
 * @param metadata clean metadata of interest.
 */
protected final void writeTableMetadata(HoodieCleanMetadata metadata) {
  table.getMetadataWriter().ifPresent(metadataWriter -> metadataWriter.update(metadata, instantTime));
}
/**
 * Applies the given rollback metadata to the metadata table, when a metadata writer
 * is available for this table; a no-op otherwise.
 *
 * @param metadata rollback metadata of interest.
 */
protected final void writeTableMetadata(HoodieRollbackMetadata metadata) {
  table.getMetadataWriter().ifPresent(metadataWriter -> metadataWriter.update(metadata, instantTime));
}
/**
 * Applies the given restore metadata to the metadata table, when a metadata writer
 * is available for this table; a no-op otherwise.
 *
 * @param metadata restore metadata of interest.
 */
protected final void writeTableMetadata(HoodieRestoreMetadata metadata) {
  table.getMetadataWriter().ifPresent(metadataWriter -> metadataWriter.update(metadata, instantTime));
}
}

View File

@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.model.HoodieActionInstant;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.HoodieCleanStat;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.CleanFileInfo;
@@ -58,9 +59,11 @@ public class CleanActionExecutor<T extends HoodieRecordPayload, I, K, O> extends
private static final long serialVersionUID = 1L;
private static final Logger LOG = LogManager.getLogger(CleanActionExecutor.class);
private final TransactionManager txnManager;
/**
 * Creates an executor for the CLEAN action.
 *
 * @param context     engine context to run the clean with.
 * @param config      write config of the table.
 * @param table       table to clean.
 * @param instantTime instant time of the clean action.
 */
public CleanActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable<T, I, K, O> table, String instantTime) {
super(context, config, table, instantTime);
// Transaction manager guards metadata-table updates so they happen under the data table lock.
this.txnManager = new TransactionManager(config, table.getMetaClient().getFs());
}
static Boolean deleteFileAndGetResult(FileSystem fs, String deletePathStr) throws IOException {
@@ -196,7 +199,7 @@ public class CleanActionExecutor<T extends HoodieRecordPayload, I, K, O> extends
Option.of(timer.endTimer()),
cleanStats
);
writeMetadata(metadata);
table.getActiveTimeline().transitionCleanInflightToComplete(inflightInstant,
TimelineMetadataUtils.serializeCleanMetadata(metadata));
LOG.info("Marked clean started on " + inflightInstant.getTimestamp() + " as complete");
@@ -206,6 +209,19 @@ public class CleanActionExecutor<T extends HoodieRecordPayload, I, K, O> extends
}
}
/**
 * Update metadata table if available. Any update to the metadata table happens within the
 * data table lock, so that the metadata table is only ever written by a single writer.
 * @param cleanMetadata instance of {@link HoodieCleanMetadata} to be applied to metadata.
 */
private void writeMetadata(HoodieCleanMetadata cleanMetadata) {
try {
this.txnManager.beginTransaction(Option.empty(), Option.empty());
writeTableMetadata(cleanMetadata);
} finally {
// Always release the lock, even if the metadata update fails.
this.txnManager.endTransaction();
}
}
@Override
public HoodieCleanMetadata execute() {
List<HoodieCleanMetadata> cleanMetadataList = new ArrayList<>();

View File

@@ -175,10 +175,6 @@ public abstract class BaseCommitActionExecutor<T extends HoodieRecordPayload, I,
}
}
/**
 * Hook to sync the metadata table with this commit. No-op by default; engine-specific
 * executors may override to perform the actual sync.
 */
protected void syncTableMetadata() {
// No Op
}
/**
* By default, return the writer schema in Write Config for storing in commit.
*/

View File

@@ -20,6 +20,7 @@ package org.apache.hudi.table.action.restore;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -27,6 +28,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieRollbackException;
import org.apache.hudi.table.HoodieTable;
@@ -46,6 +48,7 @@ public abstract class BaseRestoreActionExecutor<T extends HoodieRecordPayload, I
private static final Logger LOG = LogManager.getLogger(BaseRestoreActionExecutor.class);
private final String restoreInstantTime;
private final TransactionManager txnManager;
public BaseRestoreActionExecutor(HoodieEngineContext context,
HoodieWriteConfig config,
@@ -54,6 +57,7 @@ public abstract class BaseRestoreActionExecutor<T extends HoodieRecordPayload, I
String restoreInstantTime) {
super(context, config, table, instantTime);
this.restoreInstantTime = restoreInstantTime;
this.txnManager = new TransactionManager(config, table.getMetaClient().getFs());
}
@Override
@@ -92,9 +96,23 @@ public abstract class BaseRestoreActionExecutor<T extends HoodieRecordPayload, I
HoodieRestoreMetadata restoreMetadata = TimelineMetadataUtils.convertRestoreMetadata(
instantTime, durationInMs, instantsRolledBack, instantToMetadata);
writeToMetadata(restoreMetadata);
table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, instantTime),
TimelineMetadataUtils.serializeRestoreMetadata(restoreMetadata));
LOG.info("Commits " + instantsRolledBack + " rollback is complete. Restored table to " + restoreInstantTime);
return restoreMetadata;
}
/**
 * Update metadata table if available. Any update to the metadata table happens within the
 * data table lock, so that the metadata table is only ever written by a single writer.
 * @param restoreMetadata instance of {@link HoodieRestoreMetadata} to be applied to metadata.
 */
private void writeToMetadata(HoodieRestoreMetadata restoreMetadata) {
try {
this.txnManager.beginTransaction(Option.empty(), Option.empty());
writeTableMetadata(restoreMetadata);
} finally {
// Always release the lock, even if the metadata update fails.
this.txnManager.endTransaction();
}
}
}

View File

@@ -21,6 +21,7 @@ package org.apache.hudi.table.action.rollback;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.client.heartbeat.HoodieHeartbeatClient;
import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.HoodieRollbackStat;
import org.apache.hudi.common.bootstrap.index.BootstrapIndex;
import org.apache.hudi.common.engine.HoodieEngineContext;
@@ -57,6 +58,7 @@ public abstract class BaseRollbackActionExecutor<T extends HoodieRecordPayload,
protected final boolean deleteInstants;
protected final boolean skipTimelinePublish;
protected final boolean useMarkerBasedStrategy;
private final TransactionManager txnManager;
public BaseRollbackActionExecutor(HoodieEngineContext context,
HoodieWriteConfig config,
@@ -85,6 +87,7 @@ public abstract class BaseRollbackActionExecutor<T extends HoodieRecordPayload,
ValidationUtils.checkArgument(!instantToRollback.isCompleted(),
"Cannot use marker based rollback strategy on completed instant:" + instantToRollback);
}
this.txnManager = new TransactionManager(config, table.getMetaClient().getFs());
}
/**
@@ -248,6 +251,7 @@ public abstract class BaseRollbackActionExecutor<T extends HoodieRecordPayload,
protected void finishRollback(HoodieInstant inflightInstant, HoodieRollbackMetadata rollbackMetadata) throws HoodieIOException {
try {
writeToMetadata(rollbackMetadata);
table.getActiveTimeline().transitionRollbackInflightToComplete(inflightInstant,
TimelineMetadataUtils.serializeRollbackMetadata(rollbackMetadata));
LOG.info("Rollback of Commits " + rollbackMetadata.getCommitsRollback() + " is complete");
@@ -256,6 +260,19 @@ public abstract class BaseRollbackActionExecutor<T extends HoodieRecordPayload,
}
}
/**
 * Update metadata table if available. Any update to the metadata table happens within the
 * data table lock, so that the metadata table is only ever written by a single writer.
 * @param rollbackMetadata instance of {@link HoodieRollbackMetadata} to be applied to metadata.
 */
private void writeToMetadata(HoodieRollbackMetadata rollbackMetadata) {
try {
this.txnManager.beginTransaction(Option.empty(), Option.empty());
writeTableMetadata(rollbackMetadata);
} finally {
// Always release the lock, even if the metadata update fails.
this.txnManager.endTransaction();
}
}
/**
* Delete Inflight instant if enabled.
*

View File

@@ -28,6 +28,7 @@ import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieLockException;
import java.io.IOException;
import java.io.Serializable;
import java.util.concurrent.TimeUnit;
import static org.apache.hudi.common.config.LockConfiguration.FILESYSTEM_LOCK_PATH_PROP_KEY;
@@ -39,12 +40,12 @@ import static org.apache.hudi.common.config.LockConfiguration.LOCK_ACQUIRE_RETRY
* create operation. This lock does not support cleaning/expiring the lock after a failed write hence cannot be used
* in production environments.
*/
public class FileSystemBasedLockProviderTestClass implements LockProvider<String> {
public class FileSystemBasedLockProviderTestClass implements LockProvider<String>, Serializable {
private static final String LOCK_NAME = "acquired";
private String lockPath;
private FileSystem fs;
private transient FileSystem fs;
protected LockConfiguration lockConfiguration;
public FileSystemBasedLockProviderTestClass(final LockConfiguration lockConfiguration, final Configuration configuration) {
@@ -55,7 +56,7 @@ public class FileSystemBasedLockProviderTestClass implements LockProvider<String
public void acquireLock() {
try {
fs.create(new Path(lockPath + "/" + LOCK_NAME)).close();
fs.create(new Path(lockPath + "/" + LOCK_NAME), false).close();
} catch (IOException e) {
throw new HoodieIOException("Failed to acquire lock", e);
}
@@ -78,7 +79,12 @@ public class FileSystemBasedLockProviderTestClass implements LockProvider<String
&& (numRetries <= lockConfiguration.getConfig().getInteger(LOCK_ACQUIRE_NUM_RETRIES_PROP_KEY))) {
Thread.sleep(lockConfiguration.getConfig().getInteger(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP_KEY));
}
acquireLock();
synchronized (LOCK_NAME) {
if (fs.exists(new Path(lockPath + "/" + LOCK_NAME))) {
return false;
}
acquireLock();
}
return true;
} catch (IOException | InterruptedException e) {
throw new HoodieLockException("Failed to acquire lock", e);

View File

@@ -0,0 +1,143 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.testutils;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * {@link HoodieTestTable} impl used for testing metadata. This class does synchronous updates
 * to the {@link HoodieTableMetadataWriter} if non null, mirroring the production flow where
 * every operation committed to the data table is also applied to the metadata table.
 */
public class HoodieMetadataTestTable extends HoodieTestTable {

  // When non-null, every completed timeline action is synchronously applied to the metadata
  // table through this writer; null disables metadata syncing entirely.
  private final HoodieTableMetadataWriter writer;

  protected HoodieMetadataTestTable(String basePath, FileSystem fs, HoodieTableMetaClient metaClient,
                                    HoodieTableMetadataWriter writer) {
    super(basePath, fs, metaClient);
    this.writer = writer;
  }

  /**
   * Creates a test table with metadata syncing disabled.
   */
  public static HoodieTestTable of(HoodieTableMetaClient metaClient) {
    return HoodieMetadataTestTable.of(metaClient, null);
  }

  /**
   * Creates a test table that mirrors every completed operation to the given metadata writer.
   *
   * @param metaClient meta client of the table under test.
   * @param writer     metadata writer to sync to; may be null to disable syncing.
   */
  public static HoodieTestTable of(HoodieTableMetaClient metaClient, HoodieTableMetadataWriter writer) {
    testTableState = HoodieTestTableState.of();
    return new HoodieMetadataTestTable(metaClient.getBasePath(), metaClient.getRawFs(), metaClient, writer);
  }

  @Override
  public HoodieCommitMetadata doWriteOperation(String commitTime, WriteOperationType operationType,
                                               List<String> newPartitionsToAdd, List<String> partitions,
                                               int filesPerPartition, boolean bootstrap,
                                               boolean createInflightCommit) throws Exception {
    HoodieCommitMetadata commitMetadata = super.doWriteOperation(commitTime, operationType, newPartitionsToAdd,
        partitions, filesPerPartition, bootstrap, createInflightCommit);
    // Only completed commits are synced; inflight commits are not yet visible on the data timeline.
    if (writer != null && !createInflightCommit) {
      writer.update(commitMetadata, commitTime);
    }
    return commitMetadata;
  }

  @Override
  public HoodieTestTable moveInflightCommitToComplete(String instantTime, HoodieCommitMetadata metadata) throws IOException {
    // Delegate to the 3-arg overload with the writer enabled.
    return moveInflightCommitToComplete(instantTime, metadata, false);
  }

  /**
   * Completes the given inflight commit, optionally skipping the metadata table sync.
   *
   * @param ignoreWriter when true, the metadata writer is not invoked even if configured.
   */
  public HoodieTestTable moveInflightCommitToComplete(String instantTime, HoodieCommitMetadata metadata,
                                                      boolean ignoreWriter) throws IOException {
    super.moveInflightCommitToComplete(instantTime, metadata);
    if (!ignoreWriter && writer != null) {
      writer.update(metadata, instantTime);
    }
    return this;
  }

  @Override
  public HoodieTestTable moveInflightCompactionToComplete(String instantTime, HoodieCommitMetadata metadata) throws IOException {
    super.moveInflightCompactionToComplete(instantTime, metadata);
    if (writer != null) {
      writer.update(metadata, instantTime);
    }
    return this;
  }

  @Override
  public HoodieCleanMetadata doClean(String commitTime, Map<String, Integer> partitionFileCountsToDelete) throws IOException {
    HoodieCleanMetadata cleanMetadata = super.doClean(commitTime, partitionFileCountsToDelete);
    if (writer != null) {
      writer.update(cleanMetadata, commitTime);
    }
    return cleanMetadata;
  }

  @Override
  public HoodieTestTable addCompaction(String instantTime, HoodieCommitMetadata commitMetadata) throws Exception {
    super.addCompaction(instantTime, commitMetadata);
    if (writer != null) {
      writer.update(commitMetadata, instantTime);
    }
    return this;
  }

  @Override
  public HoodieTestTable addRollback(String instantTime, HoodieRollbackMetadata rollbackMetadata) throws IOException {
    super.addRollback(instantTime, rollbackMetadata);
    if (writer != null) {
      writer.update(rollbackMetadata, instantTime);
    }
    return this;
  }

  @Override
  public HoodieTestTable addRestore(String instantTime, HoodieRestoreMetadata restoreMetadata) throws IOException {
    super.addRestore(instantTime, restoreMetadata);
    if (writer != null) {
      writer.update(restoreMetadata, instantTime);
    }
    return this;
  }

  @Override
  public HoodieTestTable addReplaceCommit(
      String instantTime,
      Option<HoodieRequestedReplaceMetadata> requestedReplaceMetadata,
      Option<HoodieCommitMetadata> inflightReplaceMetadata,
      HoodieReplaceCommitMetadata completeReplaceMetadata) throws Exception {
    super.addReplaceCommit(instantTime, requestedReplaceMetadata, inflightReplaceMetadata, completeReplaceMetadata);
    if (writer != null) {
      writer.update(completeReplaceMetadata, instantTime);
    }
    return this;
  }
}