[HUDI-2285][HUDI-2476] Metadata table synchronous design. Rebased and Squashed from pull/3426 (#3590)
* [HUDI-2285] Adding Synchronous updates to metadata before completion of commits in data timelime. - This patch adds synchronous updates to metadata table. In other words, every write is first committed to metadata table followed by data table. While reading metadata table, we ignore any delta commits that are present only in metadata table and not in data table timeline. - Compaction of metadata table is fenced by the condition that we trigger compaction only when there are no inflight requests in datatable. This ensures that all base files in metadata table is always in sync with data table(w/o any holes) and only there could be some extra invalid commits among delta log files in metadata table. - Due to this, archival of data table also fences itself up until compacted instant in metadata table. All writes to metadata table happens within the datatable lock. So, metadata table works in one writer mode only. This might be tough to loosen since all writers write to same FILES partition and so, will result in a conflict anyways. - As part of this, have added acquiring locks in data table for those operations which were not before while committing (rollback, clean, compaction, cluster). To note, we were not doing any conflict resolution. All we are doing here is to commit by taking a lock. So that all writes to metadata table is always a single writer. - Also added building block to add buckets for partitions, which will be leveraged by other indexes like record level index, etc. For now, FILES partition has only one bucket. In general, any number of buckets per partition is allowed and each partition has a fixed fileId prefix with incremental suffix for each bucket within each partition. Have fixed [HUDI-2476]. This fix is about retrying a failed compaction if it succeeded in metadata for first time, but failed w/ data table. - Enabling metadata table by default. - Adding more tests for metadata table Co-authored-by: Prashant Wason <pwason@uber.com>
This commit is contained in:
committed by
GitHub
parent
46808dcb1f
commit
5f32162a2f
@@ -42,7 +42,6 @@ import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieClusteringException;
|
||||
import org.apache.hudi.exception.HoodieCommitException;
|
||||
import org.apache.hudi.exception.HoodieMetadataException;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.index.SparkHoodieIndex;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
|
||||
@@ -79,7 +78,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
private static final Logger LOG = LogManager.getLogger(SparkRDDWriteClient.class);
|
||||
|
||||
public SparkRDDWriteClient(HoodieEngineContext context, HoodieWriteConfig clientConfig) {
|
||||
super(context, clientConfig);
|
||||
this(context, clientConfig, Option.empty());
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
@@ -96,6 +95,11 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
public SparkRDDWriteClient(HoodieEngineContext context, HoodieWriteConfig writeConfig,
|
||||
Option<EmbeddedTimelineService> timelineService) {
|
||||
super(context, writeConfig, timelineService);
|
||||
if (config.isMetadataTableEnabled()) {
|
||||
// If the metadata table does not exist, it should be bootstrapped here
|
||||
// TODO: Check if we can remove this requirement - auto bootstrap on commit
|
||||
SparkHoodieBackedTableMetadataWriter.create(context.getHadoopConf().get(), config, context);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -299,12 +303,13 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
String compactionCommitTime) {
|
||||
this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status and commit compaction");
|
||||
List<HoodieWriteStat> writeStats = writeStatuses.map(WriteStatus::getStat).collect();
|
||||
writeTableMetadata(table, metadata, new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionCommitTime));
|
||||
// commit to data table after committing to metadata table.
|
||||
finalizeWrite(table, compactionCommitTime, writeStats);
|
||||
LOG.info("Committing Compaction " + compactionCommitTime + ". Finished with result " + metadata);
|
||||
SparkCompactHelpers.newInstance().completeInflightCompaction(table, compactionCommitTime, metadata);
|
||||
WriteMarkersFactory.get(config.getMarkersType(), table, compactionCommitTime)
|
||||
.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
|
||||
if (compactionTimer != null) {
|
||||
long durationInMs = metrics.getDurationInMs(compactionTimer.stop());
|
||||
try {
|
||||
@@ -320,7 +325,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
|
||||
@Override
|
||||
protected JavaRDD<WriteStatus> compact(String compactionInstantTime, boolean shouldComplete) {
|
||||
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
|
||||
HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, true);
|
||||
preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient());
|
||||
HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
|
||||
HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
|
||||
@@ -369,6 +374,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
throw new HoodieClusteringException("Clustering failed to write to files:"
|
||||
+ writeStats.stream().filter(s -> s.getTotalWriteErrors() > 0L).map(s -> s.getFileId()).collect(Collectors.joining(",")));
|
||||
}
|
||||
writeTableMetadata(table, metadata, new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringCommitTime));
|
||||
finalizeWrite(table, clusteringCommitTime, writeStats);
|
||||
try {
|
||||
LOG.info("Committing Clustering " + clusteringCommitTime + ". Finished with result " + metadata);
|
||||
@@ -376,7 +382,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
HoodieTimeline.getReplaceCommitInflightInstant(clusteringCommitTime),
|
||||
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, e);
|
||||
throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, e);
|
||||
}
|
||||
WriteMarkersFactory.get(config.getMarkersType(), table, clusteringCommitTime)
|
||||
.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
@@ -393,6 +399,18 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
LOG.info("Clustering successfully on commit " + clusteringCommitTime);
|
||||
}
|
||||
|
||||
private void writeTableMetadata(HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table, HoodieCommitMetadata commitMetadata,
|
||||
HoodieInstant hoodieInstant) {
|
||||
try {
|
||||
this.txnManager.beginTransaction(Option.of(hoodieInstant), Option.empty());
|
||||
// Do not do any conflict resolution here as we do with regular writes. We take the lock here to ensure all writes to metadata table happens within a
|
||||
// single lock (single writer). Because more than one write to metadata table will result in conflicts since all of them updates the same partition.
|
||||
table.getMetadataWriter().ifPresent(w -> w.update(commitMetadata, hoodieInstant.getTimestamp()));
|
||||
} finally {
|
||||
this.txnManager.endTransaction();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> getTableAndInitCtx(WriteOperationType operationType, String instantTime) {
|
||||
HoodieTableMetaClient metaClient = createMetaClient(true);
|
||||
@@ -445,24 +463,10 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
} else {
|
||||
writeTimer = metrics.getDeltaCommitCtx();
|
||||
}
|
||||
table.getHoodieView().sync();
|
||||
return table;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void syncTableMetadata() {
|
||||
if (!config.getMetadataConfig().enableSync()) {
|
||||
LOG.info("Metadata table sync is disabled in the config.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Open up the metadata table again, for syncing
|
||||
try (HoodieTableMetadataWriter writer = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context)) {
|
||||
LOG.info("Successfully synced to metadata table");
|
||||
} catch (Exception e) {
|
||||
throw new HoodieMetadataException("Error syncing to metadata table.", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void preCommit(String instantTime, HoodieCommitMetadata metadata) {
|
||||
// Create a Hoodie table after startTxn which encapsulated the commits and files visible.
|
||||
@@ -470,6 +474,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
HoodieTable table = createTable(config, hadoopConf);
|
||||
TransactionUtils.resolveWriteConflictIfAny(table, this.txnManager.getCurrentTransactionOwner(),
|
||||
Option.of(metadata), config, txnManager.getLastCompletedTransactionOwner());
|
||||
table.getMetadataWriter().ifPresent(w -> ((HoodieTableMetadataWriter)w).update(metadata, instantTime));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -24,20 +24,15 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.metrics.Registry;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordLocation;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.view.TableFileSystemView;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.exception.HoodieMetadataException;
|
||||
import org.apache.hudi.metrics.DistributedRegistry;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.log4j.LogManager;
|
||||
@@ -46,8 +41,8 @@ import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetadataWriter {
|
||||
@@ -78,7 +73,7 @@ public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetad
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void initialize(HoodieEngineContext engineContext, HoodieTableMetaClient datasetMetaClient) {
|
||||
protected void initialize(HoodieEngineContext engineContext) {
|
||||
try {
|
||||
metrics.map(HoodieMetadataMetrics::registry).ifPresent(registry -> {
|
||||
if (registry instanceof DistributedRegistry) {
|
||||
@@ -88,7 +83,7 @@ public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetad
|
||||
});
|
||||
|
||||
if (enabled) {
|
||||
bootstrapIfNeeded(engineContext, datasetMetaClient);
|
||||
bootstrapIfNeeded(engineContext, dataMetaClient);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.error("Failed to initialize metadata table. Disabling the writer.", e);
|
||||
@@ -99,83 +94,93 @@ public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetad
|
||||
@Override
|
||||
protected void commit(List<HoodieRecord> records, String partitionName, String instantTime) {
|
||||
ValidationUtils.checkState(enabled, "Metadata table cannot be committed to as it is not enabled");
|
||||
JavaRDD<HoodieRecord> recordRDD = prepRecords(records, partitionName);
|
||||
JavaRDD<HoodieRecord> recordRDD = prepRecords(records, partitionName, 1);
|
||||
|
||||
try (SparkRDDWriteClient writeClient = new SparkRDDWriteClient(engineContext, metadataWriteConfig, true)) {
|
||||
writeClient.startCommitWithTime(instantTime);
|
||||
if (!metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(instantTime)) {
|
||||
// if this is a new commit being applied to metadata for the first time
|
||||
writeClient.startCommitWithTime(instantTime);
|
||||
} else {
|
||||
// this code path refers to a re-attempted commit that got committed to metadata table, but failed in datatable.
|
||||
// for eg, lets say compaction c1 on 1st attempt succeeded in metadata table and failed before committing to datatable.
|
||||
// when retried again, data table will first rollback pending compaction. these will be applied to metadata table, but all changes
|
||||
// are upserts to metadata table and so only a new delta commit will be created.
|
||||
// once rollback is complete, compaction will be retried again, which will eventually hit this code block where the respective commit is
|
||||
// already part of completed commit. So, we have to manually remove the completed instant and proceed.
|
||||
// and it is for the same reason we enabled withAllowMultiWriteOnSameInstant for metadata table.
|
||||
HoodieInstant alreadyCompletedInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().filter(entry -> entry.getTimestamp().equals(instantTime)).lastInstant().get();
|
||||
HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant);
|
||||
metadataMetaClient.reloadActiveTimeline();
|
||||
}
|
||||
List<WriteStatus> statuses = writeClient.upsertPreppedRecords(recordRDD, instantTime).collect();
|
||||
statuses.forEach(writeStatus -> {
|
||||
if (writeStatus.hasErrors()) {
|
||||
throw new HoodieMetadataException("Failed to commit metadata table records at instant " + instantTime);
|
||||
}
|
||||
});
|
||||
// trigger cleaning, compaction, with suffixes based on the same instant time. This ensures that any future
|
||||
// delta commits synced over will not have an instant time lesser than the last completed instant on the
|
||||
// metadata table.
|
||||
if (writeClient.scheduleCompactionAtInstant(instantTime + "001", Option.empty())) {
|
||||
writeClient.compact(instantTime + "001");
|
||||
}
|
||||
writeClient.clean(instantTime + "002");
|
||||
|
||||
// reload timeline
|
||||
metadataMetaClient.reloadActiveTimeline();
|
||||
compactIfNecessary(writeClient, instantTime);
|
||||
doClean(writeClient, instantTime);
|
||||
}
|
||||
|
||||
// Update total size of the metadata and count of base/log files
|
||||
metrics.ifPresent(m -> {
|
||||
try {
|
||||
Map<String, String> stats = m.getStats(false, metaClient, metadata);
|
||||
m.updateMetrics(Long.parseLong(stats.get(HoodieMetadataMetrics.STAT_TOTAL_BASE_FILE_SIZE)),
|
||||
Long.parseLong(stats.get(HoodieMetadataMetrics.STAT_TOTAL_LOG_FILE_SIZE)),
|
||||
Integer.parseInt(stats.get(HoodieMetadataMetrics.STAT_COUNT_BASE_FILES)),
|
||||
Integer.parseInt(stats.get(HoodieMetadataMetrics.STAT_COUNT_LOG_FILES)));
|
||||
} catch (HoodieIOException e) {
|
||||
LOG.error("Could not publish metadata size metrics", e);
|
||||
}
|
||||
});
|
||||
metrics.ifPresent(m -> m.updateSizeMetrics(metadataMetaClient, metadata));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tag each record with the location.
|
||||
* Perform a compaction on the Metadata Table.
|
||||
*
|
||||
* Since we only read the latest base file in a partition, we tag the records with the instant time of the latest
|
||||
* base file.
|
||||
* Cases to be handled:
|
||||
* 1. We cannot perform compaction if there are previous inflight operations on the dataset. This is because
|
||||
* a compacted metadata base file at time Tx should represent all the actions on the dataset till time Tx.
|
||||
*
|
||||
* 2. In multi-writer scenario, a parallel operation with a greater instantTime may have completed creating a
|
||||
* deltacommit.
|
||||
*/
|
||||
private JavaRDD<HoodieRecord> prepRecords(List<HoodieRecord> records, String partitionName) {
|
||||
HoodieTable table = HoodieSparkTable.create(metadataWriteConfig, engineContext);
|
||||
TableFileSystemView.SliceView fsView = table.getSliceView();
|
||||
List<HoodieBaseFile> baseFiles = fsView.getLatestFileSlices(partitionName)
|
||||
.map(FileSlice::getBaseFile)
|
||||
.filter(Option::isPresent)
|
||||
.map(Option::get)
|
||||
.collect(Collectors.toList());
|
||||
private void compactIfNecessary(SparkRDDWriteClient writeClient, String instantTime) {
|
||||
String latestDeltacommitTime = metadataMetaClient.getActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants().lastInstant()
|
||||
.get().getTimestamp();
|
||||
List<HoodieInstant> pendingInstants = dataMetaClient.reloadActiveTimeline().filterInflightsAndRequested()
|
||||
.findInstantsBefore(latestDeltacommitTime).getInstants().collect(Collectors.toList());
|
||||
|
||||
// All the metadata fits within a single base file
|
||||
if (partitionName.equals(MetadataPartitionType.FILES.partitionPath())) {
|
||||
if (baseFiles.size() > 1) {
|
||||
throw new HoodieMetadataException("Multiple base files found in metadata partition");
|
||||
}
|
||||
if (!pendingInstants.isEmpty()) {
|
||||
LOG.info(String.format("Cannot compact metadata table as there are %d inflight instants before latest deltacommit %s: %s",
|
||||
pendingInstants.size(), latestDeltacommitTime, Arrays.toString(pendingInstants.toArray())));
|
||||
return;
|
||||
}
|
||||
|
||||
// Trigger compaction with suffixes based on the same instant time. This ensures that any future
|
||||
// delta commits synced over will not have an instant time lesser than the last completed instant on the
|
||||
// metadata table.
|
||||
final String compactionInstantTime = latestDeltacommitTime + "001";
|
||||
if (writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty())) {
|
||||
writeClient.compact(compactionInstantTime);
|
||||
}
|
||||
}
|
||||
|
||||
private void doClean(SparkRDDWriteClient writeClient, String instantTime) {
|
||||
// Trigger cleaning with suffixes based on the same instant time. This ensures that any future
|
||||
// delta commits synced over will not have an instant time lesser than the last completed instant on the
|
||||
// metadata table.
|
||||
writeClient.clean(instantTime + "002");
|
||||
}
|
||||
|
||||
/**
|
||||
* Tag each record with the location in the given partition.
|
||||
*
|
||||
* The record is tagged with respective file slice's location based on its record key.
|
||||
*/
|
||||
private JavaRDD<HoodieRecord> prepRecords(List<HoodieRecord> records, String partitionName, int numFileGroups) {
|
||||
List<FileSlice> fileSlices = HoodieTableMetadataUtil.loadPartitionFileGroupsWithLatestFileSlices(metadataMetaClient, partitionName);
|
||||
ValidationUtils.checkArgument(fileSlices.size() == numFileGroups, String.format("Invalid number of file groups: found=%d, required=%d", fileSlices.size(), numFileGroups));
|
||||
|
||||
JavaSparkContext jsc = ((HoodieSparkEngineContext) engineContext).getJavaSparkContext();
|
||||
String fileId;
|
||||
String instantTime;
|
||||
if (!baseFiles.isEmpty()) {
|
||||
fileId = baseFiles.get(0).getFileId();
|
||||
instantTime = baseFiles.get(0).getCommitTime();
|
||||
} else {
|
||||
// If there is a log file then we can assume that it has the data
|
||||
List<HoodieLogFile> logFiles = fsView.getLatestFileSlices(MetadataPartitionType.FILES.partitionPath())
|
||||
.map(FileSlice::getLatestLogFile)
|
||||
.filter(Option::isPresent)
|
||||
.map(Option::get)
|
||||
.collect(Collectors.toList());
|
||||
if (logFiles.isEmpty()) {
|
||||
// No base and log files. All are new inserts
|
||||
return jsc.parallelize(records, 1);
|
||||
}
|
||||
|
||||
fileId = logFiles.get(0).getFileId();
|
||||
instantTime = logFiles.get(0).getBaseCommitTime();
|
||||
}
|
||||
|
||||
return jsc.parallelize(records, 1).map(r -> r.setCurrentLocation(new HoodieRecordLocation(instantTime, fileId)));
|
||||
return jsc.parallelize(records, 1).map(r -> {
|
||||
FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), numFileGroups));
|
||||
r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId()));
|
||||
return r;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,14 +29,24 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieMetadataException;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.index.SparkHoodieIndex;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadata;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
|
||||
import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public abstract class HoodieSparkTable<T extends HoodieRecordPayload>
|
||||
extends HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> {
|
||||
|
||||
private boolean isMetadataAvailabilityUpdated = false;
|
||||
private boolean isMetadataTableAvailable;
|
||||
|
||||
protected HoodieSparkTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
|
||||
super(config, context, metaClient);
|
||||
}
|
||||
@@ -85,4 +95,31 @@ public abstract class HoodieSparkTable<T extends HoodieRecordPayload>
|
||||
protected HoodieIndex<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> getIndex(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return SparkHoodieIndex.createIndex(config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch instance of {@link HoodieTableMetadataWriter}.
|
||||
*
|
||||
* @return instance of {@link HoodieTableMetadataWriter}
|
||||
*/
|
||||
@Override
|
||||
public Option<HoodieTableMetadataWriter> getMetadataWriter() {
|
||||
synchronized (this) {
|
||||
if (!isMetadataAvailabilityUpdated) {
|
||||
// this code assumes that if metadata availability is updated once it will not change. please revisit this logic if that's not the case.
|
||||
// this is done to avoid repeated calls to fs.exists().
|
||||
try {
|
||||
isMetadataTableAvailable = config.isMetadataTableEnabled()
|
||||
&& metaClient.getFs().exists(new Path(HoodieTableMetadata.getMetadataTableBasePath(metaClient.getBasePath())));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieMetadataException("Checking existence of metadata table failed", e);
|
||||
}
|
||||
isMetadataAvailabilityUpdated = true;
|
||||
}
|
||||
}
|
||||
if (isMetadataTableAvailable) {
|
||||
return Option.of(SparkHoodieBackedTableMetadataWriter.create(context.getHadoopConf().get(), config, context));
|
||||
} else {
|
||||
return Option.empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,13 +59,10 @@ import org.apache.hudi.exception.HoodieCommitException;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.exception.HoodieKeyGeneratorException;
|
||||
import org.apache.hudi.exception.HoodieMetadataException;
|
||||
import org.apache.hudi.execution.SparkBoundedInMemoryExecutor;
|
||||
import org.apache.hudi.io.HoodieBootstrapHandle;
|
||||
import org.apache.hudi.keygen.KeyGeneratorInterface;
|
||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
|
||||
import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
@@ -226,17 +223,6 @@ public class SparkBootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>
|
||||
LOG.info("Committing metadata bootstrap !!");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void syncTableMetadata() {
|
||||
// Open up the metadata table again, for syncing
|
||||
try (HoodieTableMetadataWriter writer =
|
||||
SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context)) {
|
||||
LOG.info("Successfully synced to metadata table");
|
||||
} catch (Exception e) {
|
||||
throw new HoodieMetadataException("Error syncing to metadata table.", e);
|
||||
}
|
||||
}
|
||||
|
||||
protected void commit(Option<Map<String, String>> extraMetadata, HoodieWriteMetadata<JavaRDD<WriteStatus>> result, List<HoodieWriteStat> stats) {
|
||||
String actionType = table.getMetaClient().getCommitActionType();
|
||||
LOG.info("Committing " + instantTime + ", action Type " + actionType);
|
||||
@@ -252,7 +238,6 @@ public class SparkBootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>
|
||||
|
||||
// Finalize write
|
||||
finalizeWrite(instantTime, stats, result);
|
||||
syncTableMetadata();
|
||||
// add in extra metadata
|
||||
if (extraMetadata.isPresent()) {
|
||||
extraMetadata.get().forEach(metadata::addMetadata);
|
||||
@@ -260,6 +245,8 @@ public class SparkBootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>
|
||||
metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, getSchemaToStoreInCommit());
|
||||
metadata.setOperationType(operationType);
|
||||
|
||||
writeTableMetadata(metadata);
|
||||
|
||||
try {
|
||||
activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, instantTime),
|
||||
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
|
||||
@@ -40,7 +40,6 @@ import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieCommitException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.exception.HoodieMetadataException;
|
||||
import org.apache.hudi.exception.HoodieUpsertException;
|
||||
import org.apache.hudi.execution.SparkLazyInsertIterable;
|
||||
import org.apache.hudi.io.CreateHandleFactory;
|
||||
@@ -49,8 +48,6 @@ import org.apache.hudi.io.HoodieSortedMergeHandle;
|
||||
import org.apache.hudi.io.storage.HoodieConcatHandle;
|
||||
import org.apache.hudi.keygen.BaseKeyGenerator;
|
||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
|
||||
import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.WorkloadProfile;
|
||||
@@ -239,7 +236,7 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
result.setWriteStatuses(statuses);
|
||||
return statuses;
|
||||
}
|
||||
|
||||
|
||||
protected void updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD, HoodieWriteMetadata result) {
|
||||
updateIndex(writeStatusRDD, result);
|
||||
result.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(result));
|
||||
@@ -264,13 +261,11 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
result.setWriteStats(writeStats);
|
||||
// Finalize write
|
||||
finalizeWrite(instantTime, writeStats, result);
|
||||
syncTableMetadata();
|
||||
try {
|
||||
LOG.info("Committing " + instantTime + ", action Type " + getCommitActionType());
|
||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||
HoodieCommitMetadata metadata = CommitUtils.buildMetadata(writeStats, result.getPartitionToReplaceFileIds(),
|
||||
extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType());
|
||||
|
||||
writeTableMetadata(metadata);
|
||||
activeTimeline.saveAsComplete(new HoodieInstant(true, getCommitActionType(), instantTime),
|
||||
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
LOG.info("Committed " + instantTime);
|
||||
@@ -354,17 +349,6 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void syncTableMetadata() {
|
||||
// Open up the metadata table again, for syncing
|
||||
try (HoodieTableMetadataWriter writer =
|
||||
SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context)) {
|
||||
LOG.info("Successfully synced to metadata table");
|
||||
} catch (Exception e) {
|
||||
throw new HoodieMetadataException("Error syncing to metadata table.", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<List<WriteStatus>> handleInsert(String idPfx, Iterator<HoodieRecord<T>> recordItr)
|
||||
throws Exception {
|
||||
|
||||
@@ -30,4 +30,4 @@ public class OneToTwoUpgradeHandler extends BaseOneToTwoUpgradeHandler {
|
||||
String getPartitionColumns(HoodieWriteConfig config) {
|
||||
return HoodieSparkUtils.getPartitionColumns(config.getProps());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -53,6 +53,8 @@ public class SparkUpgradeDowngrade extends AbstractUpgradeDowngrade {
|
||||
return new ZeroToOneUpgradeHandler().upgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.ONE && toVersion == HoodieTableVersion.TWO) {
|
||||
return new OneToTwoUpgradeHandler().upgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.TWO && toVersion == HoodieTableVersion.THREE) {
|
||||
return new TwoToThreeUpgradeHandler().upgrade(config, context, instantTime);
|
||||
} else {
|
||||
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), true);
|
||||
}
|
||||
@@ -64,6 +66,8 @@ public class SparkUpgradeDowngrade extends AbstractUpgradeDowngrade {
|
||||
return new OneToZeroDowngradeHandler().downgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.TWO && toVersion == HoodieTableVersion.ONE) {
|
||||
return new TwoToOneDowngradeHandler().downgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.THREE && toVersion == HoodieTableVersion.TWO) {
|
||||
return new ThreeToTwoDowngradeHandler().downgrade(config, context, instantTime);
|
||||
} else {
|
||||
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), false);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Downgrade handler to assist in downgrading hoodie table from version 3 to 2.
|
||||
*/
|
||||
public class ThreeToTwoDowngradeHandler implements DowngradeHandler {
|
||||
|
||||
@Override
|
||||
public Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
if (config.isMetadataTableEnabled()) {
|
||||
// Metadata Table in version 3 is synchronous and in version 2 is asynchronous. Downgrading to asynchronous
|
||||
// removes the checks in code to decide whether to use a LogBlock or not. Also, the schema for the
|
||||
// table has been updated and is not forward compatible. Hence, we need to delete the table.
|
||||
HoodieTableMetadataUtil.deleteMetadataTable(config.getBasePath(), context);
|
||||
}
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* UpgradeHandler to assist in upgrading {@link org.apache.hudi.table.HoodieTable} from version 2 to 3.
|
||||
*/
|
||||
public class TwoToThreeUpgradeHandler implements UpgradeHandler {
|
||||
@Override
|
||||
public Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
if (config.isMetadataTableEnabled()) {
|
||||
// Metadata Table in version 2 is asynchronous and in version 3 is synchronous. Synchronous table will not
|
||||
// sync any instants not already synced. So its simpler to re-bootstrap the table. Also, the schema for the
|
||||
// table has been updated and is not backward compatible.
|
||||
HoodieTableMetadataUtil.deleteMetadataTable(config.getBasePath(), context);
|
||||
}
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user