[HUDI-2501] Add HoodieData abstraction and refactor compaction actions in hudi-client module (#3741)
This commit is contained in:
@@ -775,19 +775,6 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
||||
protected abstract void completeCompaction(HoodieCommitMetadata metadata, O writeStatuses,
|
||||
HoodieTable<T, I, K, O> table, String compactionCommitTime);
|
||||
|
||||
/**
|
||||
* Rollback failed compactions. Inflight rollbacks for compactions revert the .inflight file to the .requested file
|
||||
* TODO : Deprecate this method and make it protected
|
||||
* @param inflightInstant Inflight Compaction Instant
|
||||
* @param table Hoodie Table
|
||||
*/
|
||||
public void rollbackInflightCompaction(HoodieInstant inflightInstant, HoodieTable<T, I, K, O> table) {
|
||||
String commitTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
table.scheduleRollback(context, commitTime, inflightInstant, false);
|
||||
table.rollback(context, commitTime, inflightInstant, false);
|
||||
table.getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get inflight time line exclude compaction and clustering.
|
||||
* @param metaClient
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table;
|
||||
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Interface for insert and update operations in compaction.
|
||||
*
|
||||
* @param <T> HoodieRecordPayload type.
|
||||
*/
|
||||
public interface HoodieCompactionHandler<T extends HoodieRecordPayload> {
|
||||
Iterator<List<WriteStatus>> handleUpdate(String instantTime, String partitionPath, String fileId,
|
||||
Map<String, HoodieRecord<T>> keyToNewRecords, HoodieBaseFile oldDataFile) throws IOException;
|
||||
|
||||
Iterator<List<WriteStatus>> handleInsert(String instantTime, String partitionPath, String fileId,
|
||||
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordMap);
|
||||
}
|
||||
@@ -365,12 +365,11 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
|
||||
/**
|
||||
* Run Compaction on the table. Compaction arranges the data so that it is optimized for data access.
|
||||
*
|
||||
* @param context HoodieEngineContext
|
||||
* @param context HoodieEngineContext
|
||||
* @param compactionInstantTime Instant Time
|
||||
*/
|
||||
public abstract HoodieWriteMetadata<O> compact(HoodieEngineContext context,
|
||||
String compactionInstantTime);
|
||||
|
||||
String compactionInstantTime);
|
||||
|
||||
/**
|
||||
* Schedule clustering for the instant time.
|
||||
@@ -471,11 +470,24 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
|
||||
String restoreInstantTime,
|
||||
String instantToRestore);
|
||||
|
||||
/**
|
||||
* Rollback failed compactions. Inflight rollbacks for compactions revert the .inflight file
|
||||
* to the .requested file.
|
||||
*
|
||||
* @param inflightInstant Inflight Compaction Instant
|
||||
*/
|
||||
public void rollbackInflightCompaction(HoodieInstant inflightInstant) {
|
||||
String commitTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
scheduleRollback(context, commitTime, inflightInstant, false);
|
||||
rollback(context, commitTime, inflightInstant, false);
|
||||
getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finalize the written data onto storage. Perform any final cleanups.
|
||||
*
|
||||
* @param context HoodieEngineContext
|
||||
* @param stats List of HoodieWriteStats
|
||||
* @param stats List of HoodieWriteStats
|
||||
* @throws HoodieIOException if some paths can't be finalized on storage
|
||||
*/
|
||||
public void finalizeWrite(HoodieEngineContext context, String instantTs, List<HoodieWriteStat> stats) throws HoodieIOException {
|
||||
|
||||
@@ -46,6 +46,36 @@ public class HoodieWriteMetadata<O> {
|
||||
/**
 * Creates an empty write metadata instance; values are supplied afterwards through the setters.
 */
public HoodieWriteMetadata() {
  // Intentionally empty.
}
|
||||
|
||||
/**
|
||||
* Clones the write metadata with transformed write statuses.
|
||||
*
|
||||
* @param transformedWriteStatuses transformed write statuses
|
||||
* @param <T> type of transformed write statuses
|
||||
* @return Cloned {@link HoodieWriteMetadata<T>} instance
|
||||
*/
|
||||
public <T> HoodieWriteMetadata<T> clone(T transformedWriteStatuses) {
|
||||
HoodieWriteMetadata<T> newMetadataInstance = new HoodieWriteMetadata<>();
|
||||
newMetadataInstance.setWriteStatuses(transformedWriteStatuses);
|
||||
if (indexLookupDuration.isPresent()) {
|
||||
newMetadataInstance.setIndexLookupDuration(indexLookupDuration.get());
|
||||
}
|
||||
newMetadataInstance.setCommitted(isCommitted);
|
||||
newMetadataInstance.setCommitMetadata(commitMetadata);
|
||||
if (writeStats.isPresent()) {
|
||||
newMetadataInstance.setWriteStats(writeStats.get());
|
||||
}
|
||||
if (indexUpdateDuration.isPresent()) {
|
||||
newMetadataInstance.setIndexUpdateDuration(indexUpdateDuration.get());
|
||||
}
|
||||
if (finalizeDuration.isPresent()) {
|
||||
newMetadataInstance.setFinalizeDuration(finalizeDuration.get());
|
||||
}
|
||||
if (partitionToReplaceFileIds.isPresent()) {
|
||||
newMetadataInstance.setPartitionToReplaceFileIds(partitionToReplaceFileIds.get());
|
||||
}
|
||||
return newMetadataInstance;
|
||||
}
|
||||
|
||||
public O getWriteStatuses() {
|
||||
return writeStatuses;
|
||||
}
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.action.compact;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.BaseActionExecutor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public abstract class BaseScheduleCompactionActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieCompactionPlan>> {
|
||||
|
||||
private final Option<Map<String, String>> extraMetadata;
|
||||
|
||||
public BaseScheduleCompactionActionExecutor(HoodieEngineContext context,
|
||||
HoodieWriteConfig config,
|
||||
HoodieTable<T, I, K, O> table,
|
||||
String instantTime,
|
||||
Option<Map<String, String>> extraMetadata) {
|
||||
super(context, config, table, instantTime);
|
||||
this.extraMetadata = extraMetadata;
|
||||
}
|
||||
|
||||
protected abstract HoodieCompactionPlan scheduleCompaction();
|
||||
|
||||
@Override
|
||||
public Option<HoodieCompactionPlan> execute() {
|
||||
if (!config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl()
|
||||
&& !config.getFailedWritesCleanPolicy().isLazy()) {
|
||||
// if there are inflight writes, their instantTime must not be less than that of compaction instant time
|
||||
table.getActiveTimeline().getCommitsTimeline().filterPendingExcludingCompaction().firstInstant()
|
||||
.ifPresent(earliestInflight -> ValidationUtils.checkArgument(
|
||||
HoodieTimeline.compareTimestamps(earliestInflight.getTimestamp(), HoodieTimeline.GREATER_THAN, instantTime),
|
||||
"Earliest write inflight instant time must be later than compaction time. Earliest :" + earliestInflight
|
||||
+ ", Compaction scheduled at " + instantTime));
|
||||
// Committed and pending compaction instants should have strictly lower timestamps
|
||||
List<HoodieInstant> conflictingInstants = table.getActiveTimeline()
|
||||
.getWriteTimeline().filterCompletedAndCompactionInstants().getInstants()
|
||||
.filter(instant -> HoodieTimeline.compareTimestamps(
|
||||
instant.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS, instantTime))
|
||||
.collect(Collectors.toList());
|
||||
ValidationUtils.checkArgument(conflictingInstants.isEmpty(),
|
||||
"Following instants have timestamps >= compactionInstant (" + instantTime + ") Instants :"
|
||||
+ conflictingInstants);
|
||||
}
|
||||
|
||||
HoodieCompactionPlan plan = scheduleCompaction();
|
||||
if (plan != null && (plan.getOperations() != null) && (!plan.getOperations().isEmpty())) {
|
||||
extraMetadata.ifPresent(plan::setExtraMetadata);
|
||||
HoodieInstant compactionInstant =
|
||||
new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instantTime);
|
||||
try {
|
||||
table.getActiveTimeline().saveToCompactionRequested(compactionInstant,
|
||||
TimelineMetadataUtils.serializeCompactionPlan(plan));
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieIOException("Exception scheduling compaction", ioe);
|
||||
}
|
||||
return Option.of(plan);
|
||||
}
|
||||
return Option.empty();
|
||||
}
|
||||
}
|
||||
@@ -18,17 +18,23 @@
|
||||
|
||||
package org.apache.hudi.table.action.compact;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.data.HoodieData;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieCompactionException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Base class that helps to perform compaction.
|
||||
@@ -38,11 +44,34 @@ import java.nio.charset.StandardCharsets;
|
||||
* @param <K> Type of keys
|
||||
* @param <O> Type of outputs
|
||||
*/
|
||||
public abstract class AbstractCompactHelpers<T extends HoodieRecordPayload, I, K, O> {
|
||||
public abstract HoodieCommitMetadata createCompactionMetadata(HoodieTable<T, I, K, O> table,
|
||||
String compactionInstantTime,
|
||||
O writeStatuses,
|
||||
String schema) throws IOException;
|
||||
public class CompactHelpers<T extends HoodieRecordPayload, I, K, O> {
|
||||
|
||||
// The single shared instance handed out by getInstance().
private static final CompactHelpers SINGLETON_INSTANCE = new CompactHelpers();

/**
 * Private so that the class can only be obtained through {@link #getInstance()}.
 */
private CompactHelpers() {
  // Intentionally empty.
}

/**
 * @return the shared {@code CompactHelpers} instance
 */
public static CompactHelpers getInstance() {
  return SINGLETON_INSTANCE;
}
|
||||
|
||||
public HoodieCommitMetadata createCompactionMetadata(
|
||||
HoodieTable table, String compactionInstantTime, HoodieData<WriteStatus> writeStatuses,
|
||||
String schema) throws IOException {
|
||||
byte[] planBytes = table.getActiveTimeline().readCompactionPlanAsBytes(
|
||||
HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get();
|
||||
HoodieCompactionPlan compactionPlan = TimelineMetadataUtils.deserializeCompactionPlan(planBytes);
|
||||
List<HoodieWriteStat> updateStatusMap = writeStatuses.map(WriteStatus::getStat).collectAsList();
|
||||
HoodieCommitMetadata metadata = new HoodieCommitMetadata(true);
|
||||
for (HoodieWriteStat stat : updateStatusMap) {
|
||||
metadata.addWriteStat(stat.getPartitionPath(), stat);
|
||||
}
|
||||
metadata.addMetadata(org.apache.hudi.common.model.HoodieCommitMetadata.SCHEMA_KEY, schema);
|
||||
if (compactionPlan.getExtraMetadata() != null) {
|
||||
compactionPlan.getExtraMetadata().forEach(metadata::addMetadata);
|
||||
}
|
||||
return metadata;
|
||||
}
|
||||
|
||||
public void completeInflightCompaction(HoodieTable table, String compactionCommitTime, HoodieCommitMetadata commitMetadata) {
|
||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||
@@ -18,39 +18,280 @@
|
||||
|
||||
package org.apache.hudi.table.action.compact;
|
||||
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionOperation;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.data.HoodieAccumulator;
|
||||
import org.apache.hudi.common.data.HoodieData;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.engine.TaskContextSupplier;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.CompactionOperation;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.TableSchemaResolver;
|
||||
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.view.TableFileSystemView.SliceView;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.CompactionUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.io.IOUtils;
|
||||
import org.apache.hudi.table.HoodieCompactionHandler;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.compact.strategy.CompactionStrategy;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import static java.util.stream.Collectors.toList;
|
||||
|
||||
/**
|
||||
* A HoodieCompactor runs compaction on a hoodie table.
|
||||
*/
|
||||
public interface HoodieCompactor<T extends HoodieRecordPayload, I, K, O> extends Serializable {
|
||||
public abstract class HoodieCompactor<T extends HoodieRecordPayload, I, K, O> implements Serializable {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(HoodieCompactor.class);
|
||||
|
||||
/**
|
||||
* Generate a new compaction plan for scheduling.
|
||||
* Handles the compaction timeline based on the compaction instant before actual compaction.
|
||||
*
|
||||
* @param context HoodieEngineContext
|
||||
* @param hoodieTable Hoodie Table
|
||||
* @param config Hoodie Write Configuration
|
||||
* @param compactionCommitTime scheduled compaction commit time
|
||||
* @param fgIdsInPendingCompactions partition-fileId pairs for which compaction is pending
|
||||
* @return Compaction Plan
|
||||
* @throws IOException when encountering errors
|
||||
* @param table {@link HoodieTable} instance to use.
|
||||
* @param pendingCompactionTimeline pending compaction timeline.
|
||||
* @param compactionInstantTime compaction instant
|
||||
*/
|
||||
HoodieCompactionPlan generateCompactionPlan(HoodieEngineContext context, HoodieTable<T, I, K, O> hoodieTable, HoodieWriteConfig config,
|
||||
String compactionCommitTime, Set<HoodieFileGroupId> fgIdsInPendingCompactions) throws IOException;
|
||||
public abstract void preCompact(
|
||||
HoodieTable table, HoodieTimeline pendingCompactionTimeline, String compactionInstantTime);
|
||||
|
||||
/**
|
||||
* Maybe persist write status.
|
||||
*
|
||||
* @param writeStatus {@link HoodieData} of {@link WriteStatus}.
* @param config      {@link HoodieWriteConfig} passed to the implementation.
|
||||
*/
|
||||
public abstract void maybePersist(HoodieData<WriteStatus> writeStatus, HoodieWriteConfig config);
|
||||
|
||||
/**
|
||||
* Execute compaction operations and report back status.
|
||||
*/
|
||||
O compact(HoodieEngineContext context, HoodieCompactionPlan compactionPlan, HoodieTable<T, I, K, O> hoodieTable,
|
||||
HoodieWriteConfig config, String compactionInstantTime) throws IOException;
|
||||
public HoodieData<WriteStatus> compact(
|
||||
HoodieEngineContext context, HoodieCompactionPlan compactionPlan,
|
||||
HoodieTable table, HoodieWriteConfig config, String compactionInstantTime,
|
||||
HoodieCompactionHandler compactionHandler) {
|
||||
if (compactionPlan == null || (compactionPlan.getOperations() == null)
|
||||
|| (compactionPlan.getOperations().isEmpty())) {
|
||||
return context.emptyHoodieData();
|
||||
}
|
||||
HoodieActiveTimeline timeline = table.getActiveTimeline();
|
||||
HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
|
||||
// Mark instant as compaction inflight
|
||||
timeline.transitionCompactionRequestedToInflight(instant);
|
||||
table.getMetaClient().reloadActiveTimeline();
|
||||
|
||||
HoodieTableMetaClient metaClient = table.getMetaClient();
|
||||
TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient);
|
||||
|
||||
// Here we firstly use the table schema as the reader schema to read
|
||||
// log file.That is because in the case of MergeInto, the config.getSchema may not
|
||||
// the same with the table schema.
|
||||
try {
|
||||
Schema readerSchema = schemaUtil.getTableAvroSchema(false);
|
||||
config.setSchema(readerSchema.toString());
|
||||
} catch (Exception e) {
|
||||
// If there is no commit in the table, just ignore the exception.
|
||||
}
|
||||
|
||||
// Compacting is very similar to applying updates to existing file
|
||||
List<CompactionOperation> operations = compactionPlan.getOperations().stream()
|
||||
.map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
|
||||
LOG.info("Compactor compacting " + operations + " files");
|
||||
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Compacting file slices");
|
||||
TaskContextSupplier taskContextSupplier = table.getTaskContextSupplier();
|
||||
return context.parallelize(operations).map(operation -> compact(
|
||||
compactionHandler, metaClient, config, operation, compactionInstantTime, taskContextSupplier))
|
||||
.flatMap(List::iterator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a single compaction operation and report back status.
|
||||
*/
|
||||
public List<WriteStatus> compact(HoodieCompactionHandler compactionHandler,
|
||||
HoodieTableMetaClient metaClient,
|
||||
HoodieWriteConfig config,
|
||||
CompactionOperation operation,
|
||||
String instantTime,
|
||||
TaskContextSupplier taskContextSupplier) throws IOException {
|
||||
FileSystem fs = metaClient.getFs();
|
||||
|
||||
Schema readerSchema = HoodieAvroUtils.addMetadataFields(
|
||||
new Schema.Parser().parse(config.getSchema()), config.allowOperationMetadataField());
|
||||
LOG.info("Compacting base " + operation.getDataFileName() + " with delta files " + operation.getDeltaFileNames()
|
||||
+ " for commit " + instantTime);
|
||||
// TODO - FIX THIS
|
||||
// Reads the entire avro file. Always only specific blocks should be read from the avro file
|
||||
// (failure recover).
|
||||
// Load all the delta commits since the last compaction commit and get all the blocks to be
|
||||
// loaded and load it using CompositeAvroLogReader
|
||||
// Since a DeltaCommit is not defined yet, reading all the records. revisit this soon.
|
||||
String maxInstantTime = metaClient
|
||||
.getActiveTimeline().getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION,
|
||||
HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
|
||||
.filterCompletedInstants().lastInstant().get().getTimestamp();
|
||||
long maxMemoryPerCompaction = IOUtils.getMaxMemoryPerCompaction(taskContextSupplier, config);
|
||||
LOG.info("MaxMemoryPerCompaction => " + maxMemoryPerCompaction);
|
||||
|
||||
List<String> logFiles = operation.getDeltaFileNames().stream().map(
|
||||
p -> new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), p).toString())
|
||||
.collect(toList());
|
||||
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
||||
.withFileSystem(fs)
|
||||
.withBasePath(metaClient.getBasePath())
|
||||
.withLogFilePaths(logFiles)
|
||||
.withReaderSchema(readerSchema)
|
||||
.withLatestInstantTime(maxInstantTime)
|
||||
.withMaxMemorySizeInBytes(maxMemoryPerCompaction)
|
||||
.withReadBlocksLazily(config.getCompactionLazyBlockReadEnabled())
|
||||
.withReverseReader(config.getCompactionReverseLogReadEnabled())
|
||||
.withBufferSize(config.getMaxDFSStreamBufferSize())
|
||||
.withSpillableMapBasePath(config.getSpillableMapBasePath())
|
||||
.withDiskMapType(config.getCommonConfig().getSpillableDiskMapType())
|
||||
.withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
|
||||
.build();
|
||||
if (!scanner.iterator().hasNext()) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
Option<HoodieBaseFile> oldDataFileOpt =
|
||||
operation.getBaseFile(metaClient.getBasePath(), operation.getPartitionPath());
|
||||
|
||||
// Compacting is very similar to applying updates to existing file
|
||||
Iterator<List<WriteStatus>> result;
|
||||
// If the dataFile is present, perform updates else perform inserts into a new base file.
|
||||
if (oldDataFileOpt.isPresent()) {
|
||||
result = compactionHandler.handleUpdate(instantTime, operation.getPartitionPath(),
|
||||
operation.getFileId(), scanner.getRecords(),
|
||||
oldDataFileOpt.get());
|
||||
} else {
|
||||
result = compactionHandler.handleInsert(instantTime, operation.getPartitionPath(), operation.getFileId(),
|
||||
scanner.getRecords());
|
||||
}
|
||||
Iterable<List<WriteStatus>> resultIterable = () -> result;
|
||||
return StreamSupport.stream(resultIterable.spliterator(), false).flatMap(Collection::stream).peek(s -> {
|
||||
s.getStat().setTotalUpdatedRecordsCompacted(scanner.getNumMergedRecordsInLog());
|
||||
s.getStat().setTotalLogFilesCompacted(scanner.getTotalLogFiles());
|
||||
s.getStat().setTotalLogRecords(scanner.getTotalLogRecords());
|
||||
s.getStat().setPartitionPath(operation.getPartitionPath());
|
||||
s.getStat()
|
||||
.setTotalLogSizeCompacted(operation.getMetrics().get(CompactionStrategy.TOTAL_LOG_FILE_SIZE).longValue());
|
||||
s.getStat().setTotalLogBlocks(scanner.getTotalLogBlocks());
|
||||
s.getStat().setTotalCorruptLogBlock(scanner.getTotalCorruptBlocks());
|
||||
s.getStat().setTotalRollbackBlocks(scanner.getTotalRollbacks());
|
||||
RuntimeStats runtimeStats = new RuntimeStats();
|
||||
runtimeStats.setTotalScanTime(scanner.getTotalTimeTakenToReadAndMergeBlocks());
|
||||
s.getStat().setRuntimeStats(runtimeStats);
|
||||
scanner.close();
|
||||
}).collect(toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a new compaction plan for scheduling.
|
||||
*
|
||||
* @param context HoodieEngineContext
|
||||
* @param hoodieTable Hoodie Table
|
||||
* @param config Hoodie Write Configuration
|
||||
* @param compactionCommitTime scheduled compaction commit time
|
||||
* @param fgIdsInPendingCompactionAndClustering partition-fileId pairs for which compaction is pending
|
||||
* @return Compaction Plan
|
||||
* @throws IOException when encountering errors
|
||||
*/
|
||||
HoodieCompactionPlan generateCompactionPlan(
|
||||
HoodieEngineContext context, HoodieTable<T, I, K, O> hoodieTable, HoodieWriteConfig config,
|
||||
String compactionCommitTime, Set<HoodieFileGroupId> fgIdsInPendingCompactionAndClustering) throws IOException {
|
||||
// Accumulator to keep track of total log files for a table
|
||||
HoodieAccumulator totalLogFiles = context.newAccumulator();
|
||||
// Accumulator to keep track of total log file slices for a table
|
||||
HoodieAccumulator totalFileSlices = context.newAccumulator();
|
||||
|
||||
ValidationUtils.checkArgument(hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ,
|
||||
"Can only compact table of type " + HoodieTableType.MERGE_ON_READ + " and not "
|
||||
+ hoodieTable.getMetaClient().getTableType().name());
|
||||
|
||||
// TODO : check if maxMemory is not greater than JVM or executor memory
|
||||
// TODO - rollback any compactions in flight
|
||||
HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
|
||||
LOG.info("Compacting " + metaClient.getBasePath() + " with commit " + compactionCommitTime);
|
||||
List<String> partitionPaths = FSUtils.getAllPartitionPaths(context, config.getMetadataConfig(), metaClient.getBasePath());
|
||||
|
||||
// filter the partition paths if needed to reduce list status
|
||||
partitionPaths = config.getCompactionStrategy().filterPartitionPaths(config, partitionPaths);
|
||||
|
||||
if (partitionPaths.isEmpty()) {
|
||||
// In case no partitions could be picked, return no compaction plan
|
||||
return null;
|
||||
}
|
||||
|
||||
SliceView fileSystemView = hoodieTable.getSliceView();
|
||||
LOG.info("Compaction looking for files to compact in " + partitionPaths + " partitions");
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Looking for files to compact");
|
||||
|
||||
List<HoodieCompactionOperation> operations = context.flatMap(partitionPaths, partitionPath -> fileSystemView
|
||||
.getLatestFileSlices(partitionPath)
|
||||
.filter(slice -> !fgIdsInPendingCompactionAndClustering.contains(slice.getFileGroupId()))
|
||||
.map(s -> {
|
||||
List<HoodieLogFile> logFiles =
|
||||
s.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()).collect(toList());
|
||||
totalLogFiles.add(logFiles.size());
|
||||
totalFileSlices.add(1L);
|
||||
// Avro generated classes are not inheriting Serializable. Using CompactionOperation POJO
|
||||
// for Map operations and collecting them finally in Avro generated classes for storing
|
||||
// into meta files.
|
||||
Option<HoodieBaseFile> dataFile = s.getBaseFile();
|
||||
return new CompactionOperation(dataFile, partitionPath, logFiles,
|
||||
config.getCompactionStrategy().captureMetrics(config, s));
|
||||
})
|
||||
.filter(c -> !c.getDeltaFileNames().isEmpty()), partitionPaths.size()).stream()
|
||||
.map(CompactionUtils::buildHoodieCompactionOperation).collect(toList());
|
||||
|
||||
LOG.info("Total of " + operations.size() + " compactions are retrieved");
|
||||
LOG.info("Total number of latest files slices " + totalFileSlices.value());
|
||||
LOG.info("Total number of log files " + totalLogFiles.value());
|
||||
LOG.info("Total number of file slices " + totalFileSlices.value());
|
||||
// Filter the compactions with the passed in filter. This lets us choose most effective
|
||||
// compactions only
|
||||
HoodieCompactionPlan compactionPlan = config.getCompactionStrategy().generateCompactionPlan(config, operations,
|
||||
CompactionUtils.getAllPendingCompactionPlans(metaClient).stream().map(Pair::getValue).collect(toList()));
|
||||
ValidationUtils.checkArgument(
|
||||
compactionPlan.getOperations().stream().noneMatch(
|
||||
op -> fgIdsInPendingCompactionAndClustering.contains(new HoodieFileGroupId(op.getPartitionPath(), op.getFileId()))),
|
||||
"Bad Compaction Plan. FileId MUST NOT have multiple pending compactions. "
|
||||
+ "Please fix your strategy implementation. FileIdsWithPendingCompactions :" + fgIdsInPendingCompactionAndClustering
|
||||
+ ", Selected workload :" + compactionPlan);
|
||||
if (compactionPlan.getOperations().isEmpty()) {
|
||||
LOG.warn("After filtering, Nothing to compact for " + metaClient.getBasePath());
|
||||
}
|
||||
return compactionPlan;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.action.compact;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.data.HoodieData;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.CompactionUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieCompactionException;
|
||||
import org.apache.hudi.table.HoodieCompactionHandler;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.BaseActionExecutor;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
@SuppressWarnings("checkstyle:LineLength")
|
||||
public class RunCompactionActionExecutor<T extends HoodieRecordPayload> extends
|
||||
BaseActionExecutor<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>, HoodieWriteMetadata<HoodieData<WriteStatus>>> {
|
||||
|
||||
private final HoodieCompactor compactor;
|
||||
private final HoodieCompactionHandler compactionHandler;
|
||||
|
||||
public RunCompactionActionExecutor(HoodieEngineContext context,
|
||||
HoodieWriteConfig config,
|
||||
HoodieTable table,
|
||||
String instantTime,
|
||||
HoodieCompactor compactor,
|
||||
HoodieCompactionHandler compactionHandler) {
|
||||
super(context, config, table, instantTime);
|
||||
this.compactor = compactor;
|
||||
this.compactionHandler = compactionHandler;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieWriteMetadata<HoodieData<WriteStatus>> execute() {
|
||||
HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
|
||||
compactor.preCompact(table, pendingCompactionTimeline, instantTime);
|
||||
|
||||
HoodieWriteMetadata<HoodieData<WriteStatus>> compactionMetadata = new HoodieWriteMetadata<>();
|
||||
try {
|
||||
// generate compaction plan
|
||||
// should support configurable commit metadata
|
||||
HoodieCompactionPlan compactionPlan =
|
||||
CompactionUtils.getCompactionPlan(table.getMetaClient(), instantTime);
|
||||
|
||||
HoodieData<WriteStatus> statuses = compactor.compact(
|
||||
context, compactionPlan, table, config, instantTime, compactionHandler);
|
||||
|
||||
compactor.maybePersist(statuses, config);
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Preparing compaction metadata");
|
||||
List<HoodieWriteStat> updateStatusMap = statuses.map(WriteStatus::getStat).collectAsList();
|
||||
HoodieCommitMetadata metadata = new HoodieCommitMetadata(true);
|
||||
for (HoodieWriteStat stat : updateStatusMap) {
|
||||
metadata.addWriteStat(stat.getPartitionPath(), stat);
|
||||
}
|
||||
metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, config.getSchema());
|
||||
|
||||
compactionMetadata.setWriteStatuses(statuses);
|
||||
compactionMetadata.setCommitted(false);
|
||||
compactionMetadata.setCommitMetadata(Option.of(metadata));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieCompactionException("Could not compact " + config.getBasePath(), e);
|
||||
}
|
||||
|
||||
return compactionMetadata;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,193 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.action.compact;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||
import org.apache.hudi.common.table.view.SyncableFileSystemView;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieCompactionException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.BaseActionExecutor;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class ScheduleCompactionActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieCompactionPlan>> {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(ScheduleCompactionActionExecutor.class);
|
||||
|
||||
private final Option<Map<String, String>> extraMetadata;
|
||||
private final HoodieCompactor compactor;
|
||||
|
||||
public ScheduleCompactionActionExecutor(HoodieEngineContext context,
|
||||
HoodieWriteConfig config,
|
||||
HoodieTable<T, I, K, O> table,
|
||||
String instantTime,
|
||||
Option<Map<String, String>> extraMetadata,
|
||||
HoodieCompactor compactor) {
|
||||
super(context, config, table, instantTime);
|
||||
this.extraMetadata = extraMetadata;
|
||||
this.compactor = compactor;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<HoodieCompactionPlan> execute() {
|
||||
if (!config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl()
|
||||
&& !config.getFailedWritesCleanPolicy().isLazy()) {
|
||||
// if there are inflight writes, their instantTime must not be less than that of compaction instant time
|
||||
table.getActiveTimeline().getCommitsTimeline().filterPendingExcludingCompaction().firstInstant()
|
||||
.ifPresent(earliestInflight -> ValidationUtils.checkArgument(
|
||||
HoodieTimeline.compareTimestamps(earliestInflight.getTimestamp(), HoodieTimeline.GREATER_THAN, instantTime),
|
||||
"Earliest write inflight instant time must be later than compaction time. Earliest :" + earliestInflight
|
||||
+ ", Compaction scheduled at " + instantTime));
|
||||
// Committed and pending compaction instants should have strictly lower timestamps
|
||||
List<HoodieInstant> conflictingInstants = table.getActiveTimeline()
|
||||
.getWriteTimeline().filterCompletedAndCompactionInstants().getInstants()
|
||||
.filter(instant -> HoodieTimeline.compareTimestamps(
|
||||
instant.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS, instantTime))
|
||||
.collect(Collectors.toList());
|
||||
ValidationUtils.checkArgument(conflictingInstants.isEmpty(),
|
||||
"Following instants have timestamps >= compactionInstant (" + instantTime + ") Instants :"
|
||||
+ conflictingInstants);
|
||||
}
|
||||
|
||||
HoodieCompactionPlan plan = scheduleCompaction();
|
||||
if (plan != null && (plan.getOperations() != null) && (!plan.getOperations().isEmpty())) {
|
||||
extraMetadata.ifPresent(plan::setExtraMetadata);
|
||||
HoodieInstant compactionInstant =
|
||||
new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instantTime);
|
||||
try {
|
||||
table.getActiveTimeline().saveToCompactionRequested(compactionInstant,
|
||||
TimelineMetadataUtils.serializeCompactionPlan(plan));
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieIOException("Exception scheduling compaction", ioe);
|
||||
}
|
||||
return Option.of(plan);
|
||||
}
|
||||
return Option.empty();
|
||||
}
|
||||
|
||||
private HoodieCompactionPlan scheduleCompaction() {
|
||||
LOG.info("Checking if compaction needs to be run on " + config.getBasePath());
|
||||
// judge if we need to compact according to num delta commits and time elapsed
|
||||
boolean compactable = needCompact(config.getInlineCompactTriggerStrategy());
|
||||
if (compactable) {
|
||||
LOG.info("Generating compaction plan for merge on read table " + config.getBasePath());
|
||||
try {
|
||||
SyncableFileSystemView fileSystemView = (SyncableFileSystemView) table.getSliceView();
|
||||
Set<HoodieFileGroupId> fgInPendingCompactionAndClustering = fileSystemView.getPendingCompactionOperations()
|
||||
.map(instantTimeOpPair -> instantTimeOpPair.getValue().getFileGroupId())
|
||||
.collect(Collectors.toSet());
|
||||
// exclude files in pending clustering from compaction.
|
||||
fgInPendingCompactionAndClustering.addAll(fileSystemView.getFileGroupsInPendingClustering().map(Pair::getLeft).collect(Collectors.toSet()));
|
||||
return compactor.generateCompactionPlan(context, table, config, instantTime, fgInPendingCompactionAndClustering);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieCompactionException("Could not schedule compaction " + config.getBasePath(), e);
|
||||
}
|
||||
}
|
||||
|
||||
return new HoodieCompactionPlan();
|
||||
}
|
||||
|
||||
private Pair<Integer, String> getLatestDeltaCommitInfo(CompactionTriggerStrategy compactionTriggerStrategy) {
|
||||
Option<HoodieInstant> lastCompaction = table.getActiveTimeline().getCommitTimeline()
|
||||
.filterCompletedInstants().lastInstant();
|
||||
HoodieTimeline deltaCommits = table.getActiveTimeline().getDeltaCommitTimeline();
|
||||
|
||||
String latestInstantTs;
|
||||
int deltaCommitsSinceLastCompaction = 0;
|
||||
if (lastCompaction.isPresent()) {
|
||||
latestInstantTs = lastCompaction.get().getTimestamp();
|
||||
deltaCommitsSinceLastCompaction = deltaCommits.findInstantsAfter(latestInstantTs, Integer.MAX_VALUE).countInstants();
|
||||
} else {
|
||||
latestInstantTs = deltaCommits.firstInstant().get().getTimestamp();
|
||||
deltaCommitsSinceLastCompaction = deltaCommits.findInstantsAfterOrEquals(latestInstantTs, Integer.MAX_VALUE).countInstants();
|
||||
}
|
||||
return Pair.of(deltaCommitsSinceLastCompaction, latestInstantTs);
|
||||
}
|
||||
|
||||
private boolean needCompact(CompactionTriggerStrategy compactionTriggerStrategy) {
|
||||
boolean compactable;
|
||||
// get deltaCommitsSinceLastCompaction and lastCompactionTs
|
||||
Pair<Integer, String> latestDeltaCommitInfo = getLatestDeltaCommitInfo(compactionTriggerStrategy);
|
||||
int inlineCompactDeltaCommitMax = config.getInlineCompactDeltaCommitMax();
|
||||
int inlineCompactDeltaSecondsMax = config.getInlineCompactDeltaSecondsMax();
|
||||
switch (compactionTriggerStrategy) {
|
||||
case NUM_COMMITS:
|
||||
compactable = inlineCompactDeltaCommitMax <= latestDeltaCommitInfo.getLeft();
|
||||
if (compactable) {
|
||||
LOG.info(String.format("The delta commits >= %s, trigger compaction scheduler.", inlineCompactDeltaCommitMax));
|
||||
}
|
||||
break;
|
||||
case TIME_ELAPSED:
|
||||
compactable = inlineCompactDeltaSecondsMax <= parsedToSeconds(instantTime) - parsedToSeconds(latestDeltaCommitInfo.getRight());
|
||||
if (compactable) {
|
||||
LOG.info(String.format("The elapsed time >=%ss, trigger compaction scheduler.", inlineCompactDeltaSecondsMax));
|
||||
}
|
||||
break;
|
||||
case NUM_OR_TIME:
|
||||
compactable = inlineCompactDeltaCommitMax <= latestDeltaCommitInfo.getLeft()
|
||||
|| inlineCompactDeltaSecondsMax <= parsedToSeconds(instantTime) - parsedToSeconds(latestDeltaCommitInfo.getRight());
|
||||
if (compactable) {
|
||||
LOG.info(String.format("The delta commits >= %s or elapsed_time >=%ss, trigger compaction scheduler.", inlineCompactDeltaCommitMax,
|
||||
inlineCompactDeltaSecondsMax));
|
||||
}
|
||||
break;
|
||||
case NUM_AND_TIME:
|
||||
compactable = inlineCompactDeltaCommitMax <= latestDeltaCommitInfo.getLeft()
|
||||
&& inlineCompactDeltaSecondsMax <= parsedToSeconds(instantTime) - parsedToSeconds(latestDeltaCommitInfo.getRight());
|
||||
if (compactable) {
|
||||
LOG.info(String.format("The delta commits >= %s and elapsed_time >=%ss, trigger compaction scheduler.", inlineCompactDeltaCommitMax,
|
||||
inlineCompactDeltaSecondsMax));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new HoodieCompactionException("Unsupported compaction trigger strategy: " + config.getInlineCompactTriggerStrategy());
|
||||
}
|
||||
return compactable;
|
||||
}
|
||||
|
||||
private Long parsedToSeconds(String time) {
|
||||
long timestamp;
|
||||
try {
|
||||
timestamp = HoodieActiveTimeline.COMMIT_FORMATTER.parse(time).getTime() / 1000;
|
||||
} catch (ParseException e) {
|
||||
throw new HoodieCompactionException(e.getMessage(), e);
|
||||
}
|
||||
return timestamp;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user