[HUDI-2683] Parallelize deleting archived hoodie commits (#3920)
Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
@@ -119,6 +119,11 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
+ " keep the metadata overhead constant, even as the table size grows."
|
||||
+ "This config controls the maximum number of instants to retain in the active timeline. ");
|
||||
|
||||
public static final ConfigProperty<Integer> DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE = ConfigProperty
|
||||
.key("hoodie.archive.delete.parallelism")
|
||||
.defaultValue(100)
|
||||
.withDocumentation("Parallelism for deleting archived hoodie commits.");
|
||||
|
||||
public static final ConfigProperty<String> MIN_COMMITS_TO_KEEP = ConfigProperty
|
||||
.key("hoodie.keep.min.commits")
|
||||
.defaultValue("20")
|
||||
@@ -568,6 +573,11 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withArchiveDeleteParallelism(int archiveDeleteParallelism) {
|
||||
compactionConfig.setValue(DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE, String.valueOf(archiveDeleteParallelism));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withMaxDeltaSecondsBeforeCompaction(int maxDeltaSecondsBeforeCompaction) {
|
||||
compactionConfig.setValue(INLINE_COMPACT_TIME_DELTA_SECONDS, String.valueOf(maxDeltaSecondsBeforeCompaction));
|
||||
return this;
|
||||
|
||||
@@ -1135,6 +1135,10 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return getBoolean(HoodieCompactionConfig.COMPACTION_REVERSE_LOG_READ_ENABLE);
|
||||
}
|
||||
|
||||
public int getArchiveDeleteParallelism() {
|
||||
return getInt(HoodieCompactionConfig.DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE);
|
||||
}
|
||||
|
||||
public boolean inlineClusteringEnabled() {
|
||||
return getBoolean(HoodieClusteringConfig.INLINE_CLUSTERING);
|
||||
}
|
||||
|
||||
@@ -18,9 +18,11 @@
|
||||
|
||||
package org.apache.hudi.table;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hudi.avro.model.HoodieArchivedMetaEntry;
|
||||
import org.apache.hudi.client.utils.MetadataConversionUtils;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieArchivedLogFile;
|
||||
import org.apache.hudi.common.model.HoodieAvroPayload;
|
||||
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
|
||||
@@ -127,7 +129,7 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
|
||||
LOG.info("Archiving instants " + instantsToArchive);
|
||||
archive(context, instantsToArchive);
|
||||
LOG.info("Deleting archived instants " + instantsToArchive);
|
||||
success = deleteArchivedInstants(instantsToArchive);
|
||||
success = deleteArchivedInstants(instantsToArchive, context);
|
||||
} else {
|
||||
LOG.info("No Instants to archive");
|
||||
}
|
||||
@@ -224,19 +226,34 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
|
||||
HoodieInstant.getComparableAction(hoodieInstant.getAction()))).stream());
|
||||
}
|
||||
|
||||
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants) throws IOException {
|
||||
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants, HoodieEngineContext context) throws IOException {
|
||||
LOG.info("Deleting instants " + archivedInstants);
|
||||
boolean success = true;
|
||||
for (HoodieInstant archivedInstant : archivedInstants) {
|
||||
Path commitFile = new Path(metaClient.getMetaPath(), archivedInstant.getFileName());
|
||||
try {
|
||||
if (metaClient.getFs().exists(commitFile)) {
|
||||
success &= metaClient.getFs().delete(commitFile, false);
|
||||
LOG.info("Archived and deleted instant file " + commitFile);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to delete archived instant " + archivedInstant, e);
|
||||
}
|
||||
List<String> instantFiles = archivedInstants.stream().map(archivedInstant -> {
|
||||
return new Path(metaClient.getMetaPath(), archivedInstant.getFileName());
|
||||
}).map(Path::toString).collect(Collectors.toList());
|
||||
|
||||
context.setJobStatus(this.getClass().getSimpleName(), "Delete archived instants");
|
||||
Map<String, Boolean> resultDeleteInstantFiles = FSUtils.parallelizeFilesProcess(context,
|
||||
metaClient.getFs(),
|
||||
config.getArchiveDeleteParallelism(),
|
||||
pairOfSubPathAndConf -> {
|
||||
Path commitFile = new Path(pairOfSubPathAndConf.getKey());
|
||||
try {
|
||||
FileSystem fs = commitFile.getFileSystem(pairOfSubPathAndConf.getValue().get());
|
||||
if (fs.exists(commitFile)) {
|
||||
return fs.delete(commitFile, false);
|
||||
}
|
||||
return true;
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to delete archived instant " + commitFile, e);
|
||||
}
|
||||
},
|
||||
instantFiles);
|
||||
|
||||
for (Map.Entry<String, Boolean> result : resultDeleteInstantFiles.entrySet()) {
|
||||
LOG.info("Archived and deleted instant file " + result.getKey() + " : " + result.getValue());
|
||||
success &= result.getValue();
|
||||
}
|
||||
|
||||
// Remove older meta-data from auxiliary path too
|
||||
|
||||
Reference in New Issue
Block a user