1
0

[HUDI-1576] Make archiving an async service (#4795)

This commit is contained in:
Raymond Xu
2022-02-14 18:15:06 -08:00
committed by GitHub
parent 3b401d839c
commit 27bd7b538e
15 changed files with 327 additions and 127 deletions

View File

@@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.async;
import org.apache.hudi.client.BaseHoodieWriteClient;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Async archive service to run concurrently with write operation.
 *
 * <p>The service submits a single task that calls {@link BaseHoodieWriteClient#archive()} on a
 * dedicated single-thread executor. The static helpers accept a {@code null} service so callers
 * do not need to check whether async archiving was enabled.
 */
public class AsyncArchiveService extends HoodieAsyncService {

  private static final Logger LOG = LogManager.getLogger(AsyncArchiveService.class);

  /** Write client whose {@code archive()} method is invoked by the background task. */
  private final BaseHoodieWriteClient writeClient;

  /** Single-thread executor that runs the archive task; handed back to the base class by {@link #startService()}. */
  private final transient ExecutorService executor = Executors.newSingleThreadExecutor();

  /**
   * Creates the service without starting it.
   *
   * @param writeClient write client used to perform the archiving
   */
  protected AsyncArchiveService(BaseHoodieWriteClient writeClient) {
    this.writeClient = writeClient;
  }

  /**
   * Submits the archive task to this service's executor.
   *
   * @return pair of the future tracking the archive task (completes with {@code true}) and the
   *         executor the task was submitted to
   */
  @Override
  protected Pair<CompletableFuture, ExecutorService> startService() {
    LOG.info("Starting async archive service...");
    return Pair.of(CompletableFuture.supplyAsync(() -> {
      writeClient.archive();
      return true;
    }, executor), executor);
  }

  /**
   * Creates and starts an archive service when the write client's config enables both auto archive
   * and async archive.
   *
   * @param writeClient write client supplying the config and performing the archiving
   * @return the started service, or {@code null} when auto archive or async archive is disabled
   */
  public static AsyncArchiveService startAsyncArchiveIfEnabled(BaseHoodieWriteClient writeClient) {
    HoodieWriteConfig config = writeClient.getConfig();
    if (!config.isAutoArchive() || !config.isAsyncArchive()) {
      LOG.info("The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start.");
      return null;
    }
    AsyncArchiveService asyncArchiveService = new AsyncArchiveService(writeClient);
    // NOTE(review): null is presumably "no shutdown callback" - confirm against HoodieAsyncService#start.
    asyncArchiveService.start(null);
    return asyncArchiveService;
  }

  /**
   * Blocks until the given service has shut down. Does nothing when the service is {@code null}.
   *
   * @param asyncArchiveService service to wait for; may be {@code null}
   * @throws HoodieException wrapping any exception raised while waiting
   */
  public static void waitForCompletion(AsyncArchiveService asyncArchiveService) {
    if (asyncArchiveService == null) {
      return;
    }
    LOG.info("Waiting for async archive service to finish");
    try {
      asyncArchiveService.waitForShutdown();
    } catch (Exception e) {
      if (e instanceof InterruptedException) {
        // Wrapping the exception would otherwise hide the interruption from code further up the
        // stack, so restore the thread's interrupt status before rethrowing.
        Thread.currentThread().interrupt();
      }
      throw new HoodieException("Error waiting for async archive service to finish", e);
    }
  }

  /**
   * Shuts the given service down by calling {@code shutdown(true)}. Does nothing when the service
   * is {@code null}.
   *
   * @param asyncArchiveService service to shut down; may be {@code null}
   */
  public static void forceShutdown(AsyncArchiveService asyncArchiveService) {
    if (asyncArchiveService == null) {
      return;
    }
    LOG.info("Shutting down async archive service...");
    // NOTE(review): the boolean is presumably the "force" flag - confirm against HoodieAsyncService#shutdown.
    asyncArchiveService.shutdown(true);
  }
}

View File

@@ -7,21 +7,24 @@
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.client;
package org.apache.hudi.async;
import org.apache.hudi.async.HoodieAsyncService;
import org.apache.hudi.client.BaseHoodieWriteClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -30,9 +33,9 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
* Clean service running concurrently with write operation.
* Async clean service to run concurrently with write operation.
*/
class AsyncCleanerService extends HoodieAsyncService {
public class AsyncCleanerService extends HoodieAsyncService {
private static final Logger LOG = LogManager.getLogger(AsyncCleanerService.class);
@@ -46,7 +49,7 @@ class AsyncCleanerService extends HoodieAsyncService {
@Override
protected Pair<CompletableFuture, ExecutorService> startService() {
String instantTime = HoodieActiveTimeline.createNewInstantTime();
LOG.info("Auto cleaning is enabled. Running cleaner async to write operation at instant time " + instantTime);
LOG.info(String.format("Starting async clean service with instant time %s...", instantTime));
return Pair.of(CompletableFuture.supplyAsync(() -> {
writeClient.clean(instantTime);
return true;
@@ -54,30 +57,30 @@ class AsyncCleanerService extends HoodieAsyncService {
}
public static AsyncCleanerService startAsyncCleaningIfEnabled(BaseHoodieWriteClient writeClient) {
AsyncCleanerService asyncCleanerService = null;
if (writeClient.getConfig().isAutoClean() && writeClient.getConfig().isAsyncClean()) {
asyncCleanerService = new AsyncCleanerService(writeClient);
asyncCleanerService.start(null);
} else {
LOG.info("Async auto cleaning is not enabled. Not running cleaner now");
HoodieWriteConfig config = writeClient.getConfig();
if (!config.isAutoClean() || !config.isAsyncClean()) {
LOG.info("The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start.");
return null;
}
AsyncCleanerService asyncCleanerService = new AsyncCleanerService(writeClient);
asyncCleanerService.start(null);
return asyncCleanerService;
}
public static void waitForCompletion(AsyncCleanerService asyncCleanerService) {
if (asyncCleanerService != null) {
LOG.info("Waiting for async cleaner to finish");
LOG.info("Waiting for async clean service to finish");
try {
asyncCleanerService.waitForShutdown();
} catch (Exception e) {
throw new HoodieException("Error waiting for async cleaning to finish", e);
throw new HoodieException("Error waiting for async clean service to finish", e);
}
}
}
public static void forceShutdown(AsyncCleanerService asyncCleanerService) {
if (asyncCleanerService != null) {
LOG.info("Shutting down async cleaner");
LOG.info("Shutting down async clean service...");
asyncCleanerService.shutdown(true);
}
}

View File

@@ -36,7 +36,7 @@ import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Function;
/**
* Base Class for running clean/delta-sync/compaction/clustering in separate thread and controlling their life-cycle.
* Base Class for running archive/clean/delta-sync/compaction/clustering in separate thread and controlling their life-cycles.
*/
public abstract class HoodieAsyncService implements Serializable {
@@ -70,11 +70,15 @@ public abstract class HoodieAsyncService implements Serializable {
this.runInDaemonMode = runInDaemonMode;
}
protected boolean isShutdownRequested() {
public boolean isStarted() {
return started;
}
public boolean isShutdownRequested() {
return shutdownRequested;
}
protected boolean isShutdown() {
public boolean isShutdown() {
return shutdown;
}
@@ -138,8 +142,6 @@ public abstract class HoodieAsyncService implements Serializable {
/**
* Service implementation.
*
* @return
*/
protected abstract Pair<CompletableFuture, ExecutorService> startService();

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.client;
import org.apache.hudi.async.AsyncArchiveService;
import org.apache.hudi.async.AsyncCleanerService;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
@@ -67,7 +69,6 @@ import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metrics.HoodieMetrics;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.HoodieTimelineArchiveLog;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.rollback.RollbackUtils;
import org.apache.hudi.table.action.savepoint.SavepointHelpers;
@@ -115,6 +116,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
private transient WriteOperationType operationType;
private transient HoodieWriteCommitCallback commitCallback;
protected transient AsyncCleanerService asyncCleanerService;
protected transient AsyncArchiveService asyncArchiveService;
protected final TransactionManager txnManager;
protected Option<Pair<HoodieInstant, Map<String, String>>> lastCompletedTxnAndMetadata = Option.empty();
@@ -431,6 +433,11 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
} else {
this.asyncCleanerService.start(null);
}
if (null == this.asyncArchiveService) {
this.asyncArchiveService = AsyncArchiveService.startAsyncArchiveIfEnabled(this);
} else {
this.asyncArchiveService.start(null);
}
}
/**
@@ -456,9 +463,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
WriteMarkersFactory.get(config.getMarkersType(), table, instantTime)
.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
autoCleanOnCommit();
if (config.isAutoArchive()) {
archive(table);
}
autoArchiveOnCommit(table);
} finally {
this.heartbeatClient.stop(instantTime);
}
@@ -523,22 +528,34 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
});
}
/**
* Handle auto clean during commit.
*
*/
protected void autoCleanOnCommit() {
if (config.isAutoClean()) {
// Call clean to cleanup if there is anything to cleanup after the commit,
if (config.isAsyncClean()) {
LOG.info("Cleaner has been spawned already. Waiting for it to finish");
AsyncCleanerService.waitForCompletion(asyncCleanerService);
LOG.info("Cleaner has finished");
} else {
// Do not reuse instantTime for clean as metadata table requires all changes to have unique instant timestamps.
LOG.info("Auto cleaning is enabled. Running cleaner now");
clean(true);
}
if (!config.isAutoClean()) {
return;
}
if (config.isAsyncClean()) {
LOG.info("Async cleaner has been spawned. Waiting for it to finish");
AsyncCleanerService.waitForCompletion(asyncCleanerService);
LOG.info("Async cleaner has finished");
} else {
LOG.info("Start to clean synchronously.");
// Do not reuse instantTime for clean as metadata table requires all changes to have unique instant timestamps.
clean(true);
}
}
protected void autoArchiveOnCommit(HoodieTable<T, I, K, O> table) {
if (!config.isAutoArchive()) {
return;
}
if (config.isAsyncArchive()) {
LOG.info("Async archiver has been spawned. Waiting for it to finish");
AsyncArchiveService.waitForCompletion(asyncArchiveService);
LOG.info("Async archiver has finished");
} else {
LOG.info("Start to archive synchronously.");
archive(table);
}
}
@@ -784,8 +801,8 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
protected void archive(HoodieTable<T, I, K, O> table) {
try {
// We cannot have unbounded commit files. Archive commits if we have to archive
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(config, table);
archiveLog.archiveIfRequired(context);
HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(config, table);
archiver.archiveIfRequired(context);
} catch (IOException ioe) {
throw new HoodieIOException("Failed to archive", ioe);
}
@@ -1249,7 +1266,8 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
@Override
public void close() {
// release AsyncArchiveService and AsyncCleanerService
AsyncArchiveService.forceShutdown(asyncArchiveService);
asyncArchiveService = null;
AsyncCleanerService.forceShutdown(asyncCleanerService);
asyncCleanerService = null;
// Stop timeline-server if running

View File

@@ -7,16 +7,17 @@
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.table;
package org.apache.hudi.client;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -52,6 +53,7 @@ import org.apache.hudi.exception.HoodieCommitException;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.marker.WriteMarkers;
import org.apache.hudi.table.marker.WriteMarkersFactory;
@@ -79,9 +81,9 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_O
/**
* Archiver to bound the growth of files under .hoodie meta path.
*/
public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieTimelineArchiveLog.class);
private static final Logger LOG = LogManager.getLogger(HoodieTimelineArchiver.class);
private final Path archiveFilePath;
private final HoodieWriteConfig config;
@@ -91,7 +93,7 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
private final HoodieTable<T, I, K, O> table;
private final HoodieTableMetaClient metaClient;
public HoodieTimelineArchiveLog(HoodieWriteConfig config, HoodieTable<T, I, K, O> table) {
public HoodieTimelineArchiver(HoodieWriteConfig config, HoodieTable<T, I, K, O> table) {
this.config = config;
this.table = table;
this.metaClient = table.getMetaClient();

View File

@@ -50,13 +50,6 @@ import java.util.stream.Collectors;
+ "cleaning (reclamation of older/unused file groups/slices).")
public class HoodieCompactionConfig extends HoodieConfig {
public static final ConfigProperty<String> AUTO_CLEAN = ConfigProperty
.key("hoodie.clean.automatic")
.defaultValue("true")
.withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit,"
+ " to delete older file slices. It's recommended to enable this, to ensure metadata and data storage"
+ " growth is bounded.");
public static final ConfigProperty<String> AUTO_ARCHIVE = ConfigProperty
.key("hoodie.archive.automatic")
.defaultValue("true")
@@ -64,6 +57,20 @@ public class HoodieCompactionConfig extends HoodieConfig {
+ " to archive commits if we cross a maximum value of commits."
+ " It's recommended to enable this, to ensure number of active commits is bounded.");
public static final ConfigProperty<String> ASYNC_ARCHIVE = ConfigProperty
.key("hoodie.archive.async")
.defaultValue("false")
.sinceVersion("0.11.0")
.withDocumentation("Only applies when " + AUTO_ARCHIVE.key() + " is turned on. "
+ "When turned on runs archiver async with writing, which can speed up overall write performance.");
public static final ConfigProperty<String> AUTO_CLEAN = ConfigProperty
.key("hoodie.clean.automatic")
.defaultValue("true")
.withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit,"
+ " to delete older file slices. It's recommended to enable this, to ensure metadata and data storage"
+ " growth is bounded.");
public static final ConfigProperty<String> ASYNC_CLEAN = ConfigProperty
.key("hoodie.clean.async")
.defaultValue("false")
@@ -522,6 +529,16 @@ public class HoodieCompactionConfig extends HoodieConfig {
return this;
}
public Builder withAutoArchive(Boolean autoArchive) {
compactionConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive));
return this;
}
public Builder withAsyncArchive(Boolean asyncArchive) {
compactionConfig.setValue(ASYNC_ARCHIVE, String.valueOf(asyncArchive));
return this;
}
public Builder withAutoClean(Boolean autoClean) {
compactionConfig.setValue(AUTO_CLEAN, String.valueOf(autoClean));
return this;
@@ -532,11 +549,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
return this;
}
public Builder withAutoArchive(Boolean autoArchive) {
compactionConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive));
return this;
}
public Builder withIncrementalCleaningMode(Boolean incrementalCleaningMode) {
compactionConfig.setValue(CLEANER_INCREMENTAL_MODE_ENABLE, String.valueOf(incrementalCleaningMode));
return this;

View File

@@ -1112,10 +1112,6 @@ public class HoodieWriteConfig extends HoodieConfig {
return getInt(HoodieCompactionConfig.CLEANER_PARALLELISM_VALUE);
}
public boolean isAutoClean() {
return getBoolean(HoodieCompactionConfig.AUTO_CLEAN);
}
public boolean getArchiveMergeEnable() {
return getBoolean(HoodieCompactionConfig.ARCHIVE_MERGE_ENABLE);
}
@@ -1128,6 +1124,14 @@ public class HoodieWriteConfig extends HoodieConfig {
return getBoolean(HoodieCompactionConfig.AUTO_ARCHIVE);
}
public boolean isAsyncArchive() {
return getBoolean(HoodieCompactionConfig.ASYNC_ARCHIVE);
}
public boolean isAutoClean() {
return getBoolean(HoodieCompactionConfig.AUTO_CLEAN);
}
public boolean isAsyncClean() {
return getBoolean(HoodieCompactionConfig.ASYNC_CLEAN);
}
@@ -1872,7 +1876,7 @@ public class HoodieWriteConfig extends HoodieConfig {
* @return True if any table services are configured to run inline, false otherwise.
*/
public Boolean areAnyTableServicesExecutedInline() {
return inlineClusteringEnabled() || inlineCompactionEnabled() || isAutoClean();
return inlineClusteringEnabled() || inlineCompactionEnabled() || isAutoClean() || isAutoArchive();
}
/**
@@ -1881,7 +1885,7 @@ public class HoodieWriteConfig extends HoodieConfig {
* @return True if any table services are configured to run async, false otherwise.
*/
public Boolean areAnyTableServicesAsync() {
return isAsyncClusteringEnabled() || !inlineCompactionEnabled() || isAsyncClean();
return isAsyncClusteringEnabled() || !inlineCompactionEnabled() || isAsyncClean() || isAsyncArchive();
}
public Boolean areAnyTableServicesScheduledInline() {

View File

@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.async;
import org.apache.hudi.client.BaseHoodieWriteClient;
import org.apache.hudi.config.HoodieWriteConfig;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import java.util.concurrent.ExecutionException;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
/**
 * Unit tests for {@link AsyncArchiveService}, using a mocked write client and write config.
 */
@ExtendWith(MockitoExtension.class)
class TestAsyncArchiveService {

  @Mock
  BaseHoodieWriteClient writeClient;

  @Mock
  HoodieWriteConfig config;

  /** With auto archive off, no service is created. */
  @Test
  void startAsyncArchiveReturnsNullWhenAutoArchiveDisabled() {
    when(writeClient.getConfig()).thenReturn(config);
    // isAsyncArchive() is left unstubbed: the enablement check short-circuits on auto archive.
    when(config.isAutoArchive()).thenReturn(false);

    AsyncArchiveService started = AsyncArchiveService.startAsyncArchiveIfEnabled(writeClient);

    assertNull(started);
  }

  /** With auto archive on but async archive off, no service is created. */
  @Test
  void startAsyncArchiveReturnsNullWhenAsyncArchiveDisabled() {
    when(writeClient.getConfig()).thenReturn(config);
    when(config.isAutoArchive()).thenReturn(true);
    when(config.isAsyncArchive()).thenReturn(false);

    AsyncArchiveService started = AsyncArchiveService.startAsyncArchiveIfEnabled(writeClient);

    assertNull(started);
  }

  /** With both auto and async archive on, a service instance is returned. */
  @Test
  void startAsyncArchiveIfEnabled() {
    when(writeClient.getConfig()).thenReturn(config);
    when(config.isAutoArchive()).thenReturn(true);
    when(config.isAsyncArchive()).thenReturn(true);

    AsyncArchiveService started = AsyncArchiveService.startAsyncArchiveIfEnabled(writeClient);

    assertNotNull(started);
  }

  /** The task produced by startService() completes with true and calls archive() on the client. */
  @Test
  void startServiceShouldInvokeCallArchiveMethod() throws ExecutionException, InterruptedException {
    AsyncArchiveService archiveService = new AsyncArchiveService(writeClient);

    Object outcome = archiveService.startService().getLeft().get();

    assertEquals(true, outcome);
    verify(writeClient).archive();
  }
}