1
0

[HUDI-1098] Adding OptimisticConsistencyGuard to be used during FinalizeWrite (#1912)

This commit is contained in:
Sivabalan Narayanan
2020-08-09 20:51:37 -04:00
committed by GitHub
parent ff53e8f0b6
commit 858eda85d7
6 changed files with 349 additions and 100 deletions

View File

@@ -36,15 +36,23 @@ public class ConsistencyGuardConfig extends DefaultHoodieConfig {
// time between successive attempts to ensure written data's metadata is consistent on storage
private static final String INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP =
"hoodie.consistency.check.initial_interval_ms";
private static long DEFAULT_INITIAL_CONSISTENCY_CHECK_INTERVAL_MS = 2000L;
private static long DEFAULT_INITIAL_CONSISTENCY_CHECK_INTERVAL_MS = 400L;
// max interval time
private static final String MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP = "hoodie.consistency.check.max_interval_ms";
private static long DEFAULT_MAX_CONSISTENCY_CHECK_INTERVAL_MS = 300000L;
private static long DEFAULT_MAX_CONSISTENCY_CHECK_INTERVAL_MS = 20000L;
// maximum number of checks, for consistency of written data. Will wait upto 256 Secs
// maximum number of checks, for consistency of written data. Will wait upto 140 Secs
private static final String MAX_CONSISTENCY_CHECKS_PROP = "hoodie.consistency.check.max_checks";
private static int DEFAULT_MAX_CONSISTENCY_CHECKS = 7;
private static int DEFAULT_MAX_CONSISTENCY_CHECKS = 6;
// sleep time for OptimisticConsistencyGuard
private static final String OPTIMISTIC_CONSISTENCY_GUARD_SLEEP_TIME_MS_PROP = "hoodie.optimistic.consistency.guard.sleep_time_ms";
private static long DEFAULT_OPTIMISTIC_CONSISTENCY_GUARD_SLEEP_TIME_MS_PROP = 500L;
// config to enable OptimisticConsistencyGuard in finalizeWrite instead of FailSafeConsistencyGuard
private static final String ENABLE_OPTIMISTIC_CONSISTENCY_GUARD = "_hoodie.optimistic.consistency.guard.enable";
private static boolean DEFAULT_ENABLE_OPTIMISTIC_CONSISTENCY_GUARD = true;
public ConsistencyGuardConfig(Properties props) {
super(props);
@@ -70,6 +78,14 @@ public class ConsistencyGuardConfig extends DefaultHoodieConfig {
return Integer.parseInt(props.getProperty(MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP));
}
/**
 * Returns the sleep time, in milliseconds, configured for the optimistic consistency guard.
 *
 * @return the value stored under {@code OPTIMISTIC_CONSISTENCY_GUARD_SLEEP_TIME_MS_PROP}, parsed as a {@code long}
 * @throws NumberFormatException if the property is absent or is not a parsable {@code long}
 */
public long getOptimisticConsistencyGuardSleepTimeMs() {
  final String rawSleepTimeMs = props.getProperty(OPTIMISTIC_CONSISTENCY_GUARD_SLEEP_TIME_MS_PROP);
  return Long.parseLong(rawSleepTimeMs);
}
/**
 * Tells whether the optimistic consistency guard is enabled.
 *
 * @return {@code true} only if the value stored under {@code ENABLE_OPTIMISTIC_CONSISTENCY_GUARD} equals
 *         "true" (ignoring case); {@code false} otherwise, including when the property is absent
 */
public boolean shouldEnableOptimisticConsistencyGuard() {
  final String rawFlag = props.getProperty(ENABLE_OPTIMISTIC_CONSISTENCY_GUARD);
  return Boolean.parseBoolean(rawFlag);
}
/**
* The builder used to build consistency configurations.
*/
@@ -109,6 +125,16 @@ public class ConsistencyGuardConfig extends DefaultHoodieConfig {
return this;
}
/**
 * Sets the sleep time used by the optimistic consistency guard.
 *
 * @param sleepTimeMs sleep time in milliseconds
 * @return this builder, to allow chaining
 */
public Builder withOptimisticConsistencyGuardSleepTimeMs(long sleepTimeMs) {
  final String sleepTimeAsText = Long.toString(sleepTimeMs);
  props.setProperty(OPTIMISTIC_CONSISTENCY_GUARD_SLEEP_TIME_MS_PROP, sleepTimeAsText);
  return this;
}
/**
 * Enables or disables the optimistic consistency guard.
 *
 * @param enableOptimisticConsistencyGuard {@code true} to enable the optimistic guard, {@code false} to disable it
 * @return this builder, to allow chaining
 */
public Builder withEnableOptimisticConsistencyGuard(boolean enableOptimisticConsistencyGuard) {
  final String flagAsText = Boolean.toString(enableOptimisticConsistencyGuard);
  props.setProperty(ENABLE_OPTIMISTIC_CONSISTENCY_GUARD, flagAsText);
  return this;
}
public ConsistencyGuardConfig build() {
setDefaultOnCondition(props, !props.containsKey(CONSISTENCY_CHECK_ENABLED_PROP), CONSISTENCY_CHECK_ENABLED_PROP,
DEFAULT_CONSISTENCY_CHECK_ENABLED);
@@ -118,7 +144,11 @@ public class ConsistencyGuardConfig extends DefaultHoodieConfig {
MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECK_INTERVAL_MS));
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECKS_PROP), MAX_CONSISTENCY_CHECKS_PROP,
String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECKS));
setDefaultOnCondition(props, !props.containsKey(OPTIMISTIC_CONSISTENCY_GUARD_SLEEP_TIME_MS_PROP),
OPTIMISTIC_CONSISTENCY_GUARD_SLEEP_TIME_MS_PROP, String.valueOf(DEFAULT_OPTIMISTIC_CONSISTENCY_GUARD_SLEEP_TIME_MS_PROP));
setDefaultOnCondition(props, !props.containsKey(ENABLE_OPTIMISTIC_CONSISTENCY_GUARD),
ENABLE_OPTIMISTIC_CONSISTENCY_GUARD,
String.valueOf(DEFAULT_ENABLE_OPTIMISTIC_CONSISTENCY_GUARD));
return new ConsistencyGuardConfig(props);
}
}

View File

@@ -32,7 +32,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeoutException;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
@@ -42,8 +41,8 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
private static final Logger LOG = LogManager.getLogger(FailSafeConsistencyGuard.class);
private final FileSystem fs;
private final ConsistencyGuardConfig consistencyGuardConfig;
protected final FileSystem fs;
protected final ConsistencyGuardConfig consistencyGuardConfig;
public FailSafeConsistencyGuard(FileSystem fs, ConsistencyGuardConfig consistencyGuardConfig) {
this.fs = fs;
@@ -73,7 +72,7 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
/**
* Helper function to wait for all files belonging to single directory to appear.
*
*
* @param dirPath Dir Path
* @param files Files to appear/disappear
* @param event Appear/Disappear
@@ -81,45 +80,19 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
*/
public void waitForFilesVisibility(String dirPath, List<String> files, FileVisibility event) throws TimeoutException {
Path dir = new Path(dirPath);
List<String> filesWithoutSchemeAndAuthority =
files.stream().map(f -> Path.getPathWithoutSchemeAndAuthority(new Path(f))).map(Path::toString)
.collect(Collectors.toList());
retryTillSuccess((retryNum) -> {
try {
LOG.info("Trying " + retryNum);
FileStatus[] entries = fs.listStatus(dir);
List<String> gotFiles = Arrays.stream(entries).map(e -> Path.getPathWithoutSchemeAndAuthority(e.getPath()))
.map(Path::toString).collect(Collectors.toList());
List<String> candidateFiles = new ArrayList<>(filesWithoutSchemeAndAuthority);
boolean altered = candidateFiles.removeAll(gotFiles);
switch (event) {
case DISAPPEAR:
LOG.info("Following files are visible" + candidateFiles);
// If no candidate files gets removed, it means all of them have disappeared
return !altered;
case APPEAR:
default:
// if all files appear, the list is empty
return candidateFiles.isEmpty();
}
} catch (IOException ioe) {
LOG.warn("Got IOException waiting for file event. Have tried " + retryNum + " time(s)", ioe);
}
return false;
}, "Timed out waiting for files to become visible");
List<String> filesWithoutSchemeAndAuthority = getFilesWithoutSchemeAndAuthority(files);
retryTillSuccess(dir, filesWithoutSchemeAndAuthority, event);
}
/**
* Helper to check file visibility.
*
*
* @param filePath File Path
* @param visibility Visibility
* @return true (if file visible in Path), false (otherwise)
* @throws IOException -
*/
private boolean checkFileVisibility(Path filePath, FileVisibility visibility) throws IOException {
protected boolean checkFileVisibility(Path filePath, FileVisibility visibility) throws IOException {
try {
FileStatus status = fs.getFileStatus(filePath);
switch (visibility) {
@@ -142,10 +115,8 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
/**
* Helper function to wait till file either appears/disappears.
*
*
* @param filePath File Path
* @param visibility
* @throws TimeoutException
*/
private void waitForFileVisibility(Path filePath, FileVisibility visibility) throws TimeoutException {
long waitMs = consistencyGuardConfig.getInitialConsistencyCheckIntervalMs();
@@ -169,17 +140,18 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
/**
* Retries the check a configurable number of times until it returns success.
*
* @param predicate Predicate Function
* @param timedOutMessage Timed-Out message for logging
*
* @param dir directory of interest in which list of files are checked for visibility
* @param files List of files to check for visibility
* @param event {@link org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility} event of interest.
* @throws TimeoutException when retries are exhausted
*/
private void retryTillSuccess(Function<Integer, Boolean> predicate, String timedOutMessage) throws TimeoutException {
private void retryTillSuccess(Path dir, List<String> files, FileVisibility event) throws TimeoutException {
long waitMs = consistencyGuardConfig.getInitialConsistencyCheckIntervalMs();
int attempt = 0;
LOG.info("Max Attempts=" + consistencyGuardConfig.getMaxConsistencyChecks());
while (attempt < consistencyGuardConfig.getMaxConsistencyChecks()) {
boolean success = predicate.apply(attempt);
boolean success = checkFilesVisibility(attempt, dir, files, event);
if (success) {
return;
}
@@ -188,11 +160,55 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
waitMs = Math.min(waitMs, consistencyGuardConfig.getMaxConsistencyCheckIntervalMs());
attempt++;
}
throw new TimeoutException(timedOutMessage);
throw new TimeoutException("Timed out waiting for files to adhere to event " + event.name());
}
void sleepSafe(long waitMs) {
/**
 * Helper to check for file visibility based on {@link org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility} event.
 *
 * <p>Lists {@code dir} once and compares the listing (with scheme and authority stripped) against {@code files}.
 * An {@link IOException} raised while listing is logged and reported as "condition not met".
 *
 * @param retryNum retry attempt count; only used for logging.
 * @param dir directory of interest in which list of files are checked for visibility
 * @param files List of files to check for visibility; expected without scheme and authority so that they compare
 *              equal to the listed entries
 * @param event {@link org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility} event of interest.
 * @return {@code true} if condition succeeded. else {@code false}.
 */
protected boolean checkFilesVisibility(int retryNum, Path dir, List<String> files, FileVisibility event) {
  try {
    LOG.info("Trying " + retryNum);
    FileStatus[] entries = fs.listStatus(dir);
    List<String> gotFiles = Arrays.stream(entries).map(e -> Path.getPathWithoutSchemeAndAuthority(e.getPath()))
        .map(Path::toString).collect(Collectors.toList());
    switch (event) {
      case DISAPPEAR: {
        // Keep only the candidates that are still listed, so the log names the files that are actually visible.
        List<String> stillVisibleFiles = new ArrayList<>(files);
        stillVisibleFiles.retainAll(gotFiles);
        LOG.info("Following files are still visible: " + stillVisibleFiles);
        // All files have disappeared only when none of the candidates is listed any more
        return stillVisibleFiles.isEmpty();
      }
      case APPEAR:
      default: {
        // Drop every candidate that is listed; if all files appear, the list is empty
        List<String> missingFiles = new ArrayList<>(files);
        missingFiles.removeAll(gotFiles);
        return missingFiles.isEmpty();
      }
    }
  } catch (IOException ioe) {
    LOG.warn("Got IOException waiting for file event. Have tried " + retryNum + " time(s)", ioe);
  }
  return false;
}
/**
 * Strips scheme and authority from each of the given file paths.
 *
 * @param files list of files of interest.
 * @return the file paths, in the same order, without scheme and authority.
 */
protected List<String> getFilesWithoutSchemeAndAuthority(List<String> files) {
  List<String> strippedPaths = new ArrayList<>(files.size());
  for (String file : files) {
    Path withoutSchemeAndAuthority = Path.getPathWithoutSchemeAndAuthority(new Path(file));
    strippedPaths.add(withoutSchemeAndAuthority.toString());
  }
  return strippedPaths;
}
private void sleepSafe(long waitMs) {
try {
Thread.sleep(waitMs);
} catch (InterruptedException e) {

View File

@@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.fs;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeoutException;
/**
 * A consistency guard which sleeps for a configured period of time only on APPEAR. It is a no-op for DISAPPEAR.
 *
 * <p>This is specifically for the S3A filesystem, and here is the rationale.
 * This guard is used when deleting data files corresponding to marker files that need to be deleted.
 * There are two tricky cases that need to be considered.
 * Case 1: A data file creation is eventually consistent, and hence when issuing deletes, it may not be found.
 * Case 2: A data file was never created in the first place since the process crashed.
 * In S3A, GET and LIST are eventually consistent, and the delete() implementation internally does a LIST/EXISTS.
 *
 * <p>Prior to this guard, Hudi was leveraging {@link FailSafeConsistencyGuard}, which did the following to delete
 * data files.
 * Step 1: wait for all files to appear, with backoff between retries.
 * Step 2: issue deletes.
 * Step 3: wait for all files to disappear, with backoff between retries.
 * The waits in Step 1 and Step 3 are what {@link FailSafeConsistencyGuard} provides.
 *
 * <p>These steps are simplified with {@link OptimisticConsistencyGuard}.
 * Step 1: Check if all files adhere to the visibility event. If yes, proceed to Step 3.
 * Step 2: If not, sleep for a configured threshold and then proceed to the next step.
 * Step 3: issue deletes.
 *
 * <p>With this, any file that was created should be available within the configured threshold (eventual
 * consistency). Delete() will return false on FileNotFound. So, both cases are taken care of by this
 * {@link ConsistencyGuard}.
 */
public class OptimisticConsistencyGuard extends FailSafeConsistencyGuard {

  private static final Logger LOG = LogManager.getLogger(OptimisticConsistencyGuard.class);

  public OptimisticConsistencyGuard(FileSystem fs, ConsistencyGuardConfig consistencyGuardConfig) {
    super(fs, consistencyGuardConfig);
  }

  /**
   * Checks once whether the file is visible; if it is not, sleeps for the configured time and returns.
   * An {@link IOException} from the check is logged and ignored (no sleep happens in that case).
   *
   * @param filePath path of the file expected to appear
   * @throws TimeoutException never thrown by this implementation; declared by the overridden method
   */
  @Override
  public void waitTillFileAppears(Path filePath) throws TimeoutException {
    try {
      if (!checkFileVisibility(filePath, FileVisibility.APPEAR)) {
        sleepForConfiguredTime();
      }
    } catch (IOException ioe) {
      LOG.warn("Got IOException waiting for file visibility. Ignoring", ioe);
    }
  }

  @Override
  public void waitTillFileDisappears(Path filePath) throws TimeoutException {
    // no op
  }

  /**
   * Checks once whether all files are listed in the directory; if not, sleeps for the configured time and returns.
   *
   * @param dirPath directory containing the files
   * @param files files expected to appear in {@code dirPath}
   * @throws TimeoutException never thrown by this implementation; declared by the overridden method
   */
  @Override
  public void waitTillAllFilesAppear(String dirPath, List<String> files) throws TimeoutException {
    if (!checkFilesVisibility(1, new Path(dirPath), getFilesWithoutSchemeAndAuthority(files), FileVisibility.APPEAR)) {
      sleepForConfiguredTime();
    }
  }

  @Override
  public void waitTillAllFilesDisappear(String dirPath, List<String> files) throws TimeoutException {
    // no op
  }

  /**
   * Sleeps for the configured optimistic-guard sleep time. If interrupted, the sleep ends early and the
   * thread's interrupt status is restored so that callers further up the stack can still observe it.
   */
  private void sleepForConfiguredTime() {
    try {
      Thread.sleep(consistencyGuardConfig.getOptimisticConsistencyGuardSleepTimeMs());
    } catch (InterruptedException ie) {
      // Catching InterruptedException clears the interrupt flag; set it again instead of losing the signal.
      Thread.currentThread().interrupt();
      LOG.warn("Got InterruptedException waiting for file visibility. Ignoring", ie);
    }
  }
}