[HUDI-3634] Could read empty or partial HoodieCommitMetaData in downstream if using HDFS (#5048)
Add differentiated logic for creating an immutable file on HDFS: first write the content to a "file.tmp" and then rename it to the target file, so readers never observe an empty or partially written file.
This commit is contained in:
@@ -21,6 +21,7 @@ package org.apache.hudi.common.fs;
|
|||||||
import org.apache.hudi.common.metrics.Registry;
|
import org.apache.hudi.common.metrics.Registry;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.util.HoodieTimer;
|
import org.apache.hudi.common.util.HoodieTimer;
|
||||||
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.exception.HoodieException;
|
import org.apache.hudi.exception.HoodieException;
|
||||||
import org.apache.hudi.exception.HoodieIOException;
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
|
|
||||||
@@ -60,6 +61,8 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||||||
import java.util.concurrent.ConcurrentMap;
|
import java.util.concurrent.ConcurrentMap;
|
||||||
import java.util.concurrent.TimeoutException;
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
|
import static org.apache.hudi.common.fs.StorageSchemes.HDFS;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HoodieWrapperFileSystem wraps the default file system. It holds state about the open streams in the file system to
|
* HoodieWrapperFileSystem wraps the default file system. It holds state about the open streams in the file system to
|
||||||
* support getting the written size to each of the open streams.
|
* support getting the written size to each of the open streams.
|
||||||
@@ -68,6 +71,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
|||||||
|
|
||||||
public static final String HOODIE_SCHEME_PREFIX = "hoodie-";
|
public static final String HOODIE_SCHEME_PREFIX = "hoodie-";
|
||||||
|
|
||||||
|
private static final String TMP_PATH_POSTFIX = ".tmp";
|
||||||
|
|
||||||
protected enum MetricName {
|
protected enum MetricName {
|
||||||
create, rename, delete, listStatus, mkdirs, getFileStatus, globStatus, listFiles, read, write
|
create, rename, delete, listStatus, mkdirs, getFileStatus, globStatus, listFiles, read, write
|
||||||
}
|
}
|
||||||
@@ -986,6 +991,65 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
|||||||
file.toString() + " does not have a open stream. Cannot get the bytes written on the stream");
|
file.toString() + " does not have a open stream. Cannot get the bytes written on the stream");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected boolean needCreateTempFile() {
|
||||||
|
return HDFS.getScheme().equals(fileSystem.getScheme());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new file with overwrite set to false. This ensures files are created
|
||||||
|
* only once and never rewritten, also, here we take care if the content is not
|
||||||
|
* empty, will first write the content to a temp file if {needCreateTempFile} is
|
||||||
|
* true, and then rename it back after the content is written.
|
||||||
|
*
|
||||||
|
* @param fullPath File Path
|
||||||
|
* @param content Content to be stored
|
||||||
|
*/
|
||||||
|
public void createImmutableFileInPath(Path fullPath, Option<byte[]> content)
|
||||||
|
throws HoodieIOException {
|
||||||
|
FSDataOutputStream fsout = null;
|
||||||
|
Path tmpPath = null;
|
||||||
|
|
||||||
|
boolean needTempFile = needCreateTempFile();
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (!content.isPresent()) {
|
||||||
|
fsout = fileSystem.create(fullPath, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (content.isPresent() && needTempFile) {
|
||||||
|
Path parent = fullPath.getParent();
|
||||||
|
tmpPath = new Path(parent, fullPath.getName() + TMP_PATH_POSTFIX);
|
||||||
|
fsout = fileSystem.create(tmpPath, false);
|
||||||
|
fsout.write(content.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (content.isPresent() && !needTempFile) {
|
||||||
|
fsout = fileSystem.create(fullPath, false);
|
||||||
|
fsout.write(content.get());
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
String errorMsg = "Failed to create file" + (tmpPath != null ? tmpPath : fullPath);
|
||||||
|
throw new HoodieIOException(errorMsg, e);
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
if (null != fsout) {
|
||||||
|
fsout.close();
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
String errorMsg = "Failed to close file" + (needTempFile ? tmpPath : fullPath);
|
||||||
|
throw new HoodieIOException(errorMsg, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (null != tmpPath) {
|
||||||
|
fileSystem.rename(tmpPath, fullPath);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Failed to rename " + tmpPath + " to the target " + fullPath, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public FileSystem getFileSystem() {
|
public FileSystem getFileSystem() {
|
||||||
return fileSystem;
|
return fileSystem;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,7 +30,6 @@ import org.apache.hudi.common.util.collection.Pair;
|
|||||||
import org.apache.hudi.exception.HoodieIOException;
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
@@ -539,7 +538,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
|||||||
if (allowRedundantTransitions) {
|
if (allowRedundantTransitions) {
|
||||||
FileIOUtils.createFileInPath(metaClient.getFs(), getInstantFileNamePath(toInstant.getFileName()), data);
|
FileIOUtils.createFileInPath(metaClient.getFs(), getInstantFileNamePath(toInstant.getFileName()), data);
|
||||||
} else {
|
} else {
|
||||||
createImmutableFileInPath(getInstantFileNamePath(toInstant.getFileName()), data);
|
metaClient.getFs().createImmutableFileInPath(getInstantFileNamePath(toInstant.getFileName()), data);
|
||||||
}
|
}
|
||||||
LOG.info("Create new file for toInstant ?" + getInstantFileNamePath(toInstant.getFileName()));
|
LOG.info("Create new file for toInstant ?" + getInstantFileNamePath(toInstant.getFileName()));
|
||||||
}
|
}
|
||||||
@@ -706,33 +705,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
|||||||
if (allowOverwrite || metaClient.getTimelineLayoutVersion().isNullVersion()) {
|
if (allowOverwrite || metaClient.getTimelineLayoutVersion().isNullVersion()) {
|
||||||
FileIOUtils.createFileInPath(metaClient.getFs(), fullPath, content);
|
FileIOUtils.createFileInPath(metaClient.getFs(), fullPath, content);
|
||||||
} else {
|
} else {
|
||||||
createImmutableFileInPath(fullPath, content);
|
metaClient.getFs().createImmutableFileInPath(fullPath, content);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a new file in timeline with overwrite set to false. This ensures
|
|
||||||
* files are created only once and never rewritten
|
|
||||||
* @param fullPath File Path
|
|
||||||
* @param content Content to be stored
|
|
||||||
*/
|
|
||||||
private void createImmutableFileInPath(Path fullPath, Option<byte[]> content) {
|
|
||||||
FSDataOutputStream fsout = null;
|
|
||||||
try {
|
|
||||||
fsout = metaClient.getFs().create(fullPath, false);
|
|
||||||
if (content.isPresent()) {
|
|
||||||
fsout.write(content.get());
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("Failed to create file " + fullPath, e);
|
|
||||||
} finally {
|
|
||||||
try {
|
|
||||||
if (null != fsout) {
|
|
||||||
fsout.close();
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("Failed to close file " + fullPath, e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,8 @@
|
|||||||
|
|
||||||
package org.apache.hudi.common.table.timeline;
|
package org.apache.hudi.common.table.timeline;
|
||||||
|
|
||||||
|
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
|
||||||
|
import org.apache.hudi.common.fs.NoOpConsistencyGuard;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||||
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
||||||
@@ -32,6 +34,7 @@ import org.junit.jupiter.api.BeforeEach;
|
|||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
@@ -45,6 +48,7 @@ import java.util.concurrent.Executors;
|
|||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.function.BiConsumer;
|
import java.util.function.BiConsumer;
|
||||||
|
import java.util.function.Consumer;
|
||||||
import java.util.function.Supplier;
|
import java.util.function.Supplier;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
@@ -199,6 +203,25 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
|
|||||||
assertTrue(activeCommitTimeline.isBeforeTimelineStarts("00"));
|
assertTrue(activeCommitTimeline.isBeforeTimelineStarts("00"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAllowTempCommit() {
|
||||||
|
shouldAllowTempCommit(true, hoodieMetaClient -> {
|
||||||
|
timeline = new HoodieActiveTimeline(hoodieMetaClient);
|
||||||
|
|
||||||
|
HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1");
|
||||||
|
timeline.createNewInstant(instant1);
|
||||||
|
|
||||||
|
byte[] data = "commit".getBytes(StandardCharsets.UTF_8);
|
||||||
|
timeline.saveAsComplete(new HoodieInstant(true, instant1.getAction(),
|
||||||
|
instant1.getTimestamp()), Option.of(data));
|
||||||
|
|
||||||
|
timeline = timeline.reload();
|
||||||
|
|
||||||
|
assertTrue(timeline.getContiguousCompletedWriteTimeline().lastInstant().isPresent());
|
||||||
|
assertEquals(instant1.getTimestamp(), timeline.getContiguousCompletedWriteTimeline().lastInstant().get().getTimestamp());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetContiguousCompletedWriteTimeline() {
|
public void testGetContiguousCompletedWriteTimeline() {
|
||||||
// a mock timeline with holes
|
// a mock timeline with holes
|
||||||
@@ -594,4 +617,25 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
|
|||||||
}
|
}
|
||||||
return allInstants;
|
return allInstants;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void shouldAllowTempCommit(boolean allowTempCommit, Consumer<HoodieTableMetaClient> fun) {
|
||||||
|
if (allowTempCommit) {
|
||||||
|
HoodieWrapperFileSystem fs = metaClient.getFs();
|
||||||
|
HoodieWrapperFileSystem newFs = new HoodieWrapperFileSystem(fs.getFileSystem(), new NoOpConsistencyGuard()) {
|
||||||
|
@Override
|
||||||
|
protected boolean needCreateTempFile() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
metaClient.setFs(newFs);
|
||||||
|
try {
|
||||||
|
fun.accept(metaClient);
|
||||||
|
} finally {
|
||||||
|
metaClient.setFs(fs);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fun.accept(metaClient);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user