[HUDI-296] Explore use of spotless to auto fix formatting errors (#945)
- Add spotless format fixing to project - One time reformatting for conformity - Build fails for formatting changes and mvn spotless:apply autofixes them
This commit is contained in:
@@ -35,8 +35,7 @@ public class HoodieAvroWriteSupport extends AvroWriteSupport {
|
||||
private String maxRecordKey;
|
||||
|
||||
|
||||
public static final String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY =
|
||||
"org.apache.hudi.bloomfilter";
|
||||
public static final String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY = "org.apache.hudi.bloomfilter";
|
||||
public static final String HOODIE_MIN_RECORD_KEY_FOOTER = "hoodie_min_record_key";
|
||||
public static final String HOODIE_MAX_RECORD_KEY_FOOTER = "hoodie_max_record_key";
|
||||
|
||||
@@ -50,8 +49,7 @@ public class HoodieAvroWriteSupport extends AvroWriteSupport {
|
||||
public WriteSupport.FinalizedWriteContext finalizeWrite() {
|
||||
HashMap<String, String> extraMetaData = new HashMap<>();
|
||||
if (bloomFilter != null) {
|
||||
extraMetaData
|
||||
.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
|
||||
extraMetaData.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
|
||||
if (minRecordKey != null && maxRecordKey != null) {
|
||||
extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
|
||||
extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);
|
||||
|
||||
@@ -30,8 +30,8 @@ import org.apache.avro.generic.GenericRecord;
|
||||
|
||||
/**
|
||||
* Majority of this is copied from
|
||||
* https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/
|
||||
* common/JsonConverter.java Adjusted for expected behavior of our use cases
|
||||
* https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/ common/JsonConverter.java
|
||||
* Adjusted for expected behavior of our use cases
|
||||
*/
|
||||
public class MercifulJsonConverter {
|
||||
|
||||
@@ -51,8 +51,7 @@ public class MercifulJsonConverter {
|
||||
}
|
||||
}
|
||||
|
||||
private GenericRecord convert(Map<String, Object> raw, Schema schema)
|
||||
throws IOException {
|
||||
private GenericRecord convert(Map<String, Object> raw, Schema schema) throws IOException {
|
||||
GenericRecord result = new GenericData.Record(schema);
|
||||
for (Schema.Field f : schema.getFields()) {
|
||||
String name = f.name();
|
||||
@@ -128,17 +127,15 @@ public class MercifulJsonConverter {
|
||||
}
|
||||
return mapRes;
|
||||
default:
|
||||
throw new IllegalArgumentException(
|
||||
"JsonConverter cannot handle type: " + schema.getType());
|
||||
throw new IllegalArgumentException("JsonConverter cannot handle type: " + schema.getType());
|
||||
}
|
||||
throw new JsonConversionException(value, name, schema);
|
||||
}
|
||||
|
||||
private boolean isOptional(Schema schema) {
|
||||
return schema.getType().equals(Schema.Type.UNION)
|
||||
&& schema.getTypes().size() == 2
|
||||
return schema.getType().equals(Schema.Type.UNION) && schema.getTypes().size() == 2
|
||||
&& (schema.getTypes().get(0).getType().equals(Schema.Type.NULL)
|
||||
|| schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
|
||||
|| schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
|
||||
}
|
||||
|
||||
private Schema getNonNull(Schema schema) {
|
||||
@@ -160,8 +157,7 @@ public class MercifulJsonConverter {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Type conversion error for field %s, %s for %s",
|
||||
fieldName, value, schema);
|
||||
return String.format("Type conversion error for field %s, %s for %s", fieldName, value, schema);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,9 +41,8 @@ public class HoodieCleanStat implements Serializable {
|
||||
// Earliest commit that was retained in this clean
|
||||
private final String earliestCommitToRetain;
|
||||
|
||||
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath,
|
||||
List<String> deletePathPatterns, List<String> successDeleteFiles,
|
||||
List<String> failedDeleteFiles, String earliestCommitToRetain) {
|
||||
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath, List<String> deletePathPatterns,
|
||||
List<String> successDeleteFiles, List<String> failedDeleteFiles, String earliestCommitToRetain) {
|
||||
this.policy = policy;
|
||||
this.partitionPath = partitionPath;
|
||||
this.deletePathPatterns = deletePathPatterns;
|
||||
@@ -115,14 +114,14 @@ public class HoodieCleanStat implements Serializable {
|
||||
}
|
||||
|
||||
public Builder withEarliestCommitRetained(Option<HoodieInstant> earliestCommitToRetain) {
|
||||
this.earliestCommitToRetain = (earliestCommitToRetain.isPresent())
|
||||
? earliestCommitToRetain.get().getTimestamp() : "-1";
|
||||
this.earliestCommitToRetain =
|
||||
(earliestCommitToRetain.isPresent()) ? earliestCommitToRetain.get().getTimestamp() : "-1";
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanStat build() {
|
||||
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns,
|
||||
successDeleteFiles, failedDeleteFiles, earliestCommitToRetain);
|
||||
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns, successDeleteFiles, failedDeleteFiles,
|
||||
earliestCommitToRetain);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,8 +50,7 @@ public class HoodieJsonPayload implements HoodieRecordPayload<HoodieJsonPayload>
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema)
|
||||
throws IOException {
|
||||
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema) throws IOException {
|
||||
return getInsertValue(schema);
|
||||
}
|
||||
|
||||
@@ -68,8 +67,7 @@ public class HoodieJsonPayload implements HoodieRecordPayload<HoodieJsonPayload>
|
||||
private byte[] compressData(String jsonData) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION);
|
||||
DeflaterOutputStream dos =
|
||||
new DeflaterOutputStream(baos, deflater, true);
|
||||
DeflaterOutputStream dos = new DeflaterOutputStream(baos, deflater, true);
|
||||
try {
|
||||
dos.write(jsonData.getBytes());
|
||||
} finally {
|
||||
|
||||
@@ -37,8 +37,8 @@ public class HoodieRollbackStat implements Serializable {
|
||||
// Count of HoodieLogFile to commandBlocks written for a particular rollback
|
||||
private final Map<FileStatus, Long> commandBlocksCount;
|
||||
|
||||
public HoodieRollbackStat(String partitionPath, List<String> successDeleteFiles,
|
||||
List<String> failedDeleteFiles, Map<FileStatus, Long> commandBlocksCount) {
|
||||
public HoodieRollbackStat(String partitionPath, List<String> successDeleteFiles, List<String> failedDeleteFiles,
|
||||
Map<FileStatus, Long> commandBlocksCount) {
|
||||
this.partitionPath = partitionPath;
|
||||
this.successDeleteFiles = successDeleteFiles;
|
||||
this.failedDeleteFiles = failedDeleteFiles;
|
||||
@@ -73,7 +73,7 @@ public class HoodieRollbackStat implements Serializable {
|
||||
private String partitionPath;
|
||||
|
||||
public Builder withDeletedFileResults(Map<FileStatus, Boolean> deletedFiles) {
|
||||
//noinspection Convert2MethodRef
|
||||
// noinspection Convert2MethodRef
|
||||
successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue())
|
||||
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
|
||||
failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue())
|
||||
@@ -92,8 +92,7 @@ public class HoodieRollbackStat implements Serializable {
|
||||
}
|
||||
|
||||
public HoodieRollbackStat build() {
|
||||
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles,
|
||||
commandBlocksCount);
|
||||
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, commandBlocksCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,8 +57,7 @@ public class SerializableConfiguration implements Serializable {
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder str = new StringBuilder();
|
||||
configuration.iterator().forEachRemaining(e ->
|
||||
str.append(String.format("%s => %s \n", e.getKey(), e.getValue())));
|
||||
configuration.iterator().forEachRemaining(e -> str.append(String.format("%s => %s \n", e.getKey(), e.getValue())));
|
||||
return configuration.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,21 +59,19 @@ import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
/**
|
||||
* HoodieWrapperFileSystem wraps the default file system. It holds state about the open streams in
|
||||
* the file system to support getting the written size to each of the open streams.
|
||||
* HoodieWrapperFileSystem wraps the default file system. It holds state about the open streams in the file system to
|
||||
* support getting the written size to each of the open streams.
|
||||
*/
|
||||
public class HoodieWrapperFileSystem extends FileSystem {
|
||||
|
||||
public static final String HOODIE_SCHEME_PREFIX = "hoodie-";
|
||||
|
||||
private ConcurrentMap<String, SizeAwareFSDataOutputStream> openStreams = new
|
||||
ConcurrentHashMap<>();
|
||||
private ConcurrentMap<String, SizeAwareFSDataOutputStream> openStreams = new ConcurrentHashMap<>();
|
||||
private FileSystem fileSystem;
|
||||
private URI uri;
|
||||
private ConsistencyGuard consistencyGuard = new NoOpConsistencyGuard();
|
||||
|
||||
public HoodieWrapperFileSystem() {
|
||||
}
|
||||
public HoodieWrapperFileSystem() {}
|
||||
|
||||
public HoodieWrapperFileSystem(FileSystem fileSystem, ConsistencyGuard consistencyGuard) {
|
||||
this.fileSystem = fileSystem;
|
||||
@@ -94,8 +92,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
URI oldURI = oldPath.toUri();
|
||||
URI newURI;
|
||||
try {
|
||||
newURI = new URI(newScheme, oldURI.getUserInfo(), oldURI.getHost(), oldURI.getPort(),
|
||||
oldURI.getPath(), oldURI.getQuery(), oldURI.getFragment());
|
||||
newURI = new URI(newScheme, oldURI.getUserInfo(), oldURI.getHost(), oldURI.getPort(), oldURI.getPath(),
|
||||
oldURI.getQuery(), oldURI.getFragment());
|
||||
return new Path(newURI);
|
||||
} catch (URISyntaxException e) {
|
||||
// TODO - Better Exception handling
|
||||
@@ -108,8 +106,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
if (StorageSchemes.isSchemeSupported(scheme)) {
|
||||
newScheme = HOODIE_SCHEME_PREFIX + scheme;
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"BlockAlignedAvroParquetWriter does not support scheme " + scheme);
|
||||
throw new IllegalArgumentException("BlockAlignedAvroParquetWriter does not support scheme " + scheme);
|
||||
}
|
||||
return newScheme;
|
||||
}
|
||||
@@ -143,22 +140,21 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite,
|
||||
int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
|
||||
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
|
||||
short replication, long blockSize, Progressable progress) throws IOException {
|
||||
final Path translatedPath = convertToDefaultPath(f);
|
||||
return wrapOutputStream(f, fileSystem
|
||||
.create(translatedPath, permission, overwrite, bufferSize, replication, blockSize,
|
||||
progress));
|
||||
return wrapOutputStream(f,
|
||||
fileSystem.create(translatedPath, permission, overwrite, bufferSize, replication, blockSize, progress));
|
||||
}
|
||||
|
||||
private FSDataOutputStream wrapOutputStream(final Path path,
|
||||
FSDataOutputStream fsDataOutputStream) throws IOException {
|
||||
private FSDataOutputStream wrapOutputStream(final Path path, FSDataOutputStream fsDataOutputStream)
|
||||
throws IOException {
|
||||
if (fsDataOutputStream instanceof SizeAwareFSDataOutputStream) {
|
||||
return fsDataOutputStream;
|
||||
}
|
||||
|
||||
SizeAwareFSDataOutputStream os = new SizeAwareFSDataOutputStream(path,
|
||||
fsDataOutputStream, consistencyGuard, () -> openStreams.remove(path.getName()));
|
||||
SizeAwareFSDataOutputStream os = new SizeAwareFSDataOutputStream(path, fsDataOutputStream, consistencyGuard,
|
||||
() -> openStreams.remove(path.getName()));
|
||||
openStreams.put(path.getName(), os);
|
||||
return os;
|
||||
}
|
||||
@@ -184,8 +180,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream create(Path f, short replication, Progressable progress)
|
||||
throws IOException {
|
||||
public FSDataOutputStream create(Path f, short replication, Progressable progress) throws IOException {
|
||||
return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), replication, progress));
|
||||
}
|
||||
|
||||
@@ -201,39 +196,35 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication,
|
||||
long blockSize, Progressable progress) throws IOException {
|
||||
return wrapOutputStream(f, fileSystem
|
||||
.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize, progress));
|
||||
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize,
|
||||
Progressable progress) throws IOException {
|
||||
return wrapOutputStream(f,
|
||||
fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize, progress));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
|
||||
int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
|
||||
return wrapOutputStream(f, fileSystem
|
||||
.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize,
|
||||
progress));
|
||||
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags, int bufferSize,
|
||||
short replication, long blockSize, Progressable progress) throws IOException {
|
||||
return wrapOutputStream(f,
|
||||
fileSystem.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize, progress));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
|
||||
int bufferSize, short replication, long blockSize, Progressable progress,
|
||||
Options.ChecksumOpt checksumOpt) throws IOException {
|
||||
return wrapOutputStream(f, fileSystem
|
||||
.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize,
|
||||
progress, checksumOpt));
|
||||
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags, int bufferSize,
|
||||
short replication, long blockSize, Progressable progress, Options.ChecksumOpt checksumOpt) throws IOException {
|
||||
return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), permission, flags, bufferSize, replication,
|
||||
blockSize, progress, checksumOpt));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication,
|
||||
long blockSize) throws IOException {
|
||||
return wrapOutputStream(f, fileSystem
|
||||
.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream append(Path f, int bufferSize, Progressable progress)
|
||||
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize)
|
||||
throws IOException {
|
||||
return wrapOutputStream(f,
|
||||
fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
|
||||
return wrapOutputStream(f, fileSystem.append(convertToDefaultPath(f), bufferSize, progress));
|
||||
}
|
||||
|
||||
@@ -341,8 +332,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token<?>[] addDelegationTokens(String renewer, Credentials credentials)
|
||||
throws IOException {
|
||||
public Token<?>[] addDelegationTokens(String renewer, Credentials credentials) throws IOException {
|
||||
return fileSystem.addDelegationTokens(renewer, credentials);
|
||||
}
|
||||
|
||||
@@ -352,8 +342,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len)
|
||||
throws IOException {
|
||||
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
|
||||
return fileSystem.getFileBlockLocations(file, start, len);
|
||||
}
|
||||
|
||||
@@ -383,28 +372,27 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize,
|
||||
public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize, short replication,
|
||||
long blockSize, Progressable progress) throws IOException {
|
||||
Path p = convertToDefaultPath(f);
|
||||
return wrapOutputStream(p,
|
||||
fileSystem.createNonRecursive(p, overwrite, bufferSize, replication, blockSize, progress));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, boolean overwrite, int bufferSize,
|
||||
short replication, long blockSize, Progressable progress) throws IOException {
|
||||
Path p = convertToDefaultPath(f);
|
||||
return wrapOutputStream(p, fileSystem.createNonRecursive(p, overwrite, bufferSize, replication, blockSize,
|
||||
progress));
|
||||
return wrapOutputStream(p,
|
||||
fileSystem.createNonRecursive(p, permission, overwrite, bufferSize, replication, blockSize, progress));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, boolean overwrite,
|
||||
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
|
||||
int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
|
||||
Path p = convertToDefaultPath(f);
|
||||
return wrapOutputStream(p, fileSystem.createNonRecursive(p, permission, overwrite, bufferSize, replication,
|
||||
blockSize, progress));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
|
||||
EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize,
|
||||
Progressable progress) throws IOException {
|
||||
Path p = convertToDefaultPath(f);
|
||||
return wrapOutputStream(p, fileSystem.createNonRecursive(p, permission, flags, bufferSize, replication,
|
||||
blockSize, progress));
|
||||
return wrapOutputStream(p,
|
||||
fileSystem.createNonRecursive(p, permission, flags, bufferSize, replication, blockSize, progress));
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -590,10 +578,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path[] srcs, Path dst)
|
||||
throws IOException {
|
||||
fileSystem
|
||||
.copyFromLocalFile(delSrc, overwrite, convertLocalPaths(srcs), convertToDefaultPath(dst));
|
||||
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path[] srcs, Path dst) throws IOException {
|
||||
fileSystem.copyFromLocalFile(delSrc, overwrite, convertLocalPaths(srcs), convertToDefaultPath(dst));
|
||||
try {
|
||||
consistencyGuard.waitTillFileAppears(convertToDefaultPath(dst));
|
||||
} catch (TimeoutException e) {
|
||||
@@ -602,10 +588,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst)
|
||||
throws IOException {
|
||||
fileSystem
|
||||
.copyFromLocalFile(delSrc, overwrite, convertToLocalPath(src), convertToDefaultPath(dst));
|
||||
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) throws IOException {
|
||||
fileSystem.copyFromLocalFile(delSrc, overwrite, convertToLocalPath(src), convertToDefaultPath(dst));
|
||||
try {
|
||||
consistencyGuard.waitTillFileAppears(convertToDefaultPath(dst));
|
||||
} catch (TimeoutException e) {
|
||||
@@ -629,22 +613,19 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem)
|
||||
throws IOException {
|
||||
fileSystem.copyToLocalFile(delSrc, convertToDefaultPath(src), convertToLocalPath(dst),
|
||||
useRawLocalFileSystem);
|
||||
public void copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem) throws IOException {
|
||||
fileSystem.copyToLocalFile(delSrc, convertToDefaultPath(src), convertToLocalPath(dst), useRawLocalFileSystem);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException {
|
||||
return convertToHoodiePath(fileSystem
|
||||
.startLocalOutput(convertToDefaultPath(fsOutputFile), convertToDefaultPath(tmpLocalFile)));
|
||||
return convertToHoodiePath(
|
||||
fileSystem.startLocalOutput(convertToDefaultPath(fsOutputFile), convertToDefaultPath(tmpLocalFile)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException {
|
||||
fileSystem.completeLocalOutput(convertToDefaultPath(fsOutputFile),
|
||||
convertToDefaultPath(tmpLocalFile));
|
||||
fileSystem.completeLocalOutput(convertToDefaultPath(fsOutputFile), convertToDefaultPath(tmpLocalFile));
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -691,8 +672,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
|
||||
@Override
|
||||
public void createSymlink(Path target, Path link, boolean createParent) throws IOException {
|
||||
fileSystem
|
||||
.createSymlink(convertToDefaultPath(target), convertToDefaultPath(link), createParent);
|
||||
fileSystem.createSymlink(convertToDefaultPath(target), convertToDefaultPath(link), createParent);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -761,8 +741,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void renameSnapshot(Path path, String snapshotOldName, String snapshotNewName)
|
||||
throws IOException {
|
||||
public void renameSnapshot(Path path, String snapshotOldName, String snapshotNewName) throws IOException {
|
||||
fileSystem.renameSnapshot(convertToDefaultPath(path), snapshotOldName, snapshotNewName);
|
||||
}
|
||||
|
||||
@@ -807,8 +786,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setXAttr(Path path, String name, byte[] value, EnumSet<XAttrSetFlag> flag)
|
||||
throws IOException {
|
||||
public void setXAttr(Path path, String name, byte[] value, EnumSet<XAttrSetFlag> flag) throws IOException {
|
||||
fileSystem.setXAttr(convertToDefaultPath(path), name, value, flag);
|
||||
}
|
||||
|
||||
@@ -899,8 +877,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
||||
return openStreams.get(file.getName()).getBytesWritten();
|
||||
}
|
||||
// When the file is first written, we do not have a track of it
|
||||
throw new IllegalArgumentException(file.toString()
|
||||
+ " does not have a open stream. Cannot get the bytes written on the stream");
|
||||
throw new IllegalArgumentException(
|
||||
file.toString() + " does not have a open stream. Cannot get the bytes written on the stream");
|
||||
}
|
||||
|
||||
public FileSystem getFileSystem() {
|
||||
|
||||
@@ -27,8 +27,8 @@ import org.apache.hudi.common.util.ConsistencyGuard;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
|
||||
/**
|
||||
* Wrapper over <code>FSDataOutputStream</code> to keep track of the size of the written bytes. This
|
||||
* gives a cheap way to check on the underlying file size.
|
||||
* Wrapper over <code>FSDataOutputStream</code> to keep track of the size of the written bytes. This gives a cheap way
|
||||
* to check on the underlying file size.
|
||||
*/
|
||||
public class SizeAwareFSDataOutputStream extends FSDataOutputStream {
|
||||
|
||||
@@ -41,8 +41,8 @@ public class SizeAwareFSDataOutputStream extends FSDataOutputStream {
|
||||
// Consistency guard
|
||||
private final ConsistencyGuard consistencyGuard;
|
||||
|
||||
public SizeAwareFSDataOutputStream(Path path, FSDataOutputStream out,
|
||||
ConsistencyGuard consistencyGuard, Runnable closeCallback) throws IOException {
|
||||
public SizeAwareFSDataOutputStream(Path path, FSDataOutputStream out, ConsistencyGuard consistencyGuard,
|
||||
Runnable closeCallback) throws IOException {
|
||||
super(out);
|
||||
this.path = path;
|
||||
this.closeCallback = closeCallback;
|
||||
|
||||
@@ -31,8 +31,8 @@ import org.apache.hudi.common.util.FSUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
/**
|
||||
* Encapsulates all the needed information about a compaction and makes a decision whether this
|
||||
* compaction is effective or not
|
||||
* Encapsulates all the needed information about a compaction and makes a decision whether this compaction is effective
|
||||
* or not
|
||||
*
|
||||
*/
|
||||
public class CompactionOperation implements Serializable {
|
||||
@@ -44,10 +44,9 @@ public class CompactionOperation implements Serializable {
|
||||
private HoodieFileGroupId id;
|
||||
private Map<String, Double> metrics;
|
||||
|
||||
//Only for serialization/de-serialization
|
||||
// Only for serialization/de-serialization
|
||||
@Deprecated
|
||||
public CompactionOperation() {
|
||||
}
|
||||
public CompactionOperation() {}
|
||||
|
||||
public CompactionOperation(String fileId, String partitionPath, String baseInstantTime,
|
||||
Option<String> dataFileCommitTime, List<String> deltaFilePaths, Option<String> dataFilePath,
|
||||
@@ -60,8 +59,8 @@ public class CompactionOperation implements Serializable {
|
||||
this.metrics = metrics;
|
||||
}
|
||||
|
||||
public CompactionOperation(Option<HoodieDataFile> dataFile, String partitionPath,
|
||||
List<HoodieLogFile> logFiles, Map<String, Double> metrics) {
|
||||
public CompactionOperation(Option<HoodieDataFile> dataFile, String partitionPath, List<HoodieLogFile> logFiles,
|
||||
Map<String, Double> metrics) {
|
||||
if (dataFile.isPresent()) {
|
||||
this.baseInstantTime = dataFile.get().getCommitTime();
|
||||
this.dataFilePath = Option.of(dataFile.get().getPath());
|
||||
@@ -75,8 +74,7 @@ public class CompactionOperation implements Serializable {
|
||||
this.dataFileCommitTime = Option.empty();
|
||||
}
|
||||
|
||||
this.deltaFilePaths = logFiles.stream().map(s -> s.getPath().toString())
|
||||
.collect(Collectors.toList());
|
||||
this.deltaFilePaths = logFiles.stream().map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
this.metrics = metrics;
|
||||
}
|
||||
|
||||
@@ -113,12 +111,13 @@ public class CompactionOperation implements Serializable {
|
||||
}
|
||||
|
||||
public Option<HoodieDataFile> getBaseFile() {
|
||||
//TODO: HUDI-130 - Paths return in compaction plan needs to be relative to base-path
|
||||
// TODO: HUDI-130 - Paths return in compaction plan needs to be relative to base-path
|
||||
return dataFilePath.map(df -> new HoodieDataFile(df));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Avro generated Compaction operation to POJO for Spark RDD operation
|
||||
*
|
||||
* @param operation Hoodie Compaction Operation
|
||||
* @return
|
||||
*/
|
||||
@@ -126,8 +125,7 @@ public class CompactionOperation implements Serializable {
|
||||
CompactionOperation op = new CompactionOperation();
|
||||
op.baseInstantTime = operation.getBaseInstantTime();
|
||||
op.dataFilePath = Option.ofNullable(operation.getDataFilePath());
|
||||
op.dataFileCommitTime =
|
||||
op.dataFilePath.map(p -> FSUtils.getCommitTime(new Path(p).getName()));
|
||||
op.dataFileCommitTime = op.dataFilePath.map(p -> FSUtils.getCommitTime(new Path(p).getName()));
|
||||
op.deltaFilePaths = new ArrayList<>(operation.getDeltaFilePaths());
|
||||
op.id = new HoodieFileGroupId(operation.getPartitionPath(), operation.getFileId());
|
||||
op.metrics = operation.getMetrics() == null ? new HashMap<>() : new HashMap<>(operation.getMetrics());
|
||||
@@ -136,14 +134,9 @@ public class CompactionOperation implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CompactionOperation{"
|
||||
+ "baseInstantTime='" + baseInstantTime + '\''
|
||||
+ ", dataFileCommitTime=" + dataFileCommitTime
|
||||
+ ", deltaFilePaths=" + deltaFilePaths
|
||||
+ ", dataFilePath=" + dataFilePath
|
||||
+ ", id='" + id + '\''
|
||||
+ ", metrics=" + metrics
|
||||
+ '}';
|
||||
return "CompactionOperation{" + "baseInstantTime='" + baseInstantTime + '\'' + ", dataFileCommitTime="
|
||||
+ dataFileCommitTime + ", deltaFilePaths=" + deltaFilePaths + ", dataFilePath=" + dataFilePath + ", id='" + id
|
||||
+ '\'' + ", metrics=" + metrics + '}';
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -158,8 +151,7 @@ public class CompactionOperation implements Serializable {
|
||||
return Objects.equals(baseInstantTime, operation.baseInstantTime)
|
||||
&& Objects.equals(dataFileCommitTime, operation.dataFileCommitTime)
|
||||
&& Objects.equals(deltaFilePaths, operation.deltaFilePaths)
|
||||
&& Objects.equals(dataFilePath, operation.dataFilePath)
|
||||
&& Objects.equals(id, operation.id);
|
||||
&& Objects.equals(dataFilePath, operation.dataFilePath) && Objects.equals(id, operation.id);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -25,8 +25,8 @@ import java.util.stream.Stream;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
/**
|
||||
* Within a file group, a slice is a combination of data file written at a commit time and list of
|
||||
* log files, containing changes to the data file from that commit time
|
||||
* Within a file group, a slice is a combination of data file written at a commit time and list of log files, containing
|
||||
* changes to the data file from that commit time
|
||||
*/
|
||||
public class FileSlice implements Serializable {
|
||||
|
||||
@@ -46,8 +46,8 @@ public class FileSlice implements Serializable {
|
||||
private HoodieDataFile dataFile;
|
||||
|
||||
/**
|
||||
* List of appendable log files with real time data - Sorted with greater log version first -
|
||||
* Always empty for copy_on_write storage.
|
||||
* List of appendable log files with real time data - Sorted with greater log version first - Always empty for
|
||||
* copy_on_write storage.
|
||||
*/
|
||||
private final TreeSet<HoodieLogFile> logFiles;
|
||||
|
||||
@@ -100,6 +100,7 @@ public class FileSlice implements Serializable {
|
||||
|
||||
/**
|
||||
* Returns true if there is no data file and no log files. Happens as part of pending compaction
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
@@ -126,10 +127,8 @@ public class FileSlice implements Serializable {
|
||||
return false;
|
||||
}
|
||||
FileSlice slice = (FileSlice) o;
|
||||
return Objects.equals(fileGroupId, slice.fileGroupId)
|
||||
&& Objects.equals(baseInstantTime, slice.baseInstantTime)
|
||||
&& Objects.equals(dataFile, slice.dataFile)
|
||||
&& Objects.equals(logFiles, slice.logFiles);
|
||||
return Objects.equals(fileGroupId, slice.fileGroupId) && Objects.equals(baseInstantTime, slice.baseInstantTime)
|
||||
&& Objects.equals(dataFile, slice.dataFile) && Objects.equals(logFiles, slice.logFiles);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -27,14 +27,14 @@ import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
/**
|
||||
* This is a payload to wrap a existing Hoodie Avro Record. Useful to create a HoodieRecord over
|
||||
* existing GenericRecords in a hoodie datasets (useful in compactions)
|
||||
* This is a payload to wrap a existing Hoodie Avro Record. Useful to create a HoodieRecord over existing GenericRecords
|
||||
* in a hoodie datasets (useful in compactions)
|
||||
*/
|
||||
public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload> {
|
||||
|
||||
// Store the GenericRecord converted to bytes - 1) Doesn't store schema hence memory efficient 2) Makes the payload
|
||||
// java serializable
|
||||
private final byte [] recordBytes;
|
||||
private final byte[] recordBytes;
|
||||
|
||||
public HoodieAvroPayload(Option<GenericRecord> record) {
|
||||
try {
|
||||
@@ -54,8 +54,7 @@ public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload>
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
|
||||
throws IOException {
|
||||
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
|
||||
return getInsertValue(schema);
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,5 @@
|
||||
package org.apache.hudi.common.model;
|
||||
|
||||
public enum HoodieCleaningPolicy {
|
||||
KEEP_LATEST_FILE_VERSIONS,
|
||||
KEEP_LATEST_COMMITS
|
||||
KEEP_LATEST_FILE_VERSIONS, KEEP_LATEST_COMMITS
|
||||
}
|
||||
|
||||
@@ -338,10 +338,7 @@ public class HoodieCommitMetadata implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieCommitMetadata{"
|
||||
+ "partitionToWriteStats=" + partitionToWriteStats
|
||||
+ ", compacted=" + compacted
|
||||
+ ", extraMetadataMap=" + extraMetadataMap
|
||||
+ '}';
|
||||
return "HoodieCommitMetadata{" + "partitionToWriteStats=" + partitionToWriteStats + ", compacted=" + compacted
|
||||
+ ", extraMetadataMap=" + extraMetadataMap + '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,9 +93,6 @@ public class HoodieDataFile implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieDataFile{"
|
||||
+ "fullPath=" + fullPath
|
||||
+ ", fileLen=" + fileLen
|
||||
+ '}';
|
||||
return "HoodieDataFile{" + "fullPath=" + fullPath + ", fileLen=" + fileLen + '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,8 +69,8 @@ public class HoodieFileGroup implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Potentially add a new file-slice by adding base-instant time
|
||||
* A file-slice without any data-file and log-files can exist (if a compaction just got requested)
|
||||
* Potentially add a new file-slice by adding base-instant time A file-slice without any data-file and log-files can
|
||||
* exist (if a compaction just got requested)
|
||||
*/
|
||||
public void addNewFileSliceAtInstant(String baseInstantTime) {
|
||||
if (!fileSlices.containsKey(baseInstantTime)) {
|
||||
@@ -107,15 +107,13 @@ public class HoodieFileGroup implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* A FileSlice is considered committed, if one of the following is true - There is a committed
|
||||
* data file - There are some log files, that are based off a commit or delta commit
|
||||
* A FileSlice is considered committed, if one of the following is true - There is a committed data file - There are
|
||||
* some log files, that are based off a commit or delta commit
|
||||
*/
|
||||
private boolean isFileSliceCommitted(FileSlice slice) {
|
||||
String maxCommitTime = lastInstant.get().getTimestamp();
|
||||
return timeline.containsOrBeforeTimelineStarts(slice.getBaseInstantTime())
|
||||
&& HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL);
|
||||
&& HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(), maxCommitTime, HoodieTimeline.LESSER_OR_EQUAL);
|
||||
|
||||
}
|
||||
|
||||
@@ -138,9 +136,7 @@ public class HoodieFileGroup implements Serializable {
|
||||
*/
|
||||
public Stream<FileSlice> getAllFileSlices() {
|
||||
if (!timeline.empty()) {
|
||||
return fileSlices.entrySet().stream()
|
||||
.map(Map.Entry::getValue)
|
||||
.filter(this::isFileSliceCommitted);
|
||||
return fileSlices.entrySet().stream().map(Map.Entry::getValue).filter(this::isFileSliceCommitted);
|
||||
}
|
||||
return Stream.empty();
|
||||
}
|
||||
@@ -166,41 +162,32 @@ public class HoodieFileGroup implements Serializable {
|
||||
* Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime
|
||||
*/
|
||||
public Option<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
|
||||
return Option.fromJavaOptional(getAllFileSlices()
|
||||
.filter(slice ->
|
||||
HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.findFirst());
|
||||
return Option.fromJavaOptional(getAllFileSlices().filter(slice -> HoodieTimeline
|
||||
.compareTimestamps(slice.getBaseInstantTime(), maxCommitTime, HoodieTimeline.LESSER_OR_EQUAL)).findFirst());
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain the latest file slice, upto a commitTime i.e < maxInstantTime
|
||||
*
|
||||
* @param maxInstantTime Max Instant Time
|
||||
* @return
|
||||
*/
|
||||
public Option<FileSlice> getLatestFileSliceBefore(String maxInstantTime) {
|
||||
return Option.fromJavaOptional(getAllFileSlices()
|
||||
.filter(slice ->
|
||||
HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(),
|
||||
maxInstantTime,
|
||||
HoodieTimeline.LESSER))
|
||||
return Option.fromJavaOptional(getAllFileSlices().filter(
|
||||
slice -> HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(), maxInstantTime, HoodieTimeline.LESSER))
|
||||
.findFirst());
|
||||
}
|
||||
|
||||
public Option<FileSlice> getLatestFileSliceInRange(List<String> commitRange) {
|
||||
return Option.fromJavaOptional(getAllFileSlices()
|
||||
.filter(slice -> commitRange.contains(slice.getBaseInstantTime()))
|
||||
.findFirst());
|
||||
return Option.fromJavaOptional(
|
||||
getAllFileSlices().filter(slice -> commitRange.contains(slice.getBaseInstantTime())).findFirst());
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream of committed data files, sorted reverse commit time
|
||||
*/
|
||||
public Stream<HoodieDataFile> getAllDataFiles() {
|
||||
return getAllFileSlices()
|
||||
.filter(slice -> slice.getDataFile().isPresent())
|
||||
.map(slice -> slice.getDataFile().get());
|
||||
return getAllFileSlices().filter(slice -> slice.getDataFile().isPresent()).map(slice -> slice.getDataFile().get());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -52,8 +52,7 @@ public class HoodieFileGroupId implements Serializable {
|
||||
return false;
|
||||
}
|
||||
HoodieFileGroupId that = (HoodieFileGroupId) o;
|
||||
return Objects.equals(partitionPath, that.partitionPath)
|
||||
&& Objects.equals(fileId, that.fileId);
|
||||
return Objects.equals(partitionPath, that.partitionPath) && Objects.equals(fileId, that.fileId);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -63,9 +62,6 @@ public class HoodieFileGroupId implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieFileGroupId{"
|
||||
+ "partitionPath='" + partitionPath + '\''
|
||||
+ ", fileId='" + fileId + '\''
|
||||
+ '}';
|
||||
return "HoodieFileGroupId{" + "partitionPath='" + partitionPath + '\'' + ", fileId='" + fileId + '\'' + '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,8 +24,8 @@ import java.io.Serializable;
|
||||
/**
|
||||
* HoodieKey consists of
|
||||
* <p>
|
||||
* - recordKey : a recordKey that acts as primary key for a record - partitionPath : path to the
|
||||
* partition that contains the record
|
||||
* - recordKey : a recordKey that acts as primary key for a record - partitionPath : path to the partition that contains
|
||||
* the record
|
||||
*/
|
||||
public class HoodieKey implements Serializable {
|
||||
|
||||
@@ -56,8 +56,7 @@ public class HoodieKey implements Serializable {
|
||||
return false;
|
||||
}
|
||||
HoodieKey otherKey = (HoodieKey) o;
|
||||
return Objects.equal(recordKey, otherKey.recordKey)
|
||||
&& Objects.equal(partitionPath, otherKey.partitionPath);
|
||||
return Objects.equal(recordKey, otherKey.recordKey) && Objects.equal(partitionPath, otherKey.partitionPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -109,9 +109,7 @@ public class HoodieLogFile implements Serializable {
|
||||
String baseCommitTime = getBaseCommitTime();
|
||||
Path path = getPath();
|
||||
String extension = "." + FSUtils.getFileExtensionFromLog(path);
|
||||
int newVersion = FSUtils
|
||||
.computeNextLogVersion(fs, path.getParent(), fileId,
|
||||
extension, baseCommitTime);
|
||||
int newVersion = FSUtils.computeNextLogVersion(fs, path.getParent(), fileId, extension, baseCommitTime);
|
||||
return new HoodieLogFile(new Path(path.getParent(),
|
||||
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion, logWriteToken)));
|
||||
}
|
||||
@@ -179,9 +177,6 @@ public class HoodieLogFile implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieLogFile{"
|
||||
+ "pathStr='" + pathStr + '\''
|
||||
+ ", fileLen=" + fileLen
|
||||
+ '}';
|
||||
return "HoodieLogFile{" + "pathStr='" + pathStr + '\'' + ", fileLen=" + fileLen + '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,12 +64,10 @@ public class HoodiePartitionMetadata {
|
||||
/**
|
||||
* Construct metadata object to be written out.
|
||||
*/
|
||||
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath,
|
||||
Path partitionPath) {
|
||||
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, Path partitionPath) {
|
||||
this(fs, partitionPath);
|
||||
props.setProperty(COMMIT_TIME_KEY, commitTime);
|
||||
props
|
||||
.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
|
||||
props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
|
||||
}
|
||||
|
||||
public int getPartitionDepth() {
|
||||
@@ -83,8 +81,8 @@ public class HoodiePartitionMetadata {
|
||||
* Write the metadata safely into partition atomically.
|
||||
*/
|
||||
public void trySave(int taskPartitionId) {
|
||||
Path tmpMetaPath = new Path(partitionPath,
|
||||
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
|
||||
Path tmpMetaPath =
|
||||
new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
|
||||
Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
|
||||
boolean metafileExists = false;
|
||||
|
||||
@@ -102,9 +100,8 @@ public class HoodiePartitionMetadata {
|
||||
fs.rename(tmpMetaPath, metaPath);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
log.warn(
|
||||
"Error trying to save partition metadata (this is okay, as long as "
|
||||
+ "atleast 1 of these succced), " + partitionPath, ioe);
|
||||
log.warn("Error trying to save partition metadata (this is okay, as long as " + "atleast 1 of these succced), "
|
||||
+ partitionPath, ioe);
|
||||
} finally {
|
||||
if (!metafileExists) {
|
||||
try {
|
||||
@@ -129,8 +126,7 @@ public class HoodiePartitionMetadata {
|
||||
is = fs.open(metaFile);
|
||||
props.load(is);
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath,
|
||||
ioe);
|
||||
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath, ioe);
|
||||
} finally {
|
||||
if (is != null) {
|
||||
is.close();
|
||||
@@ -143,8 +139,7 @@ public class HoodiePartitionMetadata {
|
||||
try {
|
||||
return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath,
|
||||
ioe);
|
||||
throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath, ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,12 +36,8 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
||||
public static String FILENAME_METADATA_FIELD = "_hoodie_file_name";
|
||||
|
||||
public static final List<String> HOODIE_META_COLUMNS =
|
||||
new ImmutableList.Builder<String>().add(COMMIT_TIME_METADATA_FIELD)
|
||||
.add(COMMIT_SEQNO_METADATA_FIELD)
|
||||
.add(RECORD_KEY_METADATA_FIELD)
|
||||
.add(PARTITION_PATH_METADATA_FIELD)
|
||||
.add(FILENAME_METADATA_FIELD)
|
||||
.build();
|
||||
new ImmutableList.Builder<String>().add(COMMIT_TIME_METADATA_FIELD).add(COMMIT_SEQNO_METADATA_FIELD)
|
||||
.add(RECORD_KEY_METADATA_FIELD).add(PARTITION_PATH_METADATA_FIELD).add(FILENAME_METADATA_FIELD).build();
|
||||
|
||||
/**
|
||||
* Identifies the record across the table
|
||||
@@ -95,8 +91,8 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
||||
}
|
||||
|
||||
/**
|
||||
* Release the actual payload, to ease memory pressure. To be called after the record has been
|
||||
* written to storage. Once deflated, cannot be inflated.
|
||||
* Release the actual payload, to ease memory pressure. To be called after the record has been written to storage.
|
||||
* Once deflated, cannot be inflated.
|
||||
*/
|
||||
public void deflate() {
|
||||
this.data = null;
|
||||
@@ -118,8 +114,7 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the new currentLocation of the record, after being written. This again should happen
|
||||
* exactly-once.
|
||||
* Sets the new currentLocation of the record, after being written. This again should happen exactly-once.
|
||||
*/
|
||||
public HoodieRecord setNewLocation(HoodieRecordLocation location) {
|
||||
checkState();
|
||||
@@ -145,10 +140,8 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
||||
return false;
|
||||
}
|
||||
HoodieRecord that = (HoodieRecord) o;
|
||||
return Objects.equal(key, that.key)
|
||||
&& Objects.equal(data, that.data)
|
||||
&& Objects.equal(currentLocation, that.currentLocation)
|
||||
&& Objects.equal(newLocation, that.newLocation);
|
||||
return Objects.equal(key, that.key) && Objects.equal(data, that.data)
|
||||
&& Objects.equal(currentLocation, that.currentLocation) && Objects.equal(newLocation, that.newLocation);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -22,8 +22,7 @@ import com.google.common.base.Objects;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Location of a HoodieRecord within the partition it belongs to. Ultimately, this points to an
|
||||
* actual file on disk
|
||||
* Location of a HoodieRecord within the partition it belongs to. Ultimately, this points to an actual file on disk
|
||||
*/
|
||||
public class HoodieRecordLocation implements Serializable {
|
||||
|
||||
@@ -44,8 +43,7 @@ public class HoodieRecordLocation implements Serializable {
|
||||
return false;
|
||||
}
|
||||
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
|
||||
return Objects.equal(instantTime, otherLoc.instantTime)
|
||||
&& Objects.equal(fileId, otherLoc.fileId);
|
||||
return Objects.equal(instantTime, otherLoc.instantTime) && Objects.equal(fileId, otherLoc.fileId);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -26,45 +26,41 @@ import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
/**
|
||||
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code> This abstracts
|
||||
* out callbacks which depend on record specific logic
|
||||
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code> This abstracts out callbacks which
|
||||
* depend on record specific logic
|
||||
*/
|
||||
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
|
||||
|
||||
/**
|
||||
* When more than one HoodieRecord have the same HoodieKey, this function combines them before
|
||||
* attempting to insert/upsert (if combining turned on in HoodieClientConfig)
|
||||
* When more than one HoodieRecord have the same HoodieKey, this function combines them before attempting to
|
||||
* insert/upsert (if combining turned on in HoodieClientConfig)
|
||||
*/
|
||||
T preCombine(T another);
|
||||
|
||||
/**
|
||||
* This methods lets you write custom merging/combining logic to produce new values as a function
|
||||
* of current value on storage and whats contained in this object.
|
||||
* This methods lets you write custom merging/combining logic to produce new values as a function of current value on
|
||||
* storage and whats contained in this object.
|
||||
* <p>
|
||||
* eg: 1) You are updating counters, you may want to add counts to currentValue and write back
|
||||
* updated counts 2) You may be reading DB redo logs, and merge them with current image for a
|
||||
* database row on storage
|
||||
* eg: 1) You are updating counters, you may want to add counts to currentValue and write back updated counts 2) You
|
||||
* may be reading DB redo logs, and merge them with current image for a database row on storage
|
||||
*
|
||||
* @param currentValue Current value in storage, to merge/combine this payload with
|
||||
* @param schema Schema used for record
|
||||
* @return new combined/merged value to be written back to storage. EMPTY to skip writing this
|
||||
* record.
|
||||
* @param schema Schema used for record
|
||||
* @return new combined/merged value to be written back to storage. EMPTY to skip writing this record.
|
||||
*/
|
||||
Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
|
||||
throws IOException;
|
||||
Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException;
|
||||
|
||||
/**
|
||||
* Generates an avro record out of the given HoodieRecordPayload, to be written out to storage.
|
||||
* Called when writing a new value for the given HoodieKey, wherein there is no existing record in
|
||||
* storage to be combined against. (i.e insert) Return EMPTY to skip writing this record.
|
||||
* Generates an avro record out of the given HoodieRecordPayload, to be written out to storage. Called when writing a
|
||||
* new value for the given HoodieKey, wherein there is no existing record in storage to be combined against. (i.e
|
||||
* insert) Return EMPTY to skip writing this record.
|
||||
*/
|
||||
Option<IndexedRecord> getInsertValue(Schema schema) throws IOException;
|
||||
|
||||
/**
|
||||
* This method can be used to extract some metadata from HoodieRecordPayload. The metadata is
|
||||
* passed to {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to
|
||||
* compute some aggregate metrics using the metadata in the context of a write success or
|
||||
* failure.
|
||||
* This method can be used to extract some metadata from HoodieRecordPayload. The metadata is passed to
|
||||
* {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to compute some aggregate metrics
|
||||
* using the metadata in the context of a write success or failure.
|
||||
*/
|
||||
default Option<Map<String, String>> getMetadata() {
|
||||
return Option.empty();
|
||||
|
||||
@@ -23,14 +23,13 @@ package org.apache.hudi.common.model;
|
||||
* <p>
|
||||
* Currently, 1 type is supported
|
||||
* <p>
|
||||
* COPY_ON_WRITE - Performs upserts by versioning entire files, with later versions containing newer
|
||||
* value of a record.
|
||||
* COPY_ON_WRITE - Performs upserts by versioning entire files, with later versions containing newer value of a record.
|
||||
* <p>
|
||||
* In the future, following might be added.
|
||||
* <p>
|
||||
* MERGE_ON_READ - Speeds up upserts, by delaying merge until enough work piles up.
|
||||
* <p>
|
||||
* SIMPLE_LSM - A simple 2 level LSM tree.
|
||||
* SIMPLE_LSM - A simple 2 level LSM tree.
|
||||
*/
|
||||
public enum HoodieTableType {
|
||||
COPY_ON_WRITE, MERGE_ON_READ
|
||||
|
||||
@@ -48,8 +48,8 @@ public class HoodieWriteStat implements Serializable {
|
||||
private String prevCommit;
|
||||
|
||||
/**
|
||||
* Total number of records written for this file. - for updates, its the entire number of records
|
||||
* in the file - for inserts, its the actual number of records inserted.
|
||||
* Total number of records written for this file. - for updates, its the entire number of records in the file - for
|
||||
* inserts, its the actual number of records inserted.
|
||||
*/
|
||||
private long numWrites;
|
||||
|
||||
@@ -318,25 +318,13 @@ public class HoodieWriteStat implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieWriteStat{"
|
||||
+ "fileId='" + fileId + '\''
|
||||
+ ", path='" + path + '\''
|
||||
+ ", prevCommit='" + prevCommit + '\''
|
||||
+ ", numWrites=" + numWrites
|
||||
+ ", numDeletes=" + numDeletes
|
||||
+ ", numUpdateWrites=" + numUpdateWrites
|
||||
+ ", totalWriteBytes=" + totalWriteBytes
|
||||
+ ", totalWriteErrors=" + totalWriteErrors
|
||||
+ ", tempPath='" + tempPath + '\''
|
||||
+ ", partitionPath='" + partitionPath
|
||||
+ '\'' + ", totalLogRecords=" + totalLogRecords
|
||||
+ ", totalLogFilesCompacted=" + totalLogFilesCompacted
|
||||
+ ", totalLogSizeCompacted=" + totalLogSizeCompacted
|
||||
+ ", totalUpdatedRecordsCompacted=" + totalUpdatedRecordsCompacted
|
||||
+ ", totalLogBlocks=" + totalLogBlocks
|
||||
+ ", totalCorruptLogBlock=" + totalCorruptLogBlock
|
||||
+ ", totalRollbackBlocks=" + totalRollbackBlocks
|
||||
+ '}';
|
||||
return "HoodieWriteStat{" + "fileId='" + fileId + '\'' + ", path='" + path + '\'' + ", prevCommit='" + prevCommit
|
||||
+ '\'' + ", numWrites=" + numWrites + ", numDeletes=" + numDeletes + ", numUpdateWrites=" + numUpdateWrites
|
||||
+ ", totalWriteBytes=" + totalWriteBytes + ", totalWriteErrors=" + totalWriteErrors + ", tempPath='" + tempPath
|
||||
+ '\'' + ", partitionPath='" + partitionPath + '\'' + ", totalLogRecords=" + totalLogRecords
|
||||
+ ", totalLogFilesCompacted=" + totalLogFilesCompacted + ", totalLogSizeCompacted=" + totalLogSizeCompacted
|
||||
+ ", totalUpdatedRecordsCompacted=" + totalUpdatedRecordsCompacted + ", totalLogBlocks=" + totalLogBlocks
|
||||
+ ", totalCorruptLogBlock=" + totalCorruptLogBlock + ", totalRollbackBlocks=" + totalRollbackBlocks + '}';
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -33,8 +33,7 @@ public enum StorageSchemes {
|
||||
// Apache Ignite FS
|
||||
IGNITE("igfs", true),
|
||||
// AWS S3
|
||||
S3A("s3a", false),
|
||||
S3("s3", false),
|
||||
S3A("s3a", false), S3("s3", false),
|
||||
// Google Cloud Storage
|
||||
GCS("gs", false),
|
||||
// View FS for federated setups. If federating across cloud stores, then append support is false
|
||||
|
||||
@@ -36,10 +36,9 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc
|
||||
* Configurations are loaded from hoodie.properties, these properties are usually set during
|
||||
* initializing a path as hoodie base path and never changes during the lifetime of a hoodie
|
||||
* dataset.
|
||||
* Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc Configurations are
|
||||
* loaded from hoodie.properties, these properties are usually set during initializing a path as hoodie base path and
|
||||
* never changes during the lifetime of a hoodie dataset.
|
||||
*
|
||||
* @see HoodieTableMetaClient
|
||||
* @since 0.3.0
|
||||
@@ -51,10 +50,8 @@ public class HoodieTableConfig implements Serializable {
|
||||
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
|
||||
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
|
||||
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
|
||||
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME =
|
||||
"hoodie.table.ro.file.format";
|
||||
public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME =
|
||||
"hoodie.table.rt.file.format";
|
||||
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME = "hoodie.table.ro.file.format";
|
||||
public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME = "hoodie.table.rt.file.format";
|
||||
public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class";
|
||||
public static final String HOODIE_ARCHIVELOG_FOLDER_PROP_NAME = "hoodie.archivelog.folder";
|
||||
|
||||
@@ -88,37 +85,32 @@ public class HoodieTableConfig implements Serializable {
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieTableConfig() {
|
||||
}
|
||||
public HoodieTableConfig() {}
|
||||
|
||||
/**
|
||||
* Initialize the hoodie meta directory and any necessary files inside the meta (including the
|
||||
* hoodie.properties)
|
||||
* Initialize the hoodie meta directory and any necessary files inside the meta (including the hoodie.properties)
|
||||
*/
|
||||
public static void createHoodieProperties(FileSystem fs, Path metadataFolder,
|
||||
Properties properties) throws IOException {
|
||||
public static void createHoodieProperties(FileSystem fs, Path metadataFolder, Properties properties)
|
||||
throws IOException {
|
||||
if (!fs.exists(metadataFolder)) {
|
||||
fs.mkdirs(metadataFolder);
|
||||
}
|
||||
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
|
||||
try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
|
||||
if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
|
||||
throw new IllegalArgumentException(
|
||||
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
|
||||
throw new IllegalArgumentException(HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
|
||||
}
|
||||
if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
|
||||
properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
|
||||
}
|
||||
if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ
|
||||
.name()
|
||||
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
|
||||
if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ.name()
|
||||
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
|
||||
properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
|
||||
}
|
||||
if (!properties.containsKey(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME)) {
|
||||
properties.setProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);
|
||||
}
|
||||
properties
|
||||
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
|
||||
properties.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,8 +131,8 @@ public class HoodieTableConfig implements Serializable {
|
||||
public String getPayloadClass() {
|
||||
// There could be datasets written with payload class from com.uber.hoodie. Need to transparently
|
||||
// change to org.apache.hudi
|
||||
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS)
|
||||
.replace("com.uber.hoodie", "org.apache.hudi");
|
||||
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS).replace("com.uber.hoodie",
|
||||
"org.apache.hudi");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -182,7 +174,7 @@ public class HoodieTableConfig implements Serializable {
|
||||
}
|
||||
|
||||
public Map<String, String> getProps() {
|
||||
return props.entrySet().stream().collect(
|
||||
Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())));
|
||||
return props.entrySet().stream()
|
||||
.collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,12 +50,12 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* <code>HoodieTableMetaClient</code> allows to access meta-data about a hoodie table It returns
|
||||
* meta-data about commits, savepoints, compactions, cleanups as a <code>HoodieTimeline</code>
|
||||
* Create an instance of the <code>HoodieTableMetaClient</code> with FileSystem and basePath to
|
||||
* start getting the meta-data. <p> All the timelines are computed lazily, once computed the
|
||||
* timeline is cached and never refreshed. Use the <code>HoodieTimeline.reload()</code> to refresh
|
||||
* timelines.
|
||||
* <code>HoodieTableMetaClient</code> allows to access meta-data about a hoodie table It returns meta-data about
|
||||
* commits, savepoints, compactions, cleanups as a <code>HoodieTimeline</code> Create an instance of the
|
||||
* <code>HoodieTableMetaClient</code> with FileSystem and basePath to start getting the meta-data.
|
||||
* <p>
|
||||
* All the timelines are computed lazily, once computed the timeline is cached and never refreshed. Use the
|
||||
* <code>HoodieTimeline.reload()</code> to refresh timelines.
|
||||
*
|
||||
* @see HoodieTimeline
|
||||
* @since 0.3.0
|
||||
@@ -79,20 +79,17 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
private HoodieArchivedTimeline archivedTimeline;
|
||||
private ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build();
|
||||
|
||||
public HoodieTableMetaClient(Configuration conf, String basePath)
|
||||
throws DatasetNotFoundException {
|
||||
public HoodieTableMetaClient(Configuration conf, String basePath) throws DatasetNotFoundException {
|
||||
// Do not load any timeline by default
|
||||
this(conf, basePath, false);
|
||||
}
|
||||
|
||||
public HoodieTableMetaClient(Configuration conf, String basePath,
|
||||
boolean loadActiveTimelineOnLoad) {
|
||||
public HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad) {
|
||||
this(conf, basePath, loadActiveTimelineOnLoad, ConsistencyGuardConfig.newBuilder().build());
|
||||
}
|
||||
|
||||
public HoodieTableMetaClient(Configuration conf, String basePath,
|
||||
boolean loadActiveTimelineOnLoad, ConsistencyGuardConfig consistencyGuardConfig)
|
||||
throws DatasetNotFoundException {
|
||||
public HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad,
|
||||
ConsistencyGuardConfig consistencyGuardConfig) throws DatasetNotFoundException {
|
||||
log.info("Loading HoodieTableMetaClient from " + basePath);
|
||||
this.basePath = basePath;
|
||||
this.consistencyGuardConfig = consistencyGuardConfig;
|
||||
@@ -117,15 +114,11 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieTableMetaClient() {
|
||||
}
|
||||
public HoodieTableMetaClient() {}
|
||||
|
||||
public static HoodieTableMetaClient reload(HoodieTableMetaClient oldMetaClient) {
|
||||
return new HoodieTableMetaClient(
|
||||
oldMetaClient.hadoopConf.get(),
|
||||
oldMetaClient.basePath,
|
||||
oldMetaClient.loadActiveTimelineOnLoad,
|
||||
oldMetaClient.consistencyGuardConfig);
|
||||
return new HoodieTableMetaClient(oldMetaClient.hadoopConf.get(), oldMetaClient.basePath,
|
||||
oldMetaClient.loadActiveTimelineOnLoad, oldMetaClient.consistencyGuardConfig);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -133,14 +126,12 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
fs = null; // will be lazily inited
|
||||
}
|
||||
|
||||
private void writeObject(java.io.ObjectOutputStream out)
|
||||
throws IOException {
|
||||
private void writeObject(java.io.ObjectOutputStream out) throws IOException {
|
||||
out.defaultWriteObject();
|
||||
}
|
||||
|
||||
@@ -173,8 +164,9 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns Marker folder path
|
||||
* @param instantTs Instant Timestamp
|
||||
* Returns Marker folder path
|
||||
*
|
||||
* @param instantTs Instant Timestamp
|
||||
* @return
|
||||
*/
|
||||
public String getMarkerFolderPath(String instantTs) {
|
||||
@@ -215,14 +207,17 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy());
|
||||
Preconditions.checkArgument(!(fileSystem instanceof HoodieWrapperFileSystem),
|
||||
"File System not expected to be that of HoodieWrapperFileSystem");
|
||||
fs = new HoodieWrapperFileSystem(fileSystem, consistencyGuardConfig.isConsistencyCheckEnabled()
|
||||
? new FailSafeConsistencyGuard(fileSystem, consistencyGuardConfig) : new NoOpConsistencyGuard());
|
||||
fs = new HoodieWrapperFileSystem(fileSystem,
|
||||
consistencyGuardConfig.isConsistencyCheckEnabled()
|
||||
? new FailSafeConsistencyGuard(fileSystem, consistencyGuardConfig)
|
||||
: new NoOpConsistencyGuard());
|
||||
}
|
||||
return fs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return raw file-system
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public FileSystem getRawFs() {
|
||||
@@ -260,8 +255,8 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the archived commits as a timeline. This is costly operation, as all data from the archived
|
||||
* files are read. This should not be used, unless for historical debugging purposes
|
||||
* Get the archived commits as a timeline. This is costly operation, as all data from the archived files are read.
|
||||
* This should not be used, unless for historical debugging purposes
|
||||
*
|
||||
* @return Active commit timeline
|
||||
*/
|
||||
@@ -276,8 +271,8 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
/**
|
||||
* Helper method to initialize a dataset, with given basePath, tableType, name, archiveFolder
|
||||
*/
|
||||
public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath,
|
||||
String tableType, String tableName, String archiveLogFolder) throws IOException {
|
||||
public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath, String tableType,
|
||||
String tableName, String archiveLogFolder) throws IOException {
|
||||
HoodieTableType type = HoodieTableType.valueOf(tableType);
|
||||
Properties properties = new Properties();
|
||||
properties.put(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tableName);
|
||||
@@ -301,13 +296,12 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as
|
||||
* Properties
|
||||
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as Properties
|
||||
*
|
||||
* @return Instance of HoodieTableMetaClient
|
||||
*/
|
||||
public static HoodieTableMetaClient initDatasetAndGetMetaClient(Configuration hadoopConf,
|
||||
String basePath, Properties props) throws IOException {
|
||||
public static HoodieTableMetaClient initDatasetAndGetMetaClient(Configuration hadoopConf, String basePath,
|
||||
Properties props) throws IOException {
|
||||
log.info("Initializing " + basePath + " as hoodie dataset " + basePath);
|
||||
Path basePathDir = new Path(basePath);
|
||||
final FileSystem fs = FSUtils.getFs(basePath, hadoopConf);
|
||||
@@ -320,9 +314,8 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
}
|
||||
|
||||
// if anything other than default archive log folder is specified, create that too
|
||||
String archiveLogPropVal = props
|
||||
.getProperty(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME,
|
||||
HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER);
|
||||
String archiveLogPropVal = props.getProperty(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME,
|
||||
HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER);
|
||||
if (!archiveLogPropVal.equals(HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER)) {
|
||||
Path archiveLogDir = new Path(metaPathDir, archiveLogPropVal);
|
||||
if (!fs.exists(archiveLogDir)) {
|
||||
@@ -346,14 +339,12 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
// We should not use fs.getConf as this might be different from the original configuration
|
||||
// used to create the fs in unit tests
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(hadoopConf, basePath);
|
||||
log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType()
|
||||
+ " from " + basePath);
|
||||
log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType() + " from " + basePath);
|
||||
return metaClient;
|
||||
}
|
||||
|
||||
// HELPER METHODS TO CREATE META FILE NAMES
|
||||
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter)
|
||||
throws IOException {
|
||||
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter) throws IOException {
|
||||
return fs.listStatus(metaPath, nameFilter);
|
||||
}
|
||||
|
||||
@@ -375,10 +366,10 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the commit + pending-compaction timeline visible for this table.
|
||||
* A RT filesystem view is constructed with this timeline so that file-slice after pending compaction-requested
|
||||
* instant-time is also considered valid. A RT file-system view for reading must then merge the file-slices before
|
||||
* and after pending compaction instant so that all delta-commits are read.
|
||||
* Get the commit + pending-compaction timeline visible for this table. A RT filesystem view is constructed with this
|
||||
* timeline so that file-slice after pending compaction-requested instant-time is also considered valid. A RT
|
||||
* file-system view for reading must then merge the file-slices before and after pending compaction instant so that
|
||||
* all delta-commits are read.
|
||||
*/
|
||||
public HoodieTimeline getCommitsAndCompactionTimeline() {
|
||||
switch (this.getTableType()) {
|
||||
@@ -415,8 +406,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
case MERGE_ON_READ:
|
||||
return HoodieActiveTimeline.DELTA_COMMIT_ACTION;
|
||||
default:
|
||||
throw new HoodieException(
|
||||
"Could not commit on unknown storage type " + this.getTableType());
|
||||
throw new HoodieException("Could not commit on unknown storage type " + this.getTableType());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -424,23 +414,21 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
/**
|
||||
* Helper method to scan all hoodie-instant metafiles and construct HoodieInstant objects
|
||||
*
|
||||
* @param fs FileSystem
|
||||
* @param metaPath Meta Path where hoodie instants are present
|
||||
* @param fs FileSystem
|
||||
* @param metaPath Meta Path where hoodie instants are present
|
||||
* @param includedExtensions Included hoodie extensions
|
||||
* @return List of Hoodie Instants generated
|
||||
* @throws IOException in case of failure
|
||||
*/
|
||||
public static List<HoodieInstant> scanHoodieInstantsFromFileSystem(
|
||||
FileSystem fs, Path metaPath, Set<String> includedExtensions) throws IOException {
|
||||
return Arrays.stream(
|
||||
HoodieTableMetaClient
|
||||
.scanFiles(fs, metaPath, path -> {
|
||||
// Include only the meta files with extensions that needs to be included
|
||||
String extension = FSUtils.getFileExtension(path.getName());
|
||||
return includedExtensions.contains(extension);
|
||||
})).sorted(Comparator.comparing(
|
||||
// Sort the meta-data by the instant time (first part of the file name)
|
||||
fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
|
||||
public static List<HoodieInstant> scanHoodieInstantsFromFileSystem(FileSystem fs, Path metaPath,
|
||||
Set<String> includedExtensions) throws IOException {
|
||||
return Arrays.stream(HoodieTableMetaClient.scanFiles(fs, metaPath, path -> {
|
||||
// Include only the meta files with extensions that needs to be included
|
||||
String extension = FSUtils.getFileExtension(path.getName());
|
||||
return includedExtensions.contains(extension);
|
||||
})).sorted(Comparator.comparing(
|
||||
// Sort the meta-data by the instant time (first part of the file name)
|
||||
fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
|
||||
// create HoodieInstantMarkers from FileStatus, which extracts properties
|
||||
.map(HoodieInstant::new).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@@ -29,10 +29,11 @@ import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.StringUtils;
|
||||
|
||||
/**
|
||||
* HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific
|
||||
* points in time represented as HoodieInstant. <p> Timelines are immutable once created and
|
||||
* operations create new instance of timelines which filter on the instants and this can be
|
||||
* chained.
|
||||
* HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific points in time
|
||||
* represented as HoodieInstant.
|
||||
* <p>
|
||||
* Timelines are immutable once created and operations create new instance of timelines which filter on the instants and
|
||||
* this can be chained.
|
||||
*
|
||||
* @see HoodieTableMetaClient
|
||||
* @see HoodieDefaultTimeline
|
||||
@@ -58,22 +59,19 @@ public interface HoodieTimeline extends Serializable {
|
||||
String CLEAN_EXTENSION = "." + CLEAN_ACTION;
|
||||
String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION;
|
||||
String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION;
|
||||
//this is to preserve backwards compatibility on commit in-flight filenames
|
||||
// this is to preserve backwards compatibility on commit in-flight filenames
|
||||
String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
|
||||
String REQUESTED_COMPACTION_SUFFIX =
|
||||
StringUtils.join(COMPACTION_ACTION, REQUESTED_EXTENSION);
|
||||
String REQUESTED_COMPACTION_EXTENSION =
|
||||
StringUtils.join(".", REQUESTED_COMPACTION_SUFFIX);
|
||||
String INFLIGHT_COMPACTION_EXTENSION =
|
||||
StringUtils.join(".", COMPACTION_ACTION, INFLIGHT_EXTENSION);
|
||||
String REQUESTED_COMPACTION_SUFFIX = StringUtils.join(COMPACTION_ACTION, REQUESTED_EXTENSION);
|
||||
String REQUESTED_COMPACTION_EXTENSION = StringUtils.join(".", REQUESTED_COMPACTION_SUFFIX);
|
||||
String INFLIGHT_COMPACTION_EXTENSION = StringUtils.join(".", COMPACTION_ACTION, INFLIGHT_EXTENSION);
|
||||
String INFLIGHT_RESTORE_EXTENSION = "." + RESTORE_ACTION + INFLIGHT_EXTENSION;
|
||||
String RESTORE_EXTENSION = "." + RESTORE_ACTION;
|
||||
|
||||
String INVALID_INSTANT_TS = "0";
|
||||
String INVALID_INSTANT_TS = "0";
|
||||
|
||||
/**
|
||||
* Filter this timeline to just include the in-flights
|
||||
@@ -97,22 +95,25 @@ public interface HoodieTimeline extends Serializable {
|
||||
HoodieTimeline filterCompletedInstants();
|
||||
|
||||
/**
|
||||
* Filter this timeline to just include the completed + compaction (inflight + requested) instants
|
||||
* A RT filesystem view is constructed with this timeline so that file-slice after pending compaction-requested
|
||||
* instant-time is also considered valid. A RT file-system view for reading must then merge the file-slices before
|
||||
* and after pending compaction instant so that all delta-commits are read.
|
||||
* Filter this timeline to just include the completed + compaction (inflight + requested) instants A RT filesystem
|
||||
* view is constructed with this timeline so that file-slice after pending compaction-requested instant-time is also
|
||||
* considered valid. A RT file-system view for reading must then merge the file-slices before and after pending
|
||||
* compaction instant so that all delta-commits are read.
|
||||
*
|
||||
* @return New instance of HoodieTimeline with just completed instants
|
||||
*/
|
||||
HoodieTimeline filterCompletedAndCompactionInstants();
|
||||
|
||||
/**
|
||||
* Timeline to just include commits (commit/deltacommit) and compaction actions
|
||||
* Timeline to just include commits (commit/deltacommit) and compaction actions
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
HoodieTimeline getCommitsAndCompactionTimeline();
|
||||
|
||||
/**
|
||||
* Filter this timeline to just include requested and inflight compaction instants
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
HoodieTimeline filterPendingCompactionTimeline();
|
||||
@@ -162,6 +163,7 @@ public interface HoodieTimeline extends Serializable {
|
||||
|
||||
/**
|
||||
* Get hash of timeline
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
String getTimelineHash();
|
||||
@@ -177,8 +179,8 @@ public interface HoodieTimeline extends Serializable {
|
||||
boolean containsInstant(HoodieInstant instant);
|
||||
|
||||
/**
|
||||
* @return true if the passed instant is present as a completed instant on the timeline or if the
|
||||
* instant is before the first completed instant in the timeline
|
||||
* @return true if the passed instant is present as a completed instant on the timeline or if the instant is before
|
||||
* the first completed instant in the timeline
|
||||
*/
|
||||
boolean containsOrBeforeTimelineStarts(String ts);
|
||||
|
||||
@@ -188,8 +190,8 @@ public interface HoodieTimeline extends Serializable {
|
||||
Stream<HoodieInstant> getInstants();
|
||||
|
||||
/**
|
||||
* @return Get the stream of completed instants in reverse order
|
||||
* TODO Change code references to getInstants() that reverse the instants later on to use this method instead.
|
||||
* @return Get the stream of completed instants in reverse order TODO Change code references to getInstants() that
|
||||
* reverse the instants later on to use this method instead.
|
||||
*/
|
||||
Stream<HoodieInstant> getReverseOrderedInstants();
|
||||
|
||||
@@ -206,17 +208,13 @@ public interface HoodieTimeline extends Serializable {
|
||||
/**
|
||||
* Helper methods to compare instants
|
||||
**/
|
||||
BiPredicate<String, String> EQUAL =
|
||||
(commit1, commit2) -> commit1.compareTo(commit2) == 0;
|
||||
BiPredicate<String, String> GREATER_OR_EQUAL =
|
||||
(commit1, commit2) -> commit1.compareTo(commit2) >= 0;
|
||||
BiPredicate<String, String> EQUAL = (commit1, commit2) -> commit1.compareTo(commit2) == 0;
|
||||
BiPredicate<String, String> GREATER_OR_EQUAL = (commit1, commit2) -> commit1.compareTo(commit2) >= 0;
|
||||
BiPredicate<String, String> GREATER = (commit1, commit2) -> commit1.compareTo(commit2) > 0;
|
||||
BiPredicate<String, String> LESSER_OR_EQUAL =
|
||||
(commit1, commit2) -> commit1.compareTo(commit2) <= 0;
|
||||
BiPredicate<String, String> LESSER_OR_EQUAL = (commit1, commit2) -> commit1.compareTo(commit2) <= 0;
|
||||
BiPredicate<String, String> LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0;
|
||||
|
||||
static boolean compareTimestamps(String commit1, String commit2,
|
||||
BiPredicate<String, String> predicateToApply) {
|
||||
static boolean compareTimestamps(String commit1, String commit2, BiPredicate<String, String> predicateToApply) {
|
||||
return predicateToApply.test(commit1, commit2);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@ package org.apache.hudi.common.table;
|
||||
* A consolidated file-system view interface exposing both realtime and read-optimized views along with
|
||||
* update operations.
|
||||
*/
|
||||
public interface SyncableFileSystemView extends TableFileSystemView, TableFileSystemView.ReadOptimizedView,
|
||||
TableFileSystemView.RealtimeView {
|
||||
public interface SyncableFileSystemView
|
||||
extends TableFileSystemView, TableFileSystemView.ReadOptimizedView, TableFileSystemView.RealtimeView {
|
||||
|
||||
|
||||
|
||||
@@ -38,9 +38,9 @@ public interface SyncableFileSystemView extends TableFileSystemView, TableFileSy
|
||||
void reset();
|
||||
|
||||
/**
|
||||
* Read the latest timeline and refresh the file-system view to match the current state of the file-system.
|
||||
* The refresh can either be done incrementally (from reading file-slices in metadata files) or from scratch by
|
||||
* reseting view storage
|
||||
* Read the latest timeline and refresh the file-system view to match the current state of the file-system. The
|
||||
* refresh can either be done incrementally (from reading file-slices in metadata files) or from scratch by reseting
|
||||
* view storage
|
||||
*/
|
||||
void sync();
|
||||
}
|
||||
|
||||
@@ -59,8 +59,7 @@ public interface TableFileSystemView {
|
||||
* Stream all the latest version data files in the given partition with precondition that commitTime(file) before
|
||||
* maxCommitTime
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime);
|
||||
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath, String maxCommitTime);
|
||||
|
||||
/**
|
||||
* Stream all the latest data files pass
|
||||
@@ -105,20 +104,20 @@ public interface TableFileSystemView {
|
||||
Stream<FileSlice> getLatestUnCompactedFileSlices(String partitionPath);
|
||||
|
||||
/**
|
||||
* Stream all latest file slices in given partition with precondition that commitTime(file) before maxCommitTime
|
||||
* Stream all latest file slices in given partition with precondition that commitTime(file) before maxCommitTime
|
||||
*
|
||||
* @param partitionPath Partition path
|
||||
* @param maxCommitTime Max Instant Time
|
||||
* @param includeFileSlicesInPendingCompaction include file-slices that are in pending compaction
|
||||
*/
|
||||
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime, boolean includeFileSlicesInPendingCompaction);
|
||||
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime,
|
||||
boolean includeFileSlicesInPendingCompaction);
|
||||
|
||||
/**
|
||||
* Stream all "merged" file-slices before on an instant time
|
||||
* If a file-group has a pending compaction request, the file-slice before and after compaction request instant
|
||||
* is merged and returned.
|
||||
* @param partitionPath Partition Path
|
||||
* Stream all "merged" file-slices before on an instant time If a file-group has a pending compaction request, the
|
||||
* file-slice before and after compaction request instant is merged and returned.
|
||||
*
|
||||
* @param partitionPath Partition Path
|
||||
* @param maxInstantTime Max Instant Time
|
||||
* @return
|
||||
*/
|
||||
|
||||
@@ -51,15 +51,16 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Implements logic to scan log blocks and expose valid and deleted log records to subclass implementation.
|
||||
* Subclass is free to either apply merging or expose raw data back to the caller.
|
||||
* Implements logic to scan log blocks and expose valid and deleted log records to subclass implementation. Subclass is
|
||||
* free to either apply merging or expose raw data back to the caller.
|
||||
*
|
||||
* NOTE: If readBlockLazily is
|
||||
* turned on, does not merge, instead keeps reading log blocks and merges everything at once This is an optimization to
|
||||
* avoid seek() back and forth to read new block (forward seek()) and lazily read content of seen block (reverse and
|
||||
* forward seek()) during merge | | Read Block 1 Metadata | | Read Block 1 Data | | | Read Block 2
|
||||
* Metadata | | Read Block 2 Data | | I/O Pass 1 | ..................... | I/O Pass 2 | ................. | |
|
||||
* | Read Block N Metadata | | Read Block N Data | <p> This results in two I/O passes over the log file.
|
||||
* NOTE: If readBlockLazily is turned on, does not merge, instead keeps reading log blocks and merges everything at once
|
||||
* This is an optimization to avoid seek() back and forth to read new block (forward seek()) and lazily read content of
|
||||
* seen block (reverse and forward seek()) during merge | | Read Block 1 Metadata | | Read Block 1 Data | | | Read Block
|
||||
* 2 Metadata | | Read Block 2 Data | | I/O Pass 1 | ..................... | I/O Pass 2 | ................. | | | Read
|
||||
* Block N Metadata | | Read Block N Data |
|
||||
* <p>
|
||||
* This results in two I/O passes over the log file.
|
||||
*/
|
||||
public abstract class AbstractHoodieLogRecordScanner {
|
||||
|
||||
@@ -122,10 +123,9 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
HoodieLogFormatReader logFormatReaderWrapper = null;
|
||||
try {
|
||||
// iterate over the paths
|
||||
logFormatReaderWrapper =
|
||||
new HoodieLogFormatReader(fs,
|
||||
logFilePaths.stream().map(logFile -> new HoodieLogFile(new Path(logFile)))
|
||||
.collect(Collectors.toList()), readerSchema, readBlocksLazily, reverseReader, bufferSize);
|
||||
logFormatReaderWrapper = new HoodieLogFormatReader(fs,
|
||||
logFilePaths.stream().map(logFile -> new HoodieLogFile(new Path(logFile))).collect(Collectors.toList()),
|
||||
readerSchema, readBlocksLazily, reverseReader, bufferSize);
|
||||
Set<HoodieLogFile> scannedLogFiles = new HashSet<>();
|
||||
while (logFormatReaderWrapper.hasNext()) {
|
||||
HoodieLogFile logFile = logFormatReaderWrapper.getLogFile();
|
||||
@@ -136,10 +136,9 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
HoodieLogBlock r = logFormatReaderWrapper.next();
|
||||
totalLogBlocks.incrementAndGet();
|
||||
if (r.getBlockType() != CORRUPT_BLOCK
|
||||
&& !HoodieTimeline.compareTimestamps(r.getLogBlockHeader().get(INSTANT_TIME),
|
||||
this.latestInstantTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL)) {
|
||||
//hit a block with instant time greater than should be processed, stop processing further
|
||||
&& !HoodieTimeline.compareTimestamps(r.getLogBlockHeader().get(INSTANT_TIME), this.latestInstantTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL)) {
|
||||
// hit a block with instant time greater than should be processed, stop processing further
|
||||
break;
|
||||
}
|
||||
switch (r.getBlockType()) {
|
||||
@@ -167,7 +166,7 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
// Consider the following scenario
|
||||
// (Time 0, C1, Task T1) -> Running
|
||||
// (Time 1, C1, Task T1) -> Failed (Wrote either a corrupt block or a correct
|
||||
// DataBlock (B1) with commitTime C1
|
||||
// DataBlock (B1) with commitTime C1
|
||||
// (Time 2, C1, Task T1.2) -> Running (Task T1 was retried and the attempt number is 2)
|
||||
// (Time 3, C1, Task T1.2) -> Finished (Wrote a correct DataBlock B2)
|
||||
// Now a logFile L1 can have 2 correct Datablocks (B1 and B2) which are the same.
|
||||
@@ -179,8 +178,8 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
log.info("Reading a command block from file " + logFile.getPath());
|
||||
// This is a command block - take appropriate action based on the command
|
||||
HoodieCommandBlock commandBlock = (HoodieCommandBlock) r;
|
||||
String targetInstantForCommandBlock = r.getLogBlockHeader()
|
||||
.get(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME);
|
||||
String targetInstantForCommandBlock =
|
||||
r.getLogBlockHeader().get(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME);
|
||||
switch (commandBlock.getType()) { // there can be different types of command blocks
|
||||
case ROLLBACK_PREVIOUS_BLOCK:
|
||||
// Rollback the last read log block
|
||||
@@ -195,20 +194,17 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
HoodieLogBlock lastBlock = currentInstantLogBlocks.peek();
|
||||
// handle corrupt blocks separately since they may not have metadata
|
||||
if (lastBlock.getBlockType() == CORRUPT_BLOCK) {
|
||||
log.info(
|
||||
"Rolling back the last corrupted log block read in " + logFile.getPath());
|
||||
log.info("Rolling back the last corrupted log block read in " + logFile.getPath());
|
||||
currentInstantLogBlocks.pop();
|
||||
numBlocksRolledBack++;
|
||||
} else if (lastBlock.getBlockType() != CORRUPT_BLOCK
|
||||
&& targetInstantForCommandBlock
|
||||
.contentEquals(lastBlock.getLogBlockHeader().get(INSTANT_TIME))) {
|
||||
&& targetInstantForCommandBlock.contentEquals(lastBlock.getLogBlockHeader().get(INSTANT_TIME))) {
|
||||
// rollback last data block or delete block
|
||||
log.info("Rolling back the last log block read in " + logFile.getPath());
|
||||
currentInstantLogBlocks.pop();
|
||||
numBlocksRolledBack++;
|
||||
} else if (!targetInstantForCommandBlock
|
||||
.contentEquals(
|
||||
currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME))) {
|
||||
.contentEquals(currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME))) {
|
||||
// invalid or extra rollback block
|
||||
log.warn("TargetInstantTime " + targetInstantForCommandBlock
|
||||
+ " invalid or extra rollback command block in " + logFile.getPath());
|
||||
@@ -260,15 +256,14 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
* Checks if the current logblock belongs to a later instant
|
||||
*/
|
||||
private boolean isNewInstantBlock(HoodieLogBlock logBlock) {
|
||||
return currentInstantLogBlocks.size() > 0
|
||||
&& currentInstantLogBlocks.peek().getBlockType() != CORRUPT_BLOCK
|
||||
return currentInstantLogBlocks.size() > 0 && currentInstantLogBlocks.peek().getBlockType() != CORRUPT_BLOCK
|
||||
&& !logBlock.getLogBlockHeader().get(INSTANT_TIME)
|
||||
.contentEquals(currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME));
|
||||
.contentEquals(currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME));
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate over the GenericRecord in the block, read the hoodie key and partition path and
|
||||
* call subclass processors to handle it.
|
||||
* Iterate over the GenericRecord in the block, read the hoodie key and partition path and call subclass processors to
|
||||
* handle it.
|
||||
*/
|
||||
private void processAvroDataBlock(HoodieAvroDataBlock dataBlock) throws Exception {
|
||||
// TODO (NA) - Implement getRecordItr() in HoodieAvroDataBlock and use that here
|
||||
@@ -286,8 +281,7 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
*
|
||||
* @param hoodieRecord Hoodie Record to process
|
||||
*/
|
||||
protected abstract void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord)
|
||||
throws Exception;
|
||||
protected abstract void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) throws Exception;
|
||||
|
||||
/**
|
||||
* Process next deleted key
|
||||
@@ -299,8 +293,7 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
/**
|
||||
* Process the set of log blocks belonging to the last instant which is read fully.
|
||||
*/
|
||||
private void processQueuedBlocksForInstant(Deque<HoodieLogBlock> lastBlocks, int numLogFilesSeen)
|
||||
throws Exception {
|
||||
private void processQueuedBlocksForInstant(Deque<HoodieLogBlock> lastBlocks, int numLogFilesSeen) throws Exception {
|
||||
while (!lastBlocks.isEmpty()) {
|
||||
log.info("Number of remaining logblocks to merge " + lastBlocks.size());
|
||||
// poll the element at the bottom of the stack since that's the order it was inserted
|
||||
|
||||
@@ -46,9 +46,8 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Scans a log file and provides block level iterator on the log file Loads the entire block
|
||||
* contents in memory Can emit either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one
|
||||
* is found)
|
||||
* Scans a log file and provides block level iterator on the log file Loads the entire block contents in memory Can emit
|
||||
* either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one is found)
|
||||
*/
|
||||
class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
|
||||
@@ -71,8 +70,7 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
FSDataInputStream fsDataInputStream = fs.open(logFile.getPath(), bufferSize);
|
||||
if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) {
|
||||
this.inputStream = new FSDataInputStream(
|
||||
new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(),
|
||||
bufferSize));
|
||||
new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize));
|
||||
} else {
|
||||
// fsDataInputStream.getWrappedStream() maybe a BufferedFSInputStream
|
||||
// need to wrap in another BufferedFSInputStream the make bufferSize work?
|
||||
@@ -84,19 +82,17 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
this.readBlockLazily = readBlockLazily;
|
||||
this.reverseReader = reverseReader;
|
||||
if (this.reverseReader) {
|
||||
this.reverseLogFilePosition = this.lastReverseLogFilePosition = fs
|
||||
.getFileStatus(logFile.getPath()).getLen();
|
||||
this.reverseLogFilePosition = this.lastReverseLogFilePosition = fs.getFileStatus(logFile.getPath()).getLen();
|
||||
}
|
||||
addShutDownHook();
|
||||
}
|
||||
|
||||
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema,
|
||||
boolean readBlockLazily, boolean reverseReader) throws IOException {
|
||||
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean readBlockLazily,
|
||||
boolean reverseReader) throws IOException {
|
||||
this(fs, logFile, readerSchema, DEFAULT_BUFFER_SIZE, readBlockLazily, reverseReader);
|
||||
}
|
||||
|
||||
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema)
|
||||
throws IOException {
|
||||
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema) throws IOException {
|
||||
this(fs, logFile, readerSchema, DEFAULT_BUFFER_SIZE, false, false);
|
||||
}
|
||||
|
||||
@@ -154,8 +150,7 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
if (nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION) {
|
||||
type = inputStream.readInt();
|
||||
|
||||
Preconditions.checkArgument(type < HoodieLogBlockType.values().length,
|
||||
"Invalid block byte type found " + type);
|
||||
Preconditions.checkArgument(type < HoodieLogBlockType.values().length, "Invalid block byte type found " + type);
|
||||
blockType = HoodieLogBlockType.values()[type];
|
||||
}
|
||||
|
||||
@@ -198,18 +193,15 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
if (nextBlockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) {
|
||||
return HoodieAvroDataBlock.getBlock(content, readerSchema);
|
||||
} else {
|
||||
return HoodieAvroDataBlock
|
||||
.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
|
||||
contentPosition, contentLength, blockEndPos, readerSchema, header, footer);
|
||||
return HoodieAvroDataBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
|
||||
contentPosition, contentLength, blockEndPos, readerSchema, header, footer);
|
||||
}
|
||||
case DELETE_BLOCK:
|
||||
return HoodieDeleteBlock
|
||||
.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
|
||||
contentPosition, contentLength, blockEndPos, header, footer);
|
||||
return HoodieDeleteBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
|
||||
contentPosition, contentLength, blockEndPos, header, footer);
|
||||
case COMMAND_BLOCK:
|
||||
return HoodieCommandBlock
|
||||
.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
|
||||
contentPosition, contentLength, blockEndPos, header, footer);
|
||||
return HoodieCommandBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
|
||||
contentPosition, contentLength, blockEndPos, header, footer);
|
||||
default:
|
||||
throw new HoodieNotSupportedException("Unsupported Block " + blockType);
|
||||
}
|
||||
@@ -224,12 +216,9 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
log.info("Next available block in " + logFile + " starts at " + nextBlockOffset);
|
||||
int corruptedBlockSize = (int) (nextBlockOffset - currentPos);
|
||||
long contentPosition = inputStream.getPos();
|
||||
byte[] corruptedBytes = HoodieLogBlock
|
||||
.readOrSkipContent(inputStream, corruptedBlockSize, readBlockLazily);
|
||||
return HoodieCorruptBlock
|
||||
.getBlock(logFile, inputStream, Option.ofNullable(corruptedBytes), readBlockLazily,
|
||||
contentPosition, corruptedBlockSize, corruptedBlockSize, new HashMap<>(),
|
||||
new HashMap<>());
|
||||
byte[] corruptedBytes = HoodieLogBlock.readOrSkipContent(inputStream, corruptedBlockSize, readBlockLazily);
|
||||
return HoodieCorruptBlock.getBlock(logFile, inputStream, Option.ofNullable(corruptedBytes), readBlockLazily,
|
||||
contentPosition, corruptedBlockSize, corruptedBlockSize, new HashMap<>(), new HashMap<>());
|
||||
}
|
||||
|
||||
private boolean isBlockCorrupt(int blocksize) throws IOException {
|
||||
@@ -311,8 +300,7 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
boolean hasMagic = hasNextMagic();
|
||||
if (!hasMagic) {
|
||||
throw new CorruptedLogFileException(
|
||||
logFile
|
||||
+ "could not be read. Did not find the magic bytes at the start of the block");
|
||||
logFile + "could not be read. Did not find the magic bytes at the start of the block");
|
||||
}
|
||||
return hasMagic;
|
||||
} catch (EOFException e) {
|
||||
@@ -362,9 +350,9 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a reverse iterator Note: At any point, an instance of HoodieLogFileReader should either
|
||||
* iterate reverse (prev) or forward (next). Doing both in the same instance is not supported
|
||||
* WARNING : Every call to prev() should be preceded with hasPrev()
|
||||
* This is a reverse iterator Note: At any point, an instance of HoodieLogFileReader should either iterate reverse
|
||||
* (prev) or forward (next). Doing both in the same instance is not supported WARNING : Every call to prev() should be
|
||||
* preceded with hasPrev()
|
||||
*/
|
||||
@Override
|
||||
public HoodieLogBlock prev() throws IOException {
|
||||
@@ -380,9 +368,8 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
} catch (Exception e) {
|
||||
// this could be a corrupt block
|
||||
inputStream.seek(blockEndPos);
|
||||
throw new CorruptedLogFileException(
|
||||
"Found possible corrupted block, cannot read log file in reverse, "
|
||||
+ "fallback to forward reading of logfile");
|
||||
throw new CorruptedLogFileException("Found possible corrupted block, cannot read log file in reverse, "
|
||||
+ "fallback to forward reading of logfile");
|
||||
}
|
||||
boolean hasNext = hasNext();
|
||||
reverseLogFilePosition -= blockSize;
|
||||
@@ -391,10 +378,9 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverse pointer, does not read the block. Return the current position of the log file (in
|
||||
* reverse) If the pointer (inputstream) is moved in any way, it is the job of the client of this
|
||||
* class to seek/reset it back to the file position returned from the method to expect correct
|
||||
* results
|
||||
* Reverse pointer, does not read the block. Return the current position of the log file (in reverse) If the pointer
|
||||
* (inputstream) is moved in any way, it is the job of the client of this class to seek/reset it back to the file
|
||||
* position returned from the method to expect correct results
|
||||
*/
|
||||
public long moveToPrev() throws IOException {
|
||||
|
||||
|
||||
@@ -33,11 +33,10 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* File Format for Hoodie Log Files. The File Format consists of blocks each separated with a
|
||||
* MAGIC sync marker. A Block can either be a Data block, Command block or Delete Block. Data
|
||||
* Block - Contains log records serialized as Avro Binary Format Command Block - Specific commands
|
||||
* like ROLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block Delete Block - List of
|
||||
* keys to delete - tombstone for keys
|
||||
* File Format for Hoodie Log Files. The File Format consists of blocks each separated with a MAGIC sync marker. A Block
|
||||
* can either be a Data block, Command block or Delete Block. Data Block - Contains log records serialized as Avro
|
||||
* Binary Format Command Block - Specific commands like ROLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written
|
||||
* block Delete Block - List of keys to delete - tombstone for keys
|
||||
*/
|
||||
public interface HoodieLogFormat {
|
||||
|
||||
@@ -47,8 +46,8 @@ public interface HoodieLogFormat {
|
||||
byte[] MAGIC = new byte[] {'#', 'H', 'U', 'D', 'I', '#'};
|
||||
|
||||
/**
|
||||
* The current version of the log format. Anytime the log format changes this version needs to be
|
||||
* bumped and corresponding changes need to be made to {@link HoodieLogFormatVersion}
|
||||
* The current version of the log format. Anytime the log format changes this version needs to be bumped and
|
||||
* corresponding changes need to be made to {@link HoodieLogFormatVersion}
|
||||
*/
|
||||
int currentVersion = 1;
|
||||
|
||||
@@ -84,12 +83,14 @@ public interface HoodieLogFormat {
|
||||
|
||||
/**
|
||||
* Read log file in reverse order and check if prev block is present
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean hasPrev();
|
||||
|
||||
/**
|
||||
* Read log file in reverse order and return prev block if present
|
||||
*
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
@@ -220,9 +221,8 @@ public interface HoodieLogFormat {
|
||||
// Use rollover write token as write token to create new log file with tokens
|
||||
logWriteToken = rolloverLogWriteToken;
|
||||
}
|
||||
log.info(
|
||||
"Computed the next log version for " + logFileId + " in " + parentPath + " as "
|
||||
+ logVersion + " with write-token " + logWriteToken);
|
||||
log.info("Computed the next log version for " + logFileId + " in " + parentPath + " as " + logVersion
|
||||
+ " with write-token " + logWriteToken);
|
||||
}
|
||||
|
||||
if (logWriteToken == null) {
|
||||
@@ -259,16 +259,15 @@ public interface HoodieLogFormat {
|
||||
return new HoodieLogFileReader(fs, logFile, readerSchema, HoodieLogFileReader.DEFAULT_BUFFER_SIZE, false, false);
|
||||
}
|
||||
|
||||
static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean
|
||||
readBlockLazily, boolean reverseReader)
|
||||
throws IOException {
|
||||
return new HoodieLogFileReader(fs, logFile, readerSchema, HoodieLogFileReader.DEFAULT_BUFFER_SIZE,
|
||||
readBlockLazily, reverseReader);
|
||||
static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema,
|
||||
boolean readBlockLazily, boolean reverseReader) throws IOException {
|
||||
return new HoodieLogFileReader(fs, logFile, readerSchema, HoodieLogFileReader.DEFAULT_BUFFER_SIZE, readBlockLazily,
|
||||
reverseReader);
|
||||
}
|
||||
|
||||
/**
|
||||
* A set of feature flags associated with a log format. Versions are changed when the log format
|
||||
* changes. TODO(na) - Implement policies around major/minor versions
|
||||
* A set of feature flags associated with a log format. Versions are changed when the log format changes. TODO(na) -
|
||||
* Implement policies around major/minor versions
|
||||
*/
|
||||
abstract class LogFormatVersion {
|
||||
|
||||
|
||||
@@ -43,8 +43,8 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
|
||||
private static final Logger log = LogManager.getLogger(HoodieLogFormatReader.class);
|
||||
|
||||
HoodieLogFormatReader(FileSystem fs, List<HoodieLogFile> logFiles,
|
||||
Schema readerSchema, boolean readBlocksLazily, boolean reverseLogReader, int bufferSize) throws IOException {
|
||||
HoodieLogFormatReader(FileSystem fs, List<HoodieLogFile> logFiles, Schema readerSchema, boolean readBlocksLazily,
|
||||
boolean reverseLogReader, int bufferSize) throws IOException {
|
||||
this.logFiles = logFiles;
|
||||
this.fs = fs;
|
||||
this.readerSchema = readerSchema;
|
||||
@@ -60,10 +60,9 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
|
||||
@Override
|
||||
/**
|
||||
* Note : In lazy mode, clients must ensure close() should be called only after processing
|
||||
* all log-blocks as the underlying inputstream will be closed.
|
||||
* TODO: We can introduce invalidate() API at HoodieLogBlock and this object can call invalidate on
|
||||
* all returned log-blocks so that we check this scenario specifically in HoodieLogBlock
|
||||
* Note : In lazy mode, clients must ensure close() should be called only after processing all log-blocks as the
|
||||
* underlying inputstream will be closed. TODO: We can introduce invalidate() API at HoodieLogBlock and this object
|
||||
* can call invalidate on all returned log-blocks so that we check this scenario specifically in HoodieLogBlock
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
|
||||
@@ -94,8 +93,8 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
} else {
|
||||
this.prevReadersInOpenState.add(currentReader);
|
||||
}
|
||||
this.currentReader = new HoodieLogFileReader(fs, nextLogFile, readerSchema, bufferSize, readBlocksLazily,
|
||||
false);
|
||||
this.currentReader =
|
||||
new HoodieLogFileReader(fs, nextLogFile, readerSchema, bufferSize, readBlocksLazily, false);
|
||||
} catch (IOException io) {
|
||||
throw new HoodieIOException("unable to initialize read with log file ", io);
|
||||
}
|
||||
@@ -116,8 +115,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
}
|
||||
public void remove() {}
|
||||
|
||||
@Override
|
||||
public boolean hasPrev() {
|
||||
|
||||
@@ -19,8 +19,7 @@
|
||||
package org.apache.hudi.common.table.log;
|
||||
|
||||
/**
|
||||
* Implements logic to determine behavior for feature flags for
|
||||
* {@link HoodieLogFormat.LogFormatVersion}.
|
||||
* Implements logic to determine behavior for feature flags for {@link HoodieLogFormat.LogFormatVersion}.
|
||||
*/
|
||||
final class HoodieLogFormatVersion extends HoodieLogFormat.LogFormatVersion {
|
||||
|
||||
|
||||
@@ -38,8 +38,7 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* HoodieLogFormatWriter can be used to append blocks to a log file Use
|
||||
* HoodieLogFormat.WriterBuilder to construct
|
||||
* HoodieLogFormatWriter can be used to append blocks to a log file Use HoodieLogFormat.WriterBuilder to construct
|
||||
*/
|
||||
public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
|
||||
@@ -62,9 +61,8 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
* @param replication
|
||||
* @param sizeThreshold
|
||||
*/
|
||||
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize,
|
||||
Short replication, Long sizeThreshold, String logWriteToken, String rolloverLogWriteToken)
|
||||
throws IOException, InterruptedException {
|
||||
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize, Short replication, Long sizeThreshold,
|
||||
String logWriteToken, String rolloverLogWriteToken) throws IOException, InterruptedException {
|
||||
this.fs = fs;
|
||||
this.logFile = logFile;
|
||||
this.sizeThreshold = sizeThreshold;
|
||||
@@ -116,12 +114,11 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Writer appendBlock(HoodieLogBlock block)
|
||||
throws IOException, InterruptedException {
|
||||
public Writer appendBlock(HoodieLogBlock block) throws IOException, InterruptedException {
|
||||
|
||||
// Find current version
|
||||
HoodieLogFormat.LogFormatVersion currentLogFormatVersion = new HoodieLogFormatVersion(
|
||||
HoodieLogFormat.currentVersion);
|
||||
HoodieLogFormat.LogFormatVersion currentLogFormatVersion =
|
||||
new HoodieLogFormatVersion(HoodieLogFormat.currentVersion);
|
||||
long currentSize = this.output.size();
|
||||
|
||||
// 1. Write the magic header for the start of the block
|
||||
@@ -135,8 +132,7 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
byte[] footerBytes = HoodieLogBlock.getLogMetadataBytes(block.getLogBlockFooter());
|
||||
|
||||
// 2. Write the total size of the block (excluding Magic)
|
||||
this.output
|
||||
.writeLong(getLogBlockLength(content.length, headerBytes.length, footerBytes.length));
|
||||
this.output.writeLong(getLogBlockLength(content.length, headerBytes.length, footerBytes.length));
|
||||
|
||||
// 3. Write the version of this log block
|
||||
this.output.writeInt(currentLogFormatVersion.getVersion());
|
||||
@@ -162,26 +158,24 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns the total LogBlock Length which is the sum of 1. Number of bytes to write
|
||||
* version 2. Number of bytes to write ordinal 3. Length of the headers 4. Number of bytes used to
|
||||
* write content length 5. Length of the content 6. Length of the footers 7. Number of bytes to
|
||||
* write totalLogBlockLength
|
||||
* This method returns the total LogBlock Length which is the sum of 1. Number of bytes to write version 2. Number of
|
||||
* bytes to write ordinal 3. Length of the headers 4. Number of bytes used to write content length 5. Length of the
|
||||
* content 6. Length of the footers 7. Number of bytes to write totalLogBlockLength
|
||||
*/
|
||||
private int getLogBlockLength(int contentLength, int headerLength, int footerLength) {
|
||||
return
|
||||
Integer.BYTES + // Number of bytes to write version
|
||||
Integer.BYTES + // Number of bytes to write ordinal
|
||||
headerLength + // Length of the headers
|
||||
Long.BYTES + // Number of bytes used to write content length
|
||||
contentLength + // Length of the content
|
||||
footerLength + // Length of the footers
|
||||
Long.BYTES; // bytes to write totalLogBlockLength at end of block (for reverse ptr)
|
||||
return Integer.BYTES + // Number of bytes to write version
|
||||
Integer.BYTES + // Number of bytes to write ordinal
|
||||
headerLength + // Length of the headers
|
||||
Long.BYTES + // Number of bytes used to write content length
|
||||
contentLength + // Length of the content
|
||||
footerLength + // Length of the footers
|
||||
Long.BYTES; // bytes to write totalLogBlockLength at end of block (for reverse ptr)
|
||||
}
|
||||
|
||||
private Writer rolloverIfNeeded() throws IOException, InterruptedException {
|
||||
// Roll over if the size is past the threshold
|
||||
if (getCurrentSize() > sizeThreshold) {
|
||||
//TODO - make an end marker which seals the old log file (no more appends possible to that
|
||||
// TODO - make an end marker which seals the old log file (no more appends possible to that
|
||||
// file).
|
||||
log.info("CurrentSize " + getCurrentSize() + " has reached threshold " + sizeThreshold
|
||||
+ ". Rolling over to the next version");
|
||||
@@ -195,8 +189,8 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
}
|
||||
|
||||
private void createNewFile() throws IOException {
|
||||
this.output = fs.create(this.logFile.getPath(), false, bufferSize, replication,
|
||||
WriterBuilder.DEFAULT_SIZE_THRESHOLD, null);
|
||||
this.output =
|
||||
fs.create(this.logFile.getPath(), false, bufferSize, replication, WriterBuilder.DEFAULT_SIZE_THRESHOLD, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -218,14 +212,13 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
|
||||
public long getCurrentSize() throws IOException {
|
||||
if (output == null) {
|
||||
throw new IllegalStateException(
|
||||
"Cannot get current size as the underlying stream has been closed already");
|
||||
throw new IllegalStateException("Cannot get current size as the underlying stream has been closed already");
|
||||
}
|
||||
return output.getPos();
|
||||
}
|
||||
|
||||
private void handleAppendExceptionOrRecoverLease(Path path, RemoteException e) throws IOException,
|
||||
InterruptedException {
|
||||
private void handleAppendExceptionOrRecoverLease(Path path, RemoteException e)
|
||||
throws IOException, InterruptedException {
|
||||
if (e.getMessage().contains(APPEND_UNAVAILABLE_EXCEPTION_MESSAGE)) {
|
||||
// This issue happens when all replicas for a file are down and/or being decommissioned.
|
||||
// The fs.append() API could append to the last block for a file. If the last block is full, a new block is
|
||||
|
||||
@@ -40,12 +40,13 @@ import org.apache.log4j.Logger;
|
||||
* Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged list of records which will
|
||||
* be used as a lookup table when merging the base columnar file with the redo log file.
|
||||
*
|
||||
* NOTE: If readBlockLazily is
|
||||
* turned on, does not merge, instead keeps reading log blocks and merges everything at once This is an optimization to
|
||||
* avoid seek() back and forth to read new block (forward seek()) and lazily read content of seen block (reverse and
|
||||
* forward seek()) during merge | | Read Block 1 Metadata | | Read Block 1 Data | | | Read Block 2
|
||||
* Metadata | | Read Block 2 Data | | I/O Pass 1 | ..................... | I/O Pass 2 | ................. | |
|
||||
* | Read Block N Metadata | | Read Block N Data | <p> This results in two I/O passes over the log file.
|
||||
* NOTE: If readBlockLazily is turned on, does not merge, instead keeps reading log blocks and merges everything at once
|
||||
* This is an optimization to avoid seek() back and forth to read new block (forward seek()) and lazily read content of
|
||||
* seen block (reverse and forward seek()) during merge | | Read Block 1 Metadata | | Read Block 1 Data | | | Read Block
|
||||
* 2 Metadata | | Read Block 2 Data | | I/O Pass 1 | ..................... | I/O Pass 2 | ................. | | | Read
|
||||
* Block N Metadata | | Read Block N Data |
|
||||
* <p>
|
||||
* This results in two I/O passes over the log file.
|
||||
*/
|
||||
|
||||
public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordScanner
|
||||
@@ -65,26 +66,24 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordScanner
|
||||
public final HoodieTimer timer = new HoodieTimer();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
|
||||
Schema readerSchema, String latestInstantTime, Long maxMemorySizeInBytes,
|
||||
boolean readBlocksLazily, boolean reverseReader, int bufferSize, String spillableMapBasePath) {
|
||||
public HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths, Schema readerSchema,
|
||||
String latestInstantTime, Long maxMemorySizeInBytes, boolean readBlocksLazily, boolean reverseReader,
|
||||
int bufferSize, String spillableMapBasePath) {
|
||||
super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize);
|
||||
try {
|
||||
// Store merged records for all versions for this log file, set the in-memory footprint to maxInMemoryMapSize
|
||||
this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, spillableMapBasePath,
|
||||
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(readerSchema));
|
||||
this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, spillableMapBasePath, new DefaultSizeEstimator(),
|
||||
new HoodieRecordSizeEstimator(readerSchema));
|
||||
// Do the scan and merge
|
||||
timer.startTimer();
|
||||
scan();
|
||||
this.totalTimeTakenToReadAndMergeBlocks = timer.endTimer();
|
||||
this.numMergedRecordsInLog = records.size();
|
||||
log.info("MaxMemoryInBytes allowed for compaction => " + maxMemorySizeInBytes);
|
||||
log.info("Number of entries in MemoryBasedMap in ExternalSpillableMap => " + records
|
||||
.getInMemoryMapNumEntries());
|
||||
log.info("Total size in bytes of MemoryBasedMap in ExternalSpillableMap => " + records
|
||||
.getCurrentInMemoryMapSize());
|
||||
log.info("Number of entries in DiskBasedMap in ExternalSpillableMap => " + records
|
||||
.getDiskBasedMapNumEntries());
|
||||
log.info("Number of entries in MemoryBasedMap in ExternalSpillableMap => " + records.getInMemoryMapNumEntries());
|
||||
log.info(
|
||||
"Total size in bytes of MemoryBasedMap in ExternalSpillableMap => " + records.getCurrentInMemoryMapSize());
|
||||
log.info("Number of entries in DiskBasedMap in ExternalSpillableMap => " + records.getDiskBasedMapNumEntries());
|
||||
log.info("Size of file spilled to disk => " + records.getSizeOfFileOnDiskInBytes());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("IOException when reading log file ");
|
||||
|
||||
@@ -29,9 +29,8 @@ public class HoodieUnMergedLogRecordScanner extends AbstractHoodieLogRecordScann
|
||||
|
||||
private final LogRecordScannerCallback callback;
|
||||
|
||||
public HoodieUnMergedLogRecordScanner(FileSystem fs, String basePath,
|
||||
List<String> logFilePaths, Schema readerSchema, String latestInstantTime,
|
||||
boolean readBlocksLazily, boolean reverseReader, int bufferSize,
|
||||
public HoodieUnMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths, Schema readerSchema,
|
||||
String latestInstantTime, boolean readBlocksLazily, boolean reverseReader, int bufferSize,
|
||||
LogRecordScannerCallback callback) {
|
||||
super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize);
|
||||
this.callback = callback;
|
||||
|
||||
@@ -48,9 +48,8 @@ import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
/**
|
||||
* DataBlock contains a list of records serialized using Avro. The Datablock contains 1. Data Block
|
||||
* version 2. Total number of records in the block 3. Size of a record 4. Actual avro serialized
|
||||
* content of the record
|
||||
* DataBlock contains a list of records serialized using Avro. The Datablock contains 1. Data Block version 2. Total
|
||||
* number of records in the block 3. Size of a record 4. Actual avro serialized content of the record
|
||||
*/
|
||||
public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
|
||||
@@ -59,41 +58,31 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
private ThreadLocal<BinaryEncoder> encoderCache = new ThreadLocal<>();
|
||||
private ThreadLocal<BinaryDecoder> decoderCache = new ThreadLocal<>();
|
||||
|
||||
public HoodieAvroDataBlock(@Nonnull List<IndexedRecord> records,
|
||||
@Nonnull Map<HeaderMetadataType, String> header,
|
||||
public HoodieAvroDataBlock(@Nonnull List<IndexedRecord> records, @Nonnull Map<HeaderMetadataType, String> header,
|
||||
@Nonnull Map<HeaderMetadataType, String> footer) {
|
||||
super(header, footer, Option.empty(), Option.empty(), null, false);
|
||||
this.records = records;
|
||||
this.schema = Schema.parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
|
||||
}
|
||||
|
||||
public HoodieAvroDataBlock(@Nonnull List<IndexedRecord> records,
|
||||
@Nonnull Map<HeaderMetadataType, String> header) {
|
||||
public HoodieAvroDataBlock(@Nonnull List<IndexedRecord> records, @Nonnull Map<HeaderMetadataType, String> header) {
|
||||
this(records, header, new HashMap<>());
|
||||
}
|
||||
|
||||
private HoodieAvroDataBlock(Option<byte[]> content, @Nonnull FSDataInputStream inputStream,
|
||||
boolean readBlockLazily, Option<HoodieLogBlockContentLocation> blockContentLocation,
|
||||
Schema readerSchema, @Nonnull Map<HeaderMetadataType, String> headers,
|
||||
@Nonnull Map<HeaderMetadataType, String> footer) {
|
||||
private HoodieAvroDataBlock(Option<byte[]> content, @Nonnull FSDataInputStream inputStream, boolean readBlockLazily,
|
||||
Option<HoodieLogBlockContentLocation> blockContentLocation, Schema readerSchema,
|
||||
@Nonnull Map<HeaderMetadataType, String> headers, @Nonnull Map<HeaderMetadataType, String> footer) {
|
||||
super(headers, footer, blockContentLocation, content, inputStream, readBlockLazily);
|
||||
this.schema = readerSchema;
|
||||
}
|
||||
|
||||
public static HoodieLogBlock getBlock(HoodieLogFile logFile,
|
||||
FSDataInputStream inputStream,
|
||||
Option<byte[]> content,
|
||||
boolean readBlockLazily,
|
||||
long position,
|
||||
long blockSize,
|
||||
long blockEndpos,
|
||||
Schema readerSchema,
|
||||
Map<HeaderMetadataType, String> header,
|
||||
Map<HeaderMetadataType, String> footer) {
|
||||
public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option<byte[]> content,
|
||||
boolean readBlockLazily, long position, long blockSize, long blockEndpos, Schema readerSchema,
|
||||
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
|
||||
|
||||
return new HoodieAvroDataBlock(content, inputStream, readBlockLazily,
|
||||
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)),
|
||||
readerSchema, header, footer);
|
||||
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)), readerSchema, header,
|
||||
footer);
|
||||
|
||||
}
|
||||
|
||||
@@ -171,8 +160,8 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
return schema;
|
||||
}
|
||||
|
||||
//TODO (na) - Break down content into smaller chunks of byte [] to be GC as they are used
|
||||
//TODO (na) - Implement a recordItr instead of recordList
|
||||
// TODO (na) - Break down content into smaller chunks of byte [] to be GC as they are used
|
||||
// TODO (na) - Implement a recordItr instead of recordList
|
||||
private void createRecordsFromContentBytes() throws IOException {
|
||||
|
||||
if (readBlockLazily && !getContent().isPresent()) {
|
||||
@@ -181,16 +170,14 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
}
|
||||
|
||||
SizeAwareDataInputStream dis =
|
||||
new SizeAwareDataInputStream(
|
||||
new DataInputStream(new ByteArrayInputStream(getContent().get())));
|
||||
new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get())));
|
||||
|
||||
// 1. Read version for this data block
|
||||
int version = dis.readInt();
|
||||
HoodieAvroDataBlockVersion logBlockVersion = new HoodieAvroDataBlockVersion(version);
|
||||
|
||||
// Get schema from the header
|
||||
Schema writerSchema = new Schema.Parser()
|
||||
.parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
|
||||
Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
|
||||
|
||||
// If readerSchema was not present, use writerSchema
|
||||
if (schema == null) {
|
||||
@@ -208,8 +195,8 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
// 3. Read the content
|
||||
for (int i = 0; i < totalRecords; i++) {
|
||||
int recordLength = dis.readInt();
|
||||
BinaryDecoder decoder = DecoderFactory.get()
|
||||
.binaryDecoder(getContent().get(), dis.getNumberOfBytesRead(), recordLength, decoderCache.get());
|
||||
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(getContent().get(), dis.getNumberOfBytesRead(),
|
||||
recordLength, decoderCache.get());
|
||||
decoderCache.set(decoder);
|
||||
IndexedRecord record = reader.read(null, decoder);
|
||||
records.add(record);
|
||||
@@ -221,13 +208,13 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
deflate();
|
||||
}
|
||||
|
||||
/*********************************DEPRECATED METHODS***********************************/
|
||||
/********************************* DEPRECATED METHODS ***********************************/
|
||||
|
||||
@Deprecated
|
||||
@VisibleForTesting
|
||||
/**
|
||||
* This constructor is retained to provide backwards compatibility to HoodieArchivedLogs
|
||||
* which were written using HoodieLogFormat V1
|
||||
* This constructor is retained to provide backwards compatibility to HoodieArchivedLogs which were written using
|
||||
* HoodieLogFormat V1
|
||||
*/
|
||||
public HoodieAvroDataBlock(List<IndexedRecord> records, Schema schema) {
|
||||
super(new HashMap<>(), new HashMap<>(), Option.empty(), Option.empty(), null, false);
|
||||
@@ -237,13 +224,12 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
|
||||
@Deprecated
|
||||
/**
|
||||
* This method is retained to provide backwards compatibility to HoodieArchivedLogs which
|
||||
* were written using HoodieLogFormat V1
|
||||
* This method is retained to provide backwards compatibility to HoodieArchivedLogs which were written using
|
||||
* HoodieLogFormat V1
|
||||
*/
|
||||
public static HoodieLogBlock getBlock(byte[] content, Schema readerSchema) throws IOException {
|
||||
|
||||
SizeAwareDataInputStream dis = new SizeAwareDataInputStream(
|
||||
new DataInputStream(new ByteArrayInputStream(content)));
|
||||
SizeAwareDataInputStream dis = new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(content)));
|
||||
|
||||
// 1. Read the schema written out
|
||||
int schemaLength = dis.readInt();
|
||||
@@ -263,8 +249,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
// 3. Read the content
|
||||
for (int i = 0; i < totalRecords; i++) {
|
||||
int recordLength = dis.readInt();
|
||||
Decoder decoder = DecoderFactory.get()
|
||||
.binaryDecoder(content, dis.getNumberOfBytesRead(), recordLength, null);
|
||||
Decoder decoder = DecoderFactory.get().binaryDecoder(content, dis.getNumberOfBytesRead(), recordLength, null);
|
||||
IndexedRecord record = reader.read(null, decoder);
|
||||
records.add(record);
|
||||
dis.skipBytes(recordLength);
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
package org.apache.hudi.common.table.log.block;
|
||||
|
||||
/**
|
||||
* A set of feature flags associated with a data log block format. Versions are changed when the log
|
||||
* block format changes. TODO(na) - Implement policies around major/minor versions
|
||||
* A set of feature flags associated with a data log block format. Versions are changed when the log block format
|
||||
* changes. TODO(na) - Implement policies around major/minor versions
|
||||
*/
|
||||
final class HoodieAvroDataBlockVersion extends HoodieLogBlockVersion {
|
||||
|
||||
|
||||
@@ -39,12 +39,12 @@ public class HoodieCommandBlock extends HoodieLogBlock {
|
||||
this(Option.empty(), null, false, Option.empty(), header, new HashMap<>());
|
||||
}
|
||||
|
||||
private HoodieCommandBlock(Option<byte[]> content, FSDataInputStream inputStream,
|
||||
boolean readBlockLazily, Option<HoodieLogBlockContentLocation> blockContentLocation,
|
||||
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
|
||||
private HoodieCommandBlock(Option<byte[]> content, FSDataInputStream inputStream, boolean readBlockLazily,
|
||||
Option<HoodieLogBlockContentLocation> blockContentLocation, Map<HeaderMetadataType, String> header,
|
||||
Map<HeaderMetadataType, String> footer) {
|
||||
super(header, footer, blockContentLocation, content, inputStream, readBlockLazily);
|
||||
this.type = HoodieCommandBlockTypeEnum.values()[Integer
|
||||
.parseInt(header.get(HeaderMetadataType.COMMAND_BLOCK_TYPE))];
|
||||
this.type =
|
||||
HoodieCommandBlockTypeEnum.values()[Integer.parseInt(header.get(HeaderMetadataType.COMMAND_BLOCK_TYPE))];
|
||||
}
|
||||
|
||||
public HoodieCommandBlockTypeEnum getType() {
|
||||
@@ -61,18 +61,11 @@ public class HoodieCommandBlock extends HoodieLogBlock {
|
||||
return new byte[0];
|
||||
}
|
||||
|
||||
public static HoodieLogBlock getBlock(HoodieLogFile logFile,
|
||||
FSDataInputStream inputStream,
|
||||
Option<byte[]> content,
|
||||
boolean readBlockLazily,
|
||||
long position,
|
||||
long blockSize,
|
||||
long blockEndpos,
|
||||
Map<HeaderMetadataType, String> header,
|
||||
public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option<byte[]> content,
|
||||
boolean readBlockLazily, long position, long blockSize, long blockEndpos, Map<HeaderMetadataType, String> header,
|
||||
Map<HeaderMetadataType, String> footer) {
|
||||
|
||||
return new HoodieCommandBlock(content, inputStream, readBlockLazily,
|
||||
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)),
|
||||
header, footer);
|
||||
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)), header, footer);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
package org.apache.hudi.common.table.log.block;
|
||||
|
||||
/**
|
||||
* A set of feature flags associated with a command log block format. Versions are changed when the
|
||||
* log block format changes. TODO(na) - Implement policies around major/minor versions
|
||||
* A set of feature flags associated with a command log block format. Versions are changed when the log block format
|
||||
* changes. TODO(na) - Implement policies around major/minor versions
|
||||
*/
|
||||
final class HoodieCommandBlockVersion extends HoodieLogBlockVersion {
|
||||
|
||||
|
||||
@@ -25,14 +25,14 @@ import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
/**
|
||||
* Corrupt block is emitted whenever the scanner finds the length of the block written at the
|
||||
* beginning does not match (did not find a EOF or a sync marker after the length)
|
||||
* Corrupt block is emitted whenever the scanner finds the length of the block written at the beginning does not match
|
||||
* (did not find a EOF or a sync marker after the length)
|
||||
*/
|
||||
public class HoodieCorruptBlock extends HoodieLogBlock {
|
||||
|
||||
private HoodieCorruptBlock(Option<byte[]> corruptedBytes, FSDataInputStream inputStream,
|
||||
boolean readBlockLazily, Option<HoodieLogBlockContentLocation> blockContentLocation,
|
||||
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
|
||||
private HoodieCorruptBlock(Option<byte[]> corruptedBytes, FSDataInputStream inputStream, boolean readBlockLazily,
|
||||
Option<HoodieLogBlockContentLocation> blockContentLocation, Map<HeaderMetadataType, String> header,
|
||||
Map<HeaderMetadataType, String> footer) {
|
||||
super(header, footer, blockContentLocation, corruptedBytes, inputStream, readBlockLazily);
|
||||
}
|
||||
|
||||
@@ -51,18 +51,11 @@ public class HoodieCorruptBlock extends HoodieLogBlock {
|
||||
return HoodieLogBlockType.CORRUPT_BLOCK;
|
||||
}
|
||||
|
||||
public static HoodieLogBlock getBlock(HoodieLogFile logFile,
|
||||
FSDataInputStream inputStream,
|
||||
Option<byte[]> corruptedBytes,
|
||||
boolean readBlockLazily,
|
||||
long position,
|
||||
long blockSize,
|
||||
long blockEndPos,
|
||||
Map<HeaderMetadataType, String> header,
|
||||
Map<HeaderMetadataType, String> footer) {
|
||||
public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream,
|
||||
Option<byte[]> corruptedBytes, boolean readBlockLazily, long position, long blockSize, long blockEndPos,
|
||||
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
|
||||
|
||||
return new HoodieCorruptBlock(corruptedBytes, inputStream, readBlockLazily,
|
||||
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)),
|
||||
header, footer);
|
||||
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)), header, footer);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,16 +40,15 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
|
||||
|
||||
private HoodieKey[] keysToDelete;
|
||||
|
||||
public HoodieDeleteBlock(HoodieKey[] keysToDelete,
|
||||
Map<HeaderMetadataType, String> header) {
|
||||
public HoodieDeleteBlock(HoodieKey[] keysToDelete, Map<HeaderMetadataType, String> header) {
|
||||
this(Option.empty(), null, false, Option.empty(), header, new HashMap<>());
|
||||
this.keysToDelete = keysToDelete;
|
||||
}
|
||||
|
||||
|
||||
private HoodieDeleteBlock(Option<byte[]> content, FSDataInputStream inputStream,
|
||||
boolean readBlockLazily, Option<HoodieLogBlockContentLocation> blockContentLocation,
|
||||
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
|
||||
private HoodieDeleteBlock(Option<byte[]> content, FSDataInputStream inputStream, boolean readBlockLazily,
|
||||
Option<HoodieLogBlockContentLocation> blockContentLocation, Map<HeaderMetadataType, String> header,
|
||||
Map<HeaderMetadataType, String> footer) {
|
||||
super(header, footer, blockContentLocation, content, inputStream, readBlockLazily);
|
||||
}
|
||||
|
||||
@@ -81,8 +80,7 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
|
||||
inflate();
|
||||
}
|
||||
SizeAwareDataInputStream dis =
|
||||
new SizeAwareDataInputStream(
|
||||
new DataInputStream(new ByteArrayInputStream(getContent().get())));
|
||||
new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get())));
|
||||
int version = dis.readInt();
|
||||
int dataLength = dis.readInt();
|
||||
byte[] data = new byte[dataLength];
|
||||
@@ -101,18 +99,11 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
|
||||
return HoodieLogBlockType.DELETE_BLOCK;
|
||||
}
|
||||
|
||||
public static HoodieLogBlock getBlock(HoodieLogFile logFile,
|
||||
FSDataInputStream inputStream,
|
||||
Option<byte[]> content,
|
||||
boolean readBlockLazily,
|
||||
long position,
|
||||
long blockSize,
|
||||
long blockEndPos,
|
||||
Map<HeaderMetadataType, String> header,
|
||||
public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option<byte[]> content,
|
||||
boolean readBlockLazily, long position, long blockSize, long blockEndPos, Map<HeaderMetadataType, String> header,
|
||||
Map<HeaderMetadataType, String> footer) throws IOException {
|
||||
|
||||
return new HoodieDeleteBlock(content, inputStream, readBlockLazily,
|
||||
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)),
|
||||
header, footer);
|
||||
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)), header, footer);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
package org.apache.hudi.common.table.log.block;
|
||||
|
||||
/**
|
||||
* A set of feature flags associated with a delete log block format. Versions are changed when the
|
||||
* log block format changes. TODO(na) - Implement policies around major/minor versions
|
||||
* A set of feature flags associated with a delete log block format. Versions are changed when the log block format
|
||||
* changes. TODO(na) - Implement policies around major/minor versions
|
||||
*/
|
||||
final class HoodieDeleteBlockVersion extends HoodieLogBlockVersion {
|
||||
|
||||
|
||||
@@ -40,10 +40,9 @@ import org.apache.hudi.exception.HoodieIOException;
|
||||
public abstract class HoodieLogBlock {
|
||||
|
||||
/**
|
||||
* The current version of the log block. Anytime the logBlock format changes this version needs to
|
||||
* be bumped and corresponding changes need to be made to {@link HoodieLogBlockVersion} TODO :
|
||||
* Change this to a class, something like HoodieLogBlockVersionV1/V2 and implement/override
|
||||
* operations there
|
||||
* The current version of the log block. Anytime the logBlock format changes this version needs to be bumped and
|
||||
* corresponding changes need to be made to {@link HoodieLogBlockVersion} TODO : Change this to a class, something
|
||||
* like HoodieLogBlockVersionV1/V2 and implement/override operations there
|
||||
*/
|
||||
public static int version = 1;
|
||||
// Header for each log block
|
||||
@@ -63,10 +62,8 @@ public abstract class HoodieLogBlock {
|
||||
|
||||
public HoodieLogBlock(@Nonnull Map<HeaderMetadataType, String> logBlockHeader,
|
||||
@Nonnull Map<HeaderMetadataType, String> logBlockFooter,
|
||||
@Nonnull Option<HoodieLogBlockContentLocation> blockContentLocation,
|
||||
@Nonnull Option<byte[]> content,
|
||||
FSDataInputStream inputStream,
|
||||
boolean readBlockLazily) {
|
||||
@Nonnull Option<HoodieLogBlockContentLocation> blockContentLocation, @Nonnull Option<byte[]> content,
|
||||
FSDataInputStream inputStream, boolean readBlockLazily) {
|
||||
this.logBlockHeader = logBlockHeader;
|
||||
this.logBlockFooter = logBlockFooter;
|
||||
this.blockContentLocation = blockContentLocation;
|
||||
@@ -109,38 +106,30 @@ public abstract class HoodieLogBlock {
|
||||
}
|
||||
|
||||
/**
|
||||
* Type of the log block WARNING: This enum is serialized as the ordinal. Only add new enums at
|
||||
* the end.
|
||||
* Type of the log block WARNING: This enum is serialized as the ordinal. Only add new enums at the end.
|
||||
*/
|
||||
public enum HoodieLogBlockType {
|
||||
COMMAND_BLOCK,
|
||||
DELETE_BLOCK,
|
||||
CORRUPT_BLOCK,
|
||||
AVRO_DATA_BLOCK
|
||||
COMMAND_BLOCK, DELETE_BLOCK, CORRUPT_BLOCK, AVRO_DATA_BLOCK
|
||||
}
|
||||
|
||||
/**
|
||||
* Log Metadata headers abstraction for a HoodieLogBlock WARNING : This enum is serialized as the
|
||||
* ordinal. Only add new enums at the end.
|
||||
* Log Metadata headers abstraction for a HoodieLogBlock WARNING : This enum is serialized as the ordinal. Only add
|
||||
* new enums at the end.
|
||||
*/
|
||||
public enum HeaderMetadataType {
|
||||
INSTANT_TIME,
|
||||
TARGET_INSTANT_TIME,
|
||||
SCHEMA,
|
||||
COMMAND_BLOCK_TYPE
|
||||
INSTANT_TIME, TARGET_INSTANT_TIME, SCHEMA, COMMAND_BLOCK_TYPE
|
||||
}
|
||||
|
||||
/**
|
||||
* Log Metadata footers abstraction for a HoodieLogBlock WARNING : This enum is serialized as the
|
||||
* ordinal. Only add new enums at the end.
|
||||
* Log Metadata footers abstraction for a HoodieLogBlock WARNING : This enum is serialized as the ordinal. Only add
|
||||
* new enums at the end.
|
||||
*/
|
||||
public enum FooterMetadataType {
|
||||
}
|
||||
|
||||
/**
|
||||
* This class is used to store the Location of the Content of a Log Block. It's used when a client
|
||||
* chooses for a IO intensive CompactedScanner, the location helps to lazily read contents from
|
||||
* the log file
|
||||
* This class is used to store the Location of the Content of a Log Block. It's used when a client chooses for a IO
|
||||
* intensive CompactedScanner, the location helps to lazily read contents from the log file
|
||||
*/
|
||||
public static final class HoodieLogBlockContentLocation {
|
||||
|
||||
@@ -153,8 +142,8 @@ public abstract class HoodieLogBlock {
|
||||
// The final position where the complete block ends
|
||||
private final long blockEndPos;
|
||||
|
||||
HoodieLogBlockContentLocation(HoodieLogFile logFile, long contentPositionInLogFile,
|
||||
long blockSize, long blockEndPos) {
|
||||
HoodieLogBlockContentLocation(HoodieLogFile logFile, long contentPositionInLogFile, long blockSize,
|
||||
long blockEndPos) {
|
||||
this.logFile = logFile;
|
||||
this.contentPositionInLogFile = contentPositionInLogFile;
|
||||
this.blockSize = blockSize;
|
||||
@@ -179,11 +168,9 @@ public abstract class HoodieLogBlock {
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert log metadata to bytes 1. Write size of metadata 2. Write enum ordinal 3. Write actual
|
||||
* bytes
|
||||
* Convert log metadata to bytes 1. Write size of metadata 2. Write enum ordinal 3. Write actual bytes
|
||||
*/
|
||||
public static byte[] getLogMetadataBytes(Map<HeaderMetadataType, String> metadata)
|
||||
throws IOException {
|
||||
public static byte[] getLogMetadataBytes(Map<HeaderMetadataType, String> metadata) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream output = new DataOutputStream(baos);
|
||||
output.writeInt(metadata.size());
|
||||
@@ -197,11 +184,9 @@ public abstract class HoodieLogBlock {
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert bytes to LogMetadata, follow the same order as
|
||||
* {@link HoodieLogBlock#getLogMetadataBytes}
|
||||
* Convert bytes to LogMetadata, follow the same order as {@link HoodieLogBlock#getLogMetadataBytes}
|
||||
*/
|
||||
public static Map<HeaderMetadataType, String> getLogMetadata(DataInputStream dis)
|
||||
throws IOException {
|
||||
public static Map<HeaderMetadataType, String> getLogMetadata(DataInputStream dis) throws IOException {
|
||||
|
||||
Map<HeaderMetadataType, String> metadata = Maps.newHashMap();
|
||||
// 1. Read the metadata written out
|
||||
@@ -225,8 +210,8 @@ public abstract class HoodieLogBlock {
|
||||
* Read or Skip block content of a log block in the log file. Depends on lazy reading enabled in
|
||||
* {@link HoodieMergedLogRecordScanner}
|
||||
*/
|
||||
public static byte[] readOrSkipContent(FSDataInputStream inputStream,
|
||||
Integer contentLength, boolean readBlockLazily) throws IOException {
|
||||
public static byte[] readOrSkipContent(FSDataInputStream inputStream, Integer contentLength, boolean readBlockLazily)
|
||||
throws IOException {
|
||||
byte[] content = null;
|
||||
if (!readBlockLazily) {
|
||||
// Read the contents in memory
|
||||
@@ -261,9 +246,8 @@ public abstract class HoodieLogBlock {
|
||||
}
|
||||
|
||||
/**
|
||||
* After the content bytes is converted into the required DataStructure by a logBlock, deflate the
|
||||
* content to release byte [] and relieve memory pressure when GC kicks in. NOTE: This still
|
||||
* leaves the heap fragmented
|
||||
* After the content bytes is converted into the required DataStructure by a logBlock, deflate the content to release
|
||||
* byte [] and relieve memory pressure when GC kicks in. NOTE: This still leaves the heap fragmented
|
||||
*/
|
||||
protected void deflate() {
|
||||
content = Option.empty();
|
||||
@@ -271,8 +255,9 @@ public abstract class HoodieLogBlock {
|
||||
|
||||
/**
|
||||
* Handles difference in seek behavior for GCS and non-GCS input stream
|
||||
*
|
||||
* @param inputStream Input Stream
|
||||
* @param pos Position to seek
|
||||
* @param pos Position to seek
|
||||
* @throws IOException
|
||||
*/
|
||||
private static void safeSeek(FSDataInputStream inputStream, long pos) throws IOException {
|
||||
|
||||
@@ -43,22 +43,25 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours
|
||||
* (configurable) is in the ActiveTimeline and the rest are Archived. ActiveTimeline is a special
|
||||
* timeline that allows for creation of instants on the timeline. <p></p> The timeline is not
|
||||
* automatically reloaded on any mutation operation, clients have to manually call reload() so that
|
||||
* they can chain multiple mutations to the timeline and then call reload() once. <p></p> This class
|
||||
* can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
|
||||
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours (configurable) is in the
|
||||
* ActiveTimeline and the rest are Archived. ActiveTimeline is a special timeline that allows for creation of instants
|
||||
* on the timeline.
|
||||
* <p>
|
||||
* </p>
|
||||
* The timeline is not automatically reloaded on any mutation operation, clients have to manually call reload() so that
|
||||
* they can chain multiple mutations to the timeline and then call reload() once.
|
||||
* <p>
|
||||
* </p>
|
||||
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
|
||||
*/
|
||||
public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
|
||||
public static final SimpleDateFormat COMMIT_FORMATTER = new SimpleDateFormat("yyyyMMddHHmmss");
|
||||
|
||||
public static final Set<String> VALID_EXTENSIONS_IN_ACTIVE_TIMELINE = new HashSet<>(Arrays.asList(
|
||||
new String[]{COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION,
|
||||
INFLIGHT_DELTA_COMMIT_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
|
||||
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION, INFLIGHT_COMPACTION_EXTENSION, REQUESTED_COMPACTION_EXTENSION,
|
||||
INFLIGHT_RESTORE_EXTENSION, RESTORE_EXTENSION}));
|
||||
public static final Set<String> VALID_EXTENSIONS_IN_ACTIVE_TIMELINE = new HashSet<>(Arrays.asList(new String[] {
|
||||
COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION, INFLIGHT_DELTA_COMMIT_EXTENSION,
|
||||
SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION, CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION,
|
||||
INFLIGHT_COMPACTION_EXTENSION, REQUESTED_COMPACTION_EXTENSION, INFLIGHT_RESTORE_EXTENSION, RESTORE_EXTENSION}));
|
||||
|
||||
private static final transient Logger log = LogManager.getLogger(HoodieActiveTimeline.class);
|
||||
protected HoodieTableMetaClient metaClient;
|
||||
@@ -83,14 +86,11 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
this.metaClient = metaClient;
|
||||
// multiple casts will make this lambda serializable -
|
||||
// http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
|
||||
this.details =
|
||||
(Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
|
||||
this.details = (Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
|
||||
}
|
||||
|
||||
public HoodieActiveTimeline(HoodieTableMetaClient metaClient) {
|
||||
this(metaClient,
|
||||
new ImmutableSet.Builder<String>()
|
||||
.addAll(VALID_EXTENSIONS_IN_ACTIVE_TIMELINE).build());
|
||||
this(metaClient, new ImmutableSet.Builder<String>().addAll(VALID_EXTENSIONS_IN_ACTIVE_TIMELINE).build());
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -98,16 +98,14 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieActiveTimeline() {
|
||||
}
|
||||
public HoodieActiveTimeline() {}
|
||||
|
||||
/**
|
||||
* This method is only used when this object is deserialized in a spark executor.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
}
|
||||
|
||||
@@ -116,29 +114,25 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
*
|
||||
*/
|
||||
public HoodieTimeline getCommitsTimeline() {
|
||||
return getTimelineOfActions(
|
||||
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION));
|
||||
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all instants (commits, delta commits, in-flight/request compaction) that produce new data, in the active
|
||||
* timeline *
|
||||
* With Async compaction a requested/inflight compaction-instant is a valid baseInstant for a file-slice as there
|
||||
* could be delta-commits with that baseInstant.
|
||||
* timeline * With Async compaction a requested/inflight compaction-instant is a valid baseInstant for a file-slice as
|
||||
* there could be delta-commits with that baseInstant.
|
||||
*/
|
||||
public HoodieTimeline getCommitsAndCompactionTimeline() {
|
||||
return getTimelineOfActions(
|
||||
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, COMPACTION_ACTION));
|
||||
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, COMPACTION_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all instants (commits, delta commits, clean, savepoint, rollback) that result in actions,
|
||||
* in the active timeline *
|
||||
* Get all instants (commits, delta commits, clean, savepoint, rollback) that result in actions, in the active
|
||||
* timeline *
|
||||
*/
|
||||
public HoodieTimeline getAllCommitsTimeline() {
|
||||
return getTimelineOfActions(
|
||||
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, COMPACTION_ACTION,
|
||||
SAVEPOINT_ACTION, ROLLBACK_ACTION));
|
||||
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, COMPACTION_ACTION,
|
||||
SAVEPOINT_ACTION, ROLLBACK_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -157,8 +151,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a timeline of a specific set of actions. useful to create a merged timeline of multiple
|
||||
* actions
|
||||
* Get a timeline of a specific set of actions. useful to create a merged timeline of multiple actions
|
||||
*
|
||||
* @param actions actions allowed in the timeline
|
||||
*/
|
||||
@@ -246,8 +239,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
throw new HoodieIOException("Could not delete in-flight instant " + instant);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not remove inflight commit " + inFlightCommitFilePath, e);
|
||||
throw new HoodieIOException("Could not remove inflight commit " + inFlightCommitFilePath, e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -299,7 +291,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
* Transition Compaction State from inflight to Committed
|
||||
*
|
||||
* @param inflightInstant Inflight instant
|
||||
* @param data Extra Metadata
|
||||
* @param data Extra Metadata
|
||||
* @return commit instant
|
||||
*/
|
||||
public HoodieInstant transitionCompactionInflightToComplete(HoodieInstant inflightInstant, Option<byte[]> data) {
|
||||
@@ -319,8 +311,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
* END - COMPACTION RELATED META-DATA MANAGEMENT
|
||||
**/
|
||||
|
||||
private void transitionState(HoodieInstant fromInstant, HoodieInstant toInstant,
|
||||
Option<byte[]> data) {
|
||||
private void transitionState(HoodieInstant fromInstant, HoodieInstant toInstant, Option<byte[]> data) {
|
||||
Preconditions.checkArgument(fromInstant.getTimestamp().equals(toInstant.getTimestamp()));
|
||||
Path commitFilePath = new Path(metaClient.getMetaPath(), toInstant.getFileName());
|
||||
try {
|
||||
@@ -329,8 +320,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
createFileInMetaPath(fromInstant.getFileName(), data);
|
||||
boolean success = metaClient.getFs().rename(inflightCommitFile, commitFilePath);
|
||||
if (!success) {
|
||||
throw new HoodieIOException(
|
||||
"Could not rename " + inflightCommitFile + " to " + commitFilePath);
|
||||
throw new HoodieIOException("Could not rename " + inflightCommitFile + " to " + commitFilePath);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not complete " + fromInstant, e);
|
||||
@@ -345,8 +335,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
Path commitFilePath = new Path(metaClient.getMetaPath(), completed.getFileName());
|
||||
boolean success = metaClient.getFs().rename(commitFilePath, inFlightCommitFilePath);
|
||||
if (!success) {
|
||||
throw new HoodieIOException(
|
||||
"Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
|
||||
throw new HoodieIOException("Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
|
||||
@@ -36,11 +36,15 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours
|
||||
* (configurable) is in the ActiveTimeline and the rest are in ArchivedTimeline. <p></p> Instants
|
||||
* are read from the archive file during initialization and never refreshed. To refresh, clients
|
||||
* need to call reload() <p></p> This class can be serialized and de-serialized and on
|
||||
* de-serialization the FileSystem is re-initialized.
|
||||
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours (configurable) is in the
|
||||
* ActiveTimeline and the rest are in ArchivedTimeline.
|
||||
* <p>
|
||||
* </p>
|
||||
* Instants are read from the archive file during initialization and never refreshed. To refresh, clients need to call
|
||||
* reload()
|
||||
* <p>
|
||||
* </p>
|
||||
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
|
||||
*/
|
||||
public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
|
||||
|
||||
@@ -54,8 +58,7 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
|
||||
// Read back the commits to make sure
|
||||
Path archiveLogPath = HoodieArchivedTimeline.getArchiveLogPath(metaClient.getArchivePath());
|
||||
try (SequenceFile.Reader reader =
|
||||
new SequenceFile.Reader(metaClient.getHadoopConf(),
|
||||
SequenceFile.Reader.file(archiveLogPath))) {
|
||||
new SequenceFile.Reader(metaClient.getHadoopConf(), SequenceFile.Reader.file(archiveLogPath))) {
|
||||
Text key = new Text();
|
||||
Text val = new Text();
|
||||
while (reader.next(key, val)) {
|
||||
@@ -63,17 +66,14 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
|
||||
// This is okay because only tooling will load the archived commit timeline today
|
||||
readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
|
||||
}
|
||||
this.setInstants(readCommits.keySet().stream().map(
|
||||
s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)).collect(
|
||||
Collectors.toList()));
|
||||
this.setInstants(readCommits.keySet().stream().map(s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s))
|
||||
.collect(Collectors.toList()));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not load archived commit timeline from path " + archiveLogPath, e);
|
||||
throw new HoodieIOException("Could not load archived commit timeline from path " + archiveLogPath, e);
|
||||
}
|
||||
// multiple casts will make this lambda serializable -
|
||||
// http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
|
||||
this.details =
|
||||
(Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
|
||||
this.details = (Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
|
||||
this.metaClient = metaClient;
|
||||
}
|
||||
|
||||
@@ -82,16 +82,14 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieArchivedTimeline() {
|
||||
}
|
||||
public HoodieArchivedTimeline() {}
|
||||
|
||||
/**
|
||||
* This method is only used when this object is deserialized in a spark executor.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
}
|
||||
|
||||
|
||||
@@ -37,9 +37,8 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* HoodieDefaultTimeline is a default implementation of the HoodieTimeline. It provides methods to
|
||||
* inspect a List[HoodieInstant]. Function to get the details of the instant is passed in as a
|
||||
* lamdba.
|
||||
* HoodieDefaultTimeline is a default implementation of the HoodieTimeline. It provides methods to inspect a
|
||||
* List[HoodieInstant]. Function to get the details of the instant is passed in as a lamdba.
|
||||
*
|
||||
* @see HoodieTimeline
|
||||
*/
|
||||
@@ -53,8 +52,7 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
private List<HoodieInstant> instants;
|
||||
private String timelineHash;
|
||||
|
||||
public HoodieDefaultTimeline(Stream<HoodieInstant> instants,
|
||||
Function<HoodieInstant, Option<byte[]>> details) {
|
||||
public HoodieDefaultTimeline(Stream<HoodieInstant> instants, Function<HoodieInstant, Option<byte[]>> details) {
|
||||
this.details = details;
|
||||
setInstants(instants.collect(Collectors.toList()));
|
||||
}
|
||||
@@ -64,8 +62,8 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
final MessageDigest md;
|
||||
try {
|
||||
md = MessageDigest.getInstance(HASHING_ALGORITHM);
|
||||
this.instants.stream().forEach(i -> md.update(
|
||||
StringUtils.joinUsingDelim("_", i.getTimestamp(), i.getAction(), i.getState().name()).getBytes()));
|
||||
this.instants.stream().forEach(i -> md
|
||||
.update(StringUtils.joinUsingDelim("_", i.getTimestamp(), i.getAction(), i.getState().name()).getBytes()));
|
||||
} catch (NoSuchAlgorithmException nse) {
|
||||
throw new HoodieException(nse);
|
||||
}
|
||||
@@ -78,13 +76,11 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieDefaultTimeline() {
|
||||
}
|
||||
public HoodieDefaultTimeline() {}
|
||||
|
||||
@Override
|
||||
public HoodieTimeline filterInflights() {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight),
|
||||
details);
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight), details);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -115,24 +111,22 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
@Override
|
||||
public HoodieTimeline filterPendingCompactionTimeline() {
|
||||
return new HoodieDefaultTimeline(
|
||||
instants.stream().filter(s -> s.getAction().equals(HoodieTimeline.COMPACTION_ACTION)),
|
||||
details);
|
||||
instants.stream().filter(s -> s.getAction().equals(HoodieTimeline.COMPACTION_ACTION)), details);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(
|
||||
s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER)
|
||||
&& HoodieTimeline.compareTimestamps(
|
||||
s.getTimestamp(), endTs, LESSER_OR_EQUAL)), details);
|
||||
return new HoodieDefaultTimeline(
|
||||
instants.stream().filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER)
|
||||
&& HoodieTimeline.compareTimestamps(s.getTimestamp(), endTs, LESSER_OR_EQUAL)),
|
||||
details);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) {
|
||||
return new HoodieDefaultTimeline(
|
||||
instants.stream()
|
||||
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER))
|
||||
.limit(numCommits), details);
|
||||
return new HoodieDefaultTimeline(instants.stream()
|
||||
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER)).limit(numCommits),
|
||||
details);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -183,8 +177,7 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
|
||||
@Override
|
||||
public boolean containsOrBeforeTimelineStarts(String instant) {
|
||||
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant))
|
||||
|| isBeforeTimelineStarts(instant);
|
||||
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant)) || isBeforeTimelineStarts(instant);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -218,8 +211,7 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.getClass().getName() + ": " + instants.stream().map(Object::toString)
|
||||
.collect(Collectors.joining(","));
|
||||
return this.getClass().getName() + ": " + instants.stream().map(Object::toString).collect(Collectors.joining(","));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -25,8 +25,8 @@ import org.apache.hudi.common.table.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.FSUtils;
|
||||
|
||||
/**
|
||||
* A Hoodie Instant represents an action done on a hoodie dataset. All actions start with an inflight
|
||||
* instant and then create a completed instant after done.
|
||||
* A Hoodie Instant represents an action done on a hoodie dataset. All actions start with an inflight instant and then
|
||||
* create a completed instant after done.
|
||||
*
|
||||
* @see HoodieTimeline
|
||||
*/
|
||||
@@ -76,7 +76,7 @@ public class HoodieInstant implements Serializable {
|
||||
}
|
||||
|
||||
public HoodieInstant(boolean isInflight, String action, String timestamp) {
|
||||
//TODO: vb - Preserving for avoiding cascading changes. This constructor will be updated in subsequent PR
|
||||
// TODO: vb - Preserving for avoiding cascading changes. This constructor will be updated in subsequent PR
|
||||
this.state = isInflight ? State.INFLIGHT : State.COMPLETED;
|
||||
this.action = action;
|
||||
this.timestamp = timestamp;
|
||||
@@ -151,9 +151,7 @@ public class HoodieInstant implements Serializable {
|
||||
return false;
|
||||
}
|
||||
HoodieInstant that = (HoodieInstant) o;
|
||||
return state == that.state
|
||||
&& Objects.equals(action, that.action)
|
||||
&& Objects.equals(timestamp, that.timestamp);
|
||||
return state == that.state && Objects.equals(action, that.action) && Objects.equals(timestamp, that.timestamp);
|
||||
}
|
||||
|
||||
public State getState() {
|
||||
|
||||
@@ -55,8 +55,7 @@ public class CompactionOpDTO {
|
||||
@JsonProperty("metrics")
|
||||
private Map<String, Double> metrics;
|
||||
|
||||
public static CompactionOpDTO fromCompactionOperation(String compactionInstantTime,
|
||||
CompactionOperation op) {
|
||||
public static CompactionOpDTO fromCompactionOperation(String compactionInstantTime, CompactionOperation op) {
|
||||
CompactionOpDTO dto = new CompactionOpDTO();
|
||||
dto.fileId = op.getFileId();
|
||||
dto.compactionInstantTime = compactionInstantTime;
|
||||
@@ -70,8 +69,9 @@ public class CompactionOpDTO {
|
||||
}
|
||||
|
||||
public static Pair<String, CompactionOperation> toCompactionOperation(CompactionOpDTO dto) {
|
||||
return Pair.of(dto.compactionInstantTime, new CompactionOperation(dto.fileId, dto.partitionPath,
|
||||
dto.baseInstantTime, Option.ofNullable(dto.dataFileCommitTime), dto.deltaFilePaths,
|
||||
Option.ofNullable(dto.dataFilePath), dto.metrics));
|
||||
return Pair.of(dto.compactionInstantTime,
|
||||
new CompactionOperation(dto.fileId, dto.partitionPath, dto.baseInstantTime,
|
||||
Option.ofNullable(dto.dataFileCommitTime), dto.deltaFilePaths, Option.ofNullable(dto.dataFilePath),
|
||||
dto.metrics));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,8 +50,8 @@ public class FileGroupDTO {
|
||||
}
|
||||
|
||||
public static HoodieFileGroup toFileGroup(FileGroupDTO dto, HoodieTableMetaClient metaClient) {
|
||||
HoodieFileGroup fileGroup = new HoodieFileGroup(dto.partition, dto.id,
|
||||
TimelineDTO.toTimeline(dto.timeline, metaClient));
|
||||
HoodieFileGroup fileGroup =
|
||||
new HoodieFileGroup(dto.partition, dto.id, TimelineDTO.toTimeline(dto.timeline, metaClient));
|
||||
dto.slices.stream().map(FileSliceDTO::toFileSlice).forEach(fileSlice -> fileGroup.addFileSlice(fileSlice));
|
||||
return fileGroup;
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ public class TimelineDTO {
|
||||
}
|
||||
|
||||
public static HoodieTimeline toTimeline(TimelineDTO dto, HoodieTableMetaClient metaClient) {
|
||||
//TODO: For Now, we will assume, only active-timeline will be transferred.
|
||||
// TODO: For Now, we will assume, only active-timeline will be transferred.
|
||||
return new HoodieDefaultTimeline(dto.instants.stream().map(InstantDTO::toInstant),
|
||||
metaClient.getActiveTimeline()::getInstantDetails);
|
||||
}
|
||||
|
||||
@@ -56,13 +56,11 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Common thread-safe implementation for multiple TableFileSystemView Implementations.
|
||||
* Provides uniform handling of
|
||||
* (a) Loading file-system views from underlying file-system
|
||||
* (b) Pending compaction operations and changing file-system views based on that
|
||||
* (c) Thread-safety in loading and managing file system views for this dataset.
|
||||
* (d) resetting file-system views
|
||||
* The actual mechanism of fetching file slices from different view storages is delegated to sub-classes.
|
||||
* Common thread-safe implementation for multiple TableFileSystemView Implementations. Provides uniform handling of (a)
|
||||
* Loading file-system views from underlying file-system (b) Pending compaction operations and changing file-system
|
||||
* views based on that (c) Thread-safety in loading and managing file system views for this dataset. (d) resetting
|
||||
* file-system views The actual mechanism of fetching file slices from different view storages is delegated to
|
||||
* sub-classes.
|
||||
*/
|
||||
public abstract class AbstractTableFileSystemView implements SyncableFileSystemView, Serializable {
|
||||
|
||||
@@ -94,15 +92,14 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
refreshTimeline(visibleActiveTimeline);
|
||||
|
||||
// Load Pending Compaction Operations
|
||||
resetPendingCompactionOperations(
|
||||
CompactionUtils.getAllPendingCompactionOperations(metaClient).values()
|
||||
.stream().map(e -> Pair.of(e.getKey(),
|
||||
CompactionOperation.convertFromAvroRecordInstance(e.getValue()))));
|
||||
resetPendingCompactionOperations(CompactionUtils.getAllPendingCompactionOperations(metaClient).values().stream()
|
||||
.map(e -> Pair.of(e.getKey(), CompactionOperation.convertFromAvroRecordInstance(e.getValue()))));
|
||||
}
|
||||
|
||||
/**
|
||||
* Refresh commits timeline
|
||||
* @param visibleActiveTimeline Visible Active Timeline
|
||||
*
|
||||
* @param visibleActiveTimeline Visible Active Timeline
|
||||
*/
|
||||
protected void refreshTimeline(HoodieTimeline visibleActiveTimeline) {
|
||||
this.visibleCommitsAndCompactionTimeline = visibleActiveTimeline.getCommitsAndCompactionTimeline();
|
||||
@@ -117,13 +114,12 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
long fgBuildTimeTakenMs = timer.endTimer();
|
||||
timer.startTimer();
|
||||
// Group by partition for efficient updates for both InMemory and DiskBased structures.
|
||||
fileGroups.stream().collect(Collectors.groupingBy(HoodieFileGroup::getPartitionPath)).entrySet()
|
||||
.forEach(entry -> {
|
||||
String partition = entry.getKey();
|
||||
if (!isPartitionAvailableInStore(partition)) {
|
||||
storePartitionView(partition, entry.getValue());
|
||||
}
|
||||
});
|
||||
fileGroups.stream().collect(Collectors.groupingBy(HoodieFileGroup::getPartitionPath)).entrySet().forEach(entry -> {
|
||||
String partition = entry.getKey();
|
||||
if (!isPartitionAvailableInStore(partition)) {
|
||||
storePartitionView(partition, entry.getValue());
|
||||
}
|
||||
});
|
||||
long storePartitionsTs = timer.endTimer();
|
||||
log.info("addFilesToView: NumFiles=" + statuses.length + ", FileGroupsCreationTime=" + fgBuildTimeTakenMs
|
||||
+ ", StoreTimeTaken=" + storePartitionsTs);
|
||||
@@ -141,19 +137,17 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
|
||||
protected List<HoodieFileGroup> buildFileGroups(Stream<HoodieDataFile> dataFileStream,
|
||||
Stream<HoodieLogFile> logFileStream, HoodieTimeline timeline, boolean addPendingCompactionFileSlice) {
|
||||
Map<Pair<String, String>, List<HoodieDataFile>> dataFiles = dataFileStream
|
||||
.collect(Collectors.groupingBy((dataFile) -> {
|
||||
Map<Pair<String, String>, List<HoodieDataFile>> dataFiles =
|
||||
dataFileStream.collect(Collectors.groupingBy((dataFile) -> {
|
||||
String partitionPathStr = getPartitionPathFromFilePath(dataFile.getPath());
|
||||
return Pair.of(partitionPathStr, dataFile.getFileId());
|
||||
}));
|
||||
|
||||
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = logFileStream
|
||||
.collect(Collectors.groupingBy((logFile) -> {
|
||||
String partitionPathStr = FSUtils.getRelativePartitionPath(
|
||||
new Path(metaClient.getBasePath()),
|
||||
logFile.getPath().getParent());
|
||||
return Pair.of(partitionPathStr, logFile.getFileId());
|
||||
}));
|
||||
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = logFileStream.collect(Collectors.groupingBy((logFile) -> {
|
||||
String partitionPathStr =
|
||||
FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), logFile.getPath().getParent());
|
||||
return Pair.of(partitionPathStr, logFile.getFileId());
|
||||
}));
|
||||
|
||||
Set<Pair<String, String>> fileIdSet = new HashSet<>(dataFiles.keySet());
|
||||
fileIdSet.addAll(logFiles.keySet());
|
||||
@@ -228,8 +222,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
long beginLsTs = System.currentTimeMillis();
|
||||
FileStatus[] statuses = metaClient.getFs().listStatus(partitionPath);
|
||||
long endLsTs = System.currentTimeMillis();
|
||||
log.info("#files found in partition (" + partitionPathStr + ") =" + statuses.length
|
||||
+ ", Time taken =" + (endLsTs - beginLsTs));
|
||||
log.info("#files found in partition (" + partitionPathStr + ") =" + statuses.length + ", Time taken ="
|
||||
+ (endLsTs - beginLsTs));
|
||||
List<HoodieFileGroup> groups = addFilesToView(statuses);
|
||||
|
||||
if (groups.isEmpty()) {
|
||||
@@ -253,9 +247,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
* @param statuses List of File-Status
|
||||
*/
|
||||
private Stream<HoodieDataFile> convertFileStatusesToDataFiles(FileStatus[] statuses) {
|
||||
Predicate<FileStatus> roFilePredicate = fileStatus ->
|
||||
fileStatus.getPath().getName()
|
||||
.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
|
||||
Predicate<FileStatus> roFilePredicate = fileStatus -> fileStatus.getPath().getName()
|
||||
.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
|
||||
return Arrays.stream(statuses).filter(roFilePredicate).map(HoodieDataFile::new);
|
||||
}
|
||||
|
||||
@@ -265,9 +258,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
* @param statuses List of File-Status
|
||||
*/
|
||||
private Stream<HoodieLogFile> convertFileStatusesToLogFiles(FileStatus[] statuses) {
|
||||
Predicate<FileStatus> rtFilePredicate = fileStatus ->
|
||||
fileStatus.getPath().getName()
|
||||
.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
|
||||
Predicate<FileStatus> rtFilePredicate = fileStatus -> fileStatus.getPath().getName()
|
||||
.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
|
||||
return Arrays.stream(statuses).filter(rtFilePredicate).map(HoodieLogFile::new);
|
||||
}
|
||||
|
||||
@@ -311,8 +303,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
log.info("File Slice (" + fileSlice + ") is in pending compaction");
|
||||
// Data file is filtered out of the file-slice as the corresponding compaction
|
||||
// instant not completed yet.
|
||||
FileSlice transformed = new FileSlice(fileSlice.getPartitionPath(),
|
||||
fileSlice.getBaseInstantTime(), fileSlice.getFileId());
|
||||
FileSlice transformed =
|
||||
new FileSlice(fileSlice.getPartitionPath(), fileSlice.getBaseInstantTime(), fileSlice.getFileId());
|
||||
fileSlice.getLogFiles().forEach(transformed::addLogFile);
|
||||
return transformed;
|
||||
}
|
||||
@@ -359,14 +351,10 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchAllStoredFileGroups(partitionPath)
|
||||
.map(fileGroup -> Option.fromJavaOptional(fileGroup.getAllDataFiles()
|
||||
.filter(dataFile ->
|
||||
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df))
|
||||
.findFirst()))
|
||||
.filter(Option::isPresent)
|
||||
.map(Option::get);
|
||||
.filter(dataFile -> HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst()))
|
||||
.filter(Option::isPresent).map(Option::get);
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
@@ -378,13 +366,10 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchHoodieFileGroup(partitionPath, fileId)
|
||||
.map(fileGroup -> fileGroup.getAllDataFiles()
|
||||
.filter(dataFile ->
|
||||
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
|
||||
instantTime, HoodieTimeline.EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df))
|
||||
.findFirst().orElse(null));
|
||||
return fetchHoodieFileGroup(partitionPath, fileId).map(fileGroup -> fileGroup.getAllDataFiles()
|
||||
.filter(
|
||||
dataFile -> HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), instantTime, HoodieTimeline.EQUAL))
|
||||
.filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst().orElse(null));
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
@@ -409,10 +394,9 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
try {
|
||||
readLock.lock();
|
||||
return fetchAllStoredFileGroups().map(fileGroup -> {
|
||||
return Option.fromJavaOptional(fileGroup.getAllDataFiles()
|
||||
.filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
|
||||
&& !isDataFileDueToPendingCompaction(dataFile))
|
||||
.findFirst());
|
||||
return Option.fromJavaOptional(
|
||||
fileGroup.getAllDataFiles().filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
|
||||
&& !isDataFileDueToPendingCompaction(dataFile)).findFirst());
|
||||
}).filter(Option::isPresent).map(Option::get);
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
@@ -466,19 +450,17 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
return fetchAllStoredFileGroups(partitionPath)
|
||||
.map(fileGroup -> {
|
||||
FileSlice fileSlice = fileGroup.getLatestFileSlice().get();
|
||||
// if the file-group is under compaction, pick the latest before compaction instant time.
|
||||
Option<Pair<String, CompactionOperation>> compactionWithInstantPair =
|
||||
getPendingCompactionOperationWithInstant(fileSlice.getFileGroupId());
|
||||
if (compactionWithInstantPair.isPresent()) {
|
||||
String compactionInstantTime = compactionWithInstantPair.get().getLeft();
|
||||
return fileGroup.getLatestFileSliceBefore(compactionInstantTime);
|
||||
}
|
||||
return Option.of(fileSlice);
|
||||
})
|
||||
.map(Option::get);
|
||||
return fetchAllStoredFileGroups(partitionPath).map(fileGroup -> {
|
||||
FileSlice fileSlice = fileGroup.getLatestFileSlice().get();
|
||||
// if the file-group is under compaction, pick the latest before compaction instant time.
|
||||
Option<Pair<String, CompactionOperation>> compactionWithInstantPair =
|
||||
getPendingCompactionOperationWithInstant(fileSlice.getFileGroupId());
|
||||
if (compactionWithInstantPair.isPresent()) {
|
||||
String compactionInstantTime = compactionWithInstantPair.get().getLeft();
|
||||
return fileGroup.getLatestFileSliceBefore(compactionInstantTime);
|
||||
}
|
||||
return Option.of(fileSlice);
|
||||
}).map(Option::get);
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
@@ -491,8 +473,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
readLock.lock();
|
||||
String partitionPath = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partitionPath);
|
||||
Stream<FileSlice> fileSliceStream =
|
||||
fetchLatestFileSlicesBeforeOrOn(partitionPath, maxCommitTime);
|
||||
Stream<FileSlice> fileSliceStream = fetchLatestFileSlicesBeforeOrOn(partitionPath, maxCommitTime);
|
||||
if (includeFileSlicesInPendingCompaction) {
|
||||
return fileSliceStream.map(fs -> filterDataFileAfterPendingCompaction(fs));
|
||||
} else {
|
||||
@@ -509,17 +490,14 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
readLock.lock();
|
||||
String partition = formatPartitionKey(partitionStr);
|
||||
ensurePartitionLoadedCorrectly(partition);
|
||||
return fetchAllStoredFileGroups(partition)
|
||||
.map(fileGroup -> {
|
||||
Option<FileSlice> fileSlice = fileGroup.getLatestFileSliceBeforeOrOn(maxInstantTime);
|
||||
// if the file-group is under construction, pick the latest before compaction instant time.
|
||||
if (fileSlice.isPresent()) {
|
||||
fileSlice = Option.of(fetchMergedFileSlice(fileGroup, fileSlice.get()));
|
||||
}
|
||||
return fileSlice;
|
||||
})
|
||||
.filter(Option::isPresent)
|
||||
.map(Option::get);
|
||||
return fetchAllStoredFileGroups(partition).map(fileGroup -> {
|
||||
Option<FileSlice> fileSlice = fileGroup.getLatestFileSliceBeforeOrOn(maxInstantTime);
|
||||
// if the file-group is under construction, pick the latest before compaction instant time.
|
||||
if (fileSlice.isPresent()) {
|
||||
fileSlice = Option.of(fetchMergedFileSlice(fileGroup, fileSlice.get()));
|
||||
}
|
||||
return fileSlice;
|
||||
}).filter(Option::isPresent).map(Option::get);
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
@@ -665,8 +643,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
* @return file-slice stream
|
||||
*/
|
||||
Stream<FileSlice> fetchAllFileSlices(String partitionPath) {
|
||||
return fetchAllStoredFileGroups(partitionPath)
|
||||
.map(HoodieFileGroup::getAllFileSlices)
|
||||
return fetchAllStoredFileGroups(partitionPath).map(HoodieFileGroup::getAllFileSlices)
|
||||
.flatMap(sliceList -> sliceList);
|
||||
}
|
||||
|
||||
@@ -674,26 +651,21 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
* Default implementation for fetching latest data-files for the partition-path
|
||||
*/
|
||||
Stream<HoodieDataFile> fetchLatestDataFiles(final String partitionPath) {
|
||||
return fetchAllStoredFileGroups(partitionPath)
|
||||
.map(this::getLatestDataFile)
|
||||
.filter(Option::isPresent)
|
||||
return fetchAllStoredFileGroups(partitionPath).map(this::getLatestDataFile).filter(Option::isPresent)
|
||||
.map(Option::get);
|
||||
}
|
||||
|
||||
|
||||
protected Option<HoodieDataFile> getLatestDataFile(HoodieFileGroup fileGroup) {
|
||||
return Option.fromJavaOptional(
|
||||
fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst());
|
||||
return Option
|
||||
.fromJavaOptional(fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst());
|
||||
}
|
||||
|
||||
/**
|
||||
* Default implementation for fetching latest data-files across all partitions
|
||||
*/
|
||||
Stream<HoodieDataFile> fetchLatestDataFiles() {
|
||||
return fetchAllStoredFileGroups()
|
||||
.map(this::getLatestDataFile)
|
||||
.filter(Option::isPresent)
|
||||
.map(Option::get);
|
||||
return fetchAllStoredFileGroups().map(this::getLatestDataFile).filter(Option::isPresent).map(Option::get);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -702,8 +674,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
* @param partitionPath partition-path
|
||||
*/
|
||||
Stream<HoodieDataFile> fetchAllDataFiles(String partitionPath) {
|
||||
return fetchAllStoredFileGroups(partitionPath)
|
||||
.map(HoodieFileGroup::getAllDataFiles)
|
||||
return fetchAllStoredFileGroups(partitionPath).map(HoodieFileGroup::getAllDataFiles)
|
||||
.flatMap(dataFileList -> dataFileList);
|
||||
}
|
||||
|
||||
@@ -719,9 +690,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
* Default implementation for fetching latest file-slices for a partition path
|
||||
*/
|
||||
Stream<FileSlice> fetchLatestFileSlices(String partitionPath) {
|
||||
return fetchAllStoredFileGroups(partitionPath)
|
||||
.map(HoodieFileGroup::getLatestFileSlice)
|
||||
.filter(Option::isPresent)
|
||||
return fetchAllStoredFileGroups(partitionPath).map(HoodieFileGroup::getLatestFileSlice).filter(Option::isPresent)
|
||||
.map(Option::get);
|
||||
}
|
||||
|
||||
@@ -731,11 +700,9 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
* @param partitionPath Partition Path
|
||||
* @param maxCommitTime Instant Time
|
||||
*/
|
||||
Stream<FileSlice> fetchLatestFileSlicesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime) {
|
||||
Stream<FileSlice> fetchLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime) {
|
||||
return fetchAllStoredFileGroups(partitionPath)
|
||||
.map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime))
|
||||
.filter(Option::isPresent)
|
||||
.map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime)).filter(Option::isPresent)
|
||||
.map(Option::get);
|
||||
}
|
||||
|
||||
@@ -746,8 +713,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
* @param penultimateSlice Penultimate file slice for a file-group in commit timeline order
|
||||
*/
|
||||
private static FileSlice mergeCompactionPendingFileSlices(FileSlice lastSlice, FileSlice penultimateSlice) {
|
||||
FileSlice merged = new FileSlice(penultimateSlice.getPartitionPath(),
|
||||
penultimateSlice.getBaseInstantTime(), penultimateSlice.getFileId());
|
||||
FileSlice merged = new FileSlice(penultimateSlice.getPartitionPath(), penultimateSlice.getBaseInstantTime(),
|
||||
penultimateSlice.getFileId());
|
||||
if (penultimateSlice.getDataFile().isPresent()) {
|
||||
merged.setDataFile(penultimateSlice.getDataFile().get());
|
||||
}
|
||||
@@ -782,24 +749,26 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
|
||||
/**
|
||||
* Default implementation for fetching latest data-file
|
||||
*
|
||||
* @param partitionPath Partition path
|
||||
* @param fileId File Id
|
||||
* @return Data File if present
|
||||
*/
|
||||
protected Option<HoodieDataFile> fetchLatestDataFile(String partitionPath, String fileId) {
|
||||
return Option.fromJavaOptional(fetchLatestDataFiles(partitionPath)
|
||||
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
|
||||
return Option
|
||||
.fromJavaOptional(fetchLatestDataFiles(partitionPath).filter(fs -> fs.getFileId().equals(fileId)).findFirst());
|
||||
}
|
||||
|
||||
/**
|
||||
* Default implementation for fetching file-slice
|
||||
*
|
||||
* @param partitionPath Partition path
|
||||
* @param fileId File Id
|
||||
* @return File Slice if present
|
||||
*/
|
||||
protected Option<FileSlice> fetchLatestFileSlice(String partitionPath, String fileId) {
|
||||
return Option.fromJavaOptional(fetchLatestFileSlices(partitionPath)
|
||||
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
|
||||
return Option
|
||||
.fromJavaOptional(fetchLatestFileSlices(partitionPath).filter(fs -> fs.getFileId().equals(fileId)).findFirst());
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -841,6 +810,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
|
||||
/**
|
||||
* Return Only Commits and Compaction timeline for building file-groups
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getVisibleCommitsAndCompactionTimeline() {
|
||||
|
||||
@@ -28,25 +28,21 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* A container that can potentially hold one or more dataset's
|
||||
* file-system views. There is one view for each dataset. This is a view built against a timeline containing completed
|
||||
* actions. In an embedded timeline-server mode, this typically holds only one dataset's view.
|
||||
* In a stand-alone server mode, this can hold more than one dataset's views.
|
||||
* A container that can potentially hold one or more dataset's file-system views. There is one view for each dataset.
|
||||
* This is a view built against a timeline containing completed actions. In an embedded timeline-server mode, this
|
||||
* typically holds only one dataset's view. In a stand-alone server mode, this can hold more than one dataset's views.
|
||||
*
|
||||
* FileSystemView can be stored "locally" using the following storage mechanisms:
|
||||
* a. In Memory
|
||||
* b. Spillable Map
|
||||
* c. RocksDB
|
||||
* FileSystemView can be stored "locally" using the following storage mechanisms: a. In Memory b. Spillable Map c.
|
||||
* RocksDB
|
||||
*
|
||||
* But there can be cases where the file-system view is managed remotely (for example: Embedded Timeline Server). In
|
||||
* this case, the clients will configure a remote filesystem view client (RemoteHoodieTableFileSystemView) for the
|
||||
* dataset which can connect to the remote file system view and fetch views. There are 2 modes here : REMOTE_FIRST and
|
||||
* REMOTE_ONLY
|
||||
* REMOTE_FIRST : The file-system view implementation on client side will act as a remote proxy. In case, if there
|
||||
* is problem (or exceptions) querying remote file-system view, a backup local file-system view(using
|
||||
* either one of in-memory, spillable, rocksDB) is used to serve file-system view queries
|
||||
* REMOTE_ONLY : In this case, there is no backup local file-system view. If there is problem (or exceptions)
|
||||
* querying remote file-system view, then the exceptions are percolated back to client.
|
||||
* REMOTE_ONLY REMOTE_FIRST : The file-system view implementation on client side will act as a remote proxy. In case, if
|
||||
* there is problem (or exceptions) querying remote file-system view, a backup local file-system view(using either one
|
||||
* of in-memory, spillable, rocksDB) is used to serve file-system view queries REMOTE_ONLY : In this case, there is no
|
||||
* backup local file-system view. If there is problem (or exceptions) querying remote file-system view, then the
|
||||
* exceptions are percolated back to client.
|
||||
*
|
||||
* FileSystemViewManager is designed to encapsulate the file-system view storage from clients using the file-system
|
||||
* view. FileSystemViewManager uses a factory to construct specific implementation of file-system view and passes it to
|
||||
@@ -73,6 +69,7 @@ public class FileSystemViewManager {
|
||||
|
||||
/**
|
||||
* Drops reference to File-System Views. Future calls to view results in creating a new view
|
||||
*
|
||||
* @param basePath
|
||||
*/
|
||||
public void clearFileSystemView(String basePath) {
|
||||
@@ -84,12 +81,12 @@ public class FileSystemViewManager {
|
||||
|
||||
/**
|
||||
* Main API to get the file-system view for the base-path
|
||||
*
|
||||
* @param basePath
|
||||
* @return
|
||||
*/
|
||||
public SyncableFileSystemView getFileSystemView(String basePath) {
|
||||
return globalViewMap.computeIfAbsent(basePath,
|
||||
(path) -> viewCreator.apply(path, viewStorageConfig));
|
||||
return globalViewMap.computeIfAbsent(basePath, (path) -> viewCreator.apply(path, viewStorageConfig));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -104,9 +101,10 @@ public class FileSystemViewManager {
|
||||
|
||||
/**
|
||||
* Create RocksDB based file System view for a dataset
|
||||
*
|
||||
* @param conf Hadoop Configuration
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param basePath Base Path of dataset
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param basePath Base Path of dataset
|
||||
* @return
|
||||
*/
|
||||
private static RocksDbBasedFileSystemView createRocksDBBasedFileSystemView(SerializableConfiguration conf,
|
||||
@@ -118,9 +116,10 @@ public class FileSystemViewManager {
|
||||
|
||||
/**
|
||||
* Create a spillable Map based file System view for a dataset
|
||||
*
|
||||
* @param conf Hadoop Configuration
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param basePath Base Path of dataset
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param basePath Base Path of dataset
|
||||
* @return
|
||||
*/
|
||||
private static SpillableMapBasedFileSystemView createSpillableMapBasedFileSystemView(SerializableConfiguration conf,
|
||||
@@ -134,9 +133,10 @@ public class FileSystemViewManager {
|
||||
|
||||
/**
|
||||
* Create an in-memory file System view for a dataset
|
||||
*
|
||||
* @param conf Hadoop Configuration
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param basePath Base Path of dataset
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param basePath Base Path of dataset
|
||||
* @return
|
||||
*/
|
||||
private static HoodieTableFileSystemView createInMemoryFileSystemView(SerializableConfiguration conf,
|
||||
@@ -149,27 +149,29 @@ public class FileSystemViewManager {
|
||||
|
||||
/**
|
||||
* Create a remote file System view for a dataset
|
||||
*
|
||||
* @param conf Hadoop Configuration
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param metaClient Hoodie Table MetaClient for the dataset.
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param metaClient Hoodie Table MetaClient for the dataset.
|
||||
* @return
|
||||
*/
|
||||
private static RemoteHoodieTableFileSystemView createRemoteFileSystemView(SerializableConfiguration conf,
|
||||
FileSystemViewStorageConfig viewConf, HoodieTableMetaClient metaClient) {
|
||||
logger.info("Creating remote view for basePath " + metaClient.getBasePath() + ". Server="
|
||||
+ viewConf.getRemoteViewServerHost() + ":" + viewConf.getRemoteViewServerPort());
|
||||
return new RemoteHoodieTableFileSystemView(viewConf.getRemoteViewServerHost(),
|
||||
viewConf.getRemoteViewServerPort(), metaClient);
|
||||
return new RemoteHoodieTableFileSystemView(viewConf.getRemoteViewServerHost(), viewConf.getRemoteViewServerPort(),
|
||||
metaClient);
|
||||
}
|
||||
|
||||
/**
|
||||
* Main Factory method for building file-system views
|
||||
* @param conf Hadoop Configuration
|
||||
*
|
||||
* @param conf Hadoop Configuration
|
||||
* @param config View Storage Configuration
|
||||
* @return
|
||||
*/
|
||||
public static FileSystemViewManager createViewManager(
|
||||
final SerializableConfiguration conf, final FileSystemViewStorageConfig config) {
|
||||
public static FileSystemViewManager createViewManager(final SerializableConfiguration conf,
|
||||
final FileSystemViewStorageConfig config) {
|
||||
logger.info("Creating View Manager with storage type :" + config.getStorageType());
|
||||
switch (config.getStorageType()) {
|
||||
case EMBEDDED_KV_STORE:
|
||||
@@ -186,9 +188,8 @@ public class FileSystemViewManager {
|
||||
(basePath, viewConfig) -> createInMemoryFileSystemView(conf, viewConfig, basePath));
|
||||
case REMOTE_ONLY:
|
||||
logger.info("Creating remote only table view");
|
||||
return new FileSystemViewManager(conf, config,
|
||||
(basePath, viewConfig) -> createRemoteFileSystemView(conf, viewConfig,
|
||||
new HoodieTableMetaClient(conf.newCopy(), basePath)));
|
||||
return new FileSystemViewManager(conf, config, (basePath, viewConfig) -> createRemoteFileSystemView(conf,
|
||||
viewConfig, new HoodieTableMetaClient(conf.newCopy(), basePath)));
|
||||
case REMOTE_FIRST:
|
||||
logger.info("Creating remote first table view");
|
||||
return new FileSystemViewManager(conf, config, (basePath, viewConfig) -> {
|
||||
|
||||
@@ -30,7 +30,7 @@ import org.apache.hudi.config.DefaultHoodieConfig;
|
||||
*/
|
||||
public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
|
||||
|
||||
//Property Names
|
||||
// Property Names
|
||||
public static final String FILESYSTEM_VIEW_STORAGE_TYPE = "hoodie.filesystem.view.type";
|
||||
public static final String FILESYSTEM_VIEW_INCREMENTAL_SYNC_MODE = "hoodie.filesystem.view.incr.timeline.sync.enable";
|
||||
public static final String FILESYSTEM_SECONDARY_VIEW_STORAGE_TYPE = "hoodie.filesystem.view.secondary.type";
|
||||
@@ -85,8 +85,9 @@ public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
|
||||
|
||||
public long getMaxMemoryForPendingCompaction() {
|
||||
long totalMemory = Long.parseLong(props.getProperty(FILESYSTEM_VIEW_SPILLABLE_MEM));
|
||||
long reservedForPendingComaction = new Double(totalMemory * Double.parseDouble(
|
||||
props.getProperty(FILESYSTEM_VIEW_PENDING_COMPACTION_MEM_FRACTION))).longValue();
|
||||
long reservedForPendingComaction =
|
||||
new Double(totalMemory * Double.parseDouble(props.getProperty(FILESYSTEM_VIEW_PENDING_COMPACTION_MEM_FRACTION)))
|
||||
.longValue();
|
||||
return reservedForPendingComaction;
|
||||
}
|
||||
|
||||
@@ -167,26 +168,26 @@ public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
|
||||
}
|
||||
|
||||
public FileSystemViewStorageConfig build() {
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_STORAGE_TYPE),
|
||||
FILESYSTEM_VIEW_STORAGE_TYPE, DEFAULT_VIEW_STORAGE_TYPE.name());
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_STORAGE_TYPE), FILESYSTEM_VIEW_STORAGE_TYPE,
|
||||
DEFAULT_VIEW_STORAGE_TYPE.name());
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_INCREMENTAL_SYNC_MODE),
|
||||
FILESYSTEM_VIEW_INCREMENTAL_SYNC_MODE, DEFAULT_FILESYSTEM_VIEW_INCREMENTAL_SYNC_MODE);
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_SECONDARY_VIEW_STORAGE_TYPE),
|
||||
FILESYSTEM_SECONDARY_VIEW_STORAGE_TYPE, DEFAULT_SECONDARY_VIEW_STORAGE_TYPE.name());
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REMOTE_HOST),
|
||||
FILESYSTEM_VIEW_REMOTE_HOST, DEFUALT_REMOTE_VIEW_SERVER_HOST);
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REMOTE_PORT),
|
||||
FILESYSTEM_VIEW_REMOTE_PORT, DEFAULT_REMOTE_VIEW_SERVER_PORT.toString());
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REMOTE_HOST), FILESYSTEM_VIEW_REMOTE_HOST,
|
||||
DEFUALT_REMOTE_VIEW_SERVER_HOST);
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REMOTE_PORT), FILESYSTEM_VIEW_REMOTE_PORT,
|
||||
DEFAULT_REMOTE_VIEW_SERVER_PORT.toString());
|
||||
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_SPILLABLE_DIR),
|
||||
FILESYSTEM_VIEW_SPILLABLE_DIR, DEFAULT_VIEW_SPILLABLE_DIR);
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_SPILLABLE_MEM),
|
||||
FILESYSTEM_VIEW_SPILLABLE_MEM, DEFAULT_MAX_MEMORY_FOR_VIEW.toString());
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_SPILLABLE_DIR), FILESYSTEM_VIEW_SPILLABLE_DIR,
|
||||
DEFAULT_VIEW_SPILLABLE_DIR);
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_SPILLABLE_MEM), FILESYSTEM_VIEW_SPILLABLE_MEM,
|
||||
DEFAULT_MAX_MEMORY_FOR_VIEW.toString());
|
||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_PENDING_COMPACTION_MEM_FRACTION),
|
||||
FILESYSTEM_VIEW_PENDING_COMPACTION_MEM_FRACTION, DEFAULT_MEM_FRACTION_FOR_PENDING_COMPACTION.toString());
|
||||
|
||||
setDefaultOnCondition(props, !props.containsKey(ROCKSDB_BASE_PATH_PROP),
|
||||
ROCKSDB_BASE_PATH_PROP, DEFAULT_ROCKSDB_BASE_PATH);
|
||||
setDefaultOnCondition(props, !props.containsKey(ROCKSDB_BASE_PATH_PROP), ROCKSDB_BASE_PATH_PROP,
|
||||
DEFAULT_ROCKSDB_BASE_PATH);
|
||||
|
||||
// Validations
|
||||
FileSystemViewStorageType.valueOf(props.getProperty(FILESYSTEM_VIEW_STORAGE_TYPE));
|
||||
|
||||
@@ -40,6 +40,7 @@ import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* TableFileSystemView Implementations based on in-memory storage.
|
||||
*
|
||||
* @see TableFileSystemView
|
||||
* @since 0.3.0
|
||||
*/
|
||||
@@ -115,13 +116,11 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
}
|
||||
|
||||
private void writeObject(java.io.ObjectOutputStream out)
|
||||
throws IOException {
|
||||
private void writeObject(java.io.ObjectOutputStream out) throws IOException {
|
||||
out.defaultWriteObject();
|
||||
}
|
||||
|
||||
@@ -133,10 +132,9 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
|
||||
@Override
|
||||
protected void resetPendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations) {
|
||||
// Build fileId to Pending Compaction Instants
|
||||
this.fgIdToPendingCompaction = createFileIdToPendingCompactionMap(
|
||||
operations.map(entry -> {
|
||||
return Pair.of(entry.getValue().getFileGroupId(), Pair.of(entry.getKey(),entry.getValue()));
|
||||
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
|
||||
this.fgIdToPendingCompaction = createFileIdToPendingCompactionMap(operations.map(entry -> {
|
||||
return Pair.of(entry.getValue().getFileGroupId(), Pair.of(entry.getKey(), entry.getValue()));
|
||||
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -161,8 +159,8 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a partition path, obtain all filegroups within that. All methods, that work at the
|
||||
* partition level go through this.
|
||||
* Given a partition path, obtain all filegroups within that. All methods, that work at the partition level go through
|
||||
* this.
|
||||
*/
|
||||
@Override
|
||||
Stream<HoodieFileGroup> fetchAllStoredFileGroups(String partition) {
|
||||
|
||||
@@ -157,18 +157,19 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
log.info("Syncing pending compaction instant (" + instant + ")");
|
||||
HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(metaClient, instant.getTimestamp());
|
||||
List<Pair<String, CompactionOperation>> pendingOps =
|
||||
CompactionUtils.getPendingCompactionOperations(instant, compactionPlan).map(p -> Pair.of(p.getValue().getKey(),
|
||||
CompactionOperation.convertFromAvroRecordInstance(p.getValue().getValue()))).collect(Collectors.toList());
|
||||
CompactionUtils.getPendingCompactionOperations(instant, compactionPlan)
|
||||
.map(p -> Pair.of(p.getValue().getKey(),
|
||||
CompactionOperation.convertFromAvroRecordInstance(p.getValue().getValue())))
|
||||
.collect(Collectors.toList());
|
||||
// First, update Pending compaction instants
|
||||
addPendingCompactionOperations(pendingOps.stream());
|
||||
|
||||
Map<String, List<Pair<String, HoodieFileGroup>>> partitionToFileGroups =
|
||||
pendingOps.stream().map(opPair -> {
|
||||
String compactionInstantTime = opPair.getKey();
|
||||
HoodieFileGroup fileGroup = new HoodieFileGroup(opPair.getValue().getFileGroupId(), timeline);
|
||||
fileGroup.addNewFileSliceAtInstant(compactionInstantTime);
|
||||
return Pair.of(compactionInstantTime, fileGroup);
|
||||
}).collect(Collectors.groupingBy(x -> x.getValue().getPartitionPath()));
|
||||
Map<String, List<Pair<String, HoodieFileGroup>>> partitionToFileGroups = pendingOps.stream().map(opPair -> {
|
||||
String compactionInstantTime = opPair.getKey();
|
||||
HoodieFileGroup fileGroup = new HoodieFileGroup(opPair.getValue().getFileGroupId(), timeline);
|
||||
fileGroup.addNewFileSliceAtInstant(compactionInstantTime);
|
||||
return Pair.of(compactionInstantTime, fileGroup);
|
||||
}).collect(Collectors.groupingBy(x -> x.getValue().getPartitionPath()));
|
||||
partitionToFileGroups.entrySet().forEach(entry -> {
|
||||
if (isPartitionAvailableInStore(entry.getKey())) {
|
||||
applyDeltaFileSlicesToPartitionView(entry.getKey(),
|
||||
@@ -185,8 +186,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
*/
|
||||
private void addCommitInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
|
||||
log.info("Syncing committed instant (" + instant + ")");
|
||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(instant).get(),
|
||||
HoodieCommitMetadata.class);
|
||||
HoodieCommitMetadata commitMetadata =
|
||||
HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(instant).get(), HoodieCommitMetadata.class);
|
||||
commitMetadata.getPartitionToWriteStats().entrySet().stream().forEach(entry -> {
|
||||
String partition = entry.getKey();
|
||||
if (isPartitionAvailableInStore(partition)) {
|
||||
@@ -196,8 +197,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
new Path(String.format("%s/%s", metaClient.getBasePath(), p.getPath())));
|
||||
return status;
|
||||
}).toArray(FileStatus[]::new);
|
||||
List<HoodieFileGroup> fileGroups = buildFileGroups(statuses, timeline.filterCompletedAndCompactionInstants(),
|
||||
false);
|
||||
List<HoodieFileGroup> fileGroups =
|
||||
buildFileGroups(statuses, timeline.filterCompletedAndCompactionInstants(), false);
|
||||
applyDeltaFileSlicesToPartitionView(partition, fileGroups, DeltaApplyMode.ADD);
|
||||
} else {
|
||||
log.warn("Skipping partition (" + partition + ") when syncing instant (" + instant + ") as it is not loaded");
|
||||
@@ -214,8 +215,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
*/
|
||||
private void addRestoreInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
|
||||
log.info("Syncing restore instant (" + instant + ")");
|
||||
HoodieRestoreMetadata metadata = AvroUtils.deserializeAvroMetadata(
|
||||
timeline.getInstantDetails(instant).get(), HoodieRestoreMetadata.class);
|
||||
HoodieRestoreMetadata metadata =
|
||||
AvroUtils.deserializeAvroMetadata(timeline.getInstantDetails(instant).get(), HoodieRestoreMetadata.class);
|
||||
|
||||
Map<String, List<Pair<String, String>>> partitionFiles =
|
||||
metadata.getHoodieRestoreMetadata().entrySet().stream().flatMap(entry -> {
|
||||
@@ -238,8 +239,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
*/
|
||||
private void addRollbackInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
|
||||
log.info("Syncing rollback instant (" + instant + ")");
|
||||
HoodieRollbackMetadata metadata = AvroUtils.deserializeAvroMetadata(
|
||||
timeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class);
|
||||
HoodieRollbackMetadata metadata =
|
||||
AvroUtils.deserializeAvroMetadata(timeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class);
|
||||
|
||||
metadata.getPartitionMetadata().entrySet().stream().forEach(e -> {
|
||||
removeFileSlicesForPartition(timeline, instant, e.getKey(), e.getValue().getSuccessDeleteFiles());
|
||||
@@ -255,16 +256,16 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
*/
|
||||
private void addCleanInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
|
||||
log.info("Syncing cleaner instant (" + instant + ")");
|
||||
HoodieCleanMetadata cleanMetadata = AvroUtils
|
||||
.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
|
||||
HoodieCleanMetadata cleanMetadata =
|
||||
AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
|
||||
cleanMetadata.getPartitionMetadata().entrySet().stream().forEach(entry -> {
|
||||
removeFileSlicesForPartition(timeline, instant, entry.getKey(), entry.getValue().getSuccessDeleteFiles());
|
||||
});
|
||||
log.info("Done Syncing cleaner instant (" + instant + ")");
|
||||
}
|
||||
|
||||
private void removeFileSlicesForPartition(HoodieTimeline timeline, HoodieInstant instant,
|
||||
String partition, List<String> paths) {
|
||||
private void removeFileSlicesForPartition(HoodieTimeline timeline, HoodieInstant instant, String partition,
|
||||
List<String> paths) {
|
||||
if (isPartitionAvailableInStore(partition)) {
|
||||
log.info("Removing file slices for partition (" + partition + ") for instant (" + instant + ")");
|
||||
FileStatus[] statuses = paths.stream().map(p -> {
|
||||
@@ -272,8 +273,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
status.setPath(new Path(p));
|
||||
return status;
|
||||
}).toArray(FileStatus[]::new);
|
||||
List<HoodieFileGroup> fileGroups = buildFileGroups(statuses,
|
||||
timeline.filterCompletedAndCompactionInstants(), false);
|
||||
List<HoodieFileGroup> fileGroups =
|
||||
buildFileGroups(statuses, timeline.filterCompletedAndCompactionInstants(), false);
|
||||
applyDeltaFileSlicesToPartitionView(partition, fileGroups, DeltaApplyMode.REMOVE);
|
||||
} else {
|
||||
log.warn("Skipping partition (" + partition + ") when syncing instant (" + instant + ") as it is not loaded");
|
||||
@@ -284,8 +285,7 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
* Apply mode whether to add or remove the delta view
|
||||
*/
|
||||
enum DeltaApplyMode {
|
||||
ADD,
|
||||
REMOVE
|
||||
ADD, REMOVE
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -306,27 +306,27 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
|
||||
List<HoodieFileGroup> fileGroups = fetchAllStoredFileGroups(partition).collect(Collectors.toList());
|
||||
/**
|
||||
* Note that while finding the new data/log files added/removed, the path stored in metadata will be missing
|
||||
* the base-path,scheme and authority. Ensure the matching process takes care of this discrepancy.
|
||||
* Note that while finding the new data/log files added/removed, the path stored in metadata will be missing the
|
||||
* base-path,scheme and authority. Ensure the matching process takes care of this discrepancy.
|
||||
*/
|
||||
Map<String, HoodieDataFile> viewDataFiles = fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
|
||||
.map(FileSlice::getDataFile).filter(Option::isPresent).map(Option::get)
|
||||
.map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
//Note: Delta Log Files and Data FIles can be empty when adding/removing pending compactions
|
||||
// Note: Delta Log Files and Data FIles can be empty when adding/removing pending compactions
|
||||
Map<String, HoodieDataFile> deltaDataFiles = deltaFileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
|
||||
.map(FileSlice::getDataFile).filter(Option::isPresent).map(Option::get)
|
||||
.map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
|
||||
Map<String, HoodieLogFile> viewLogFiles = fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
|
||||
.flatMap(FileSlice::getLogFiles)
|
||||
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
Map<String, HoodieLogFile> deltaLogFiles = deltaFileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
|
||||
.flatMap(FileSlice::getLogFiles)
|
||||
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
Map<String, HoodieLogFile> viewLogFiles =
|
||||
fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices).flatMap(FileSlice::getLogFiles)
|
||||
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
Map<String, HoodieLogFile> deltaLogFiles =
|
||||
deltaFileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices).flatMap(FileSlice::getLogFiles)
|
||||
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
|
||||
switch (mode) {
|
||||
case ADD:
|
||||
|
||||
@@ -38,8 +38,8 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* A file system view which proxies request to a preferred File System View implementation. In case of error,
|
||||
* flip all subsequent calls to a backup file-system view implementation.
|
||||
* A file system view which proxies request to a preferred File System View implementation. In case of error, flip all
|
||||
* subsequent calls to a backup file-system view implementation.
|
||||
*/
|
||||
public class PriorityBasedFileSystemView implements SyncableFileSystemView, Serializable {
|
||||
|
||||
@@ -140,8 +140,7 @@ public class PriorityBasedFileSystemView implements SyncableFileSystemView, Seri
|
||||
|
||||
@Override
|
||||
public Option<HoodieDataFile> getDataFileOn(String partitionPath, String instantTime, String fileId) {
|
||||
return execute(partitionPath, instantTime, fileId, preferredView::getDataFileOn,
|
||||
secondaryView::getDataFileOn);
|
||||
return execute(partitionPath, instantTime, fileId, preferredView::getDataFileOn, secondaryView::getDataFileOn);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -57,31 +57,26 @@ import org.apache.log4j.Logger;
|
||||
public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView, Serializable {
|
||||
|
||||
private static final String BASE_URL = "/v1/hoodie/view";
|
||||
public static final String LATEST_PARTITION_SLICES_URL = String.format("%s/%s", BASE_URL,
|
||||
"slices/partition/latest/");
|
||||
public static final String LATEST_PARTITION_SLICE_URL = String.format("%s/%s", BASE_URL,
|
||||
"slices/file/latest/");
|
||||
public static final String LATEST_PARTITION_UNCOMPACTED_SLICES_URL = String.format("%s/%s", BASE_URL,
|
||||
"slices/uncompacted/partition/latest/");
|
||||
public static final String LATEST_PARTITION_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/partition/latest/");
|
||||
public static final String LATEST_PARTITION_SLICE_URL = String.format("%s/%s", BASE_URL, "slices/file/latest/");
|
||||
public static final String LATEST_PARTITION_UNCOMPACTED_SLICES_URL =
|
||||
String.format("%s/%s", BASE_URL, "slices/uncompacted/partition/latest/");
|
||||
public static final String ALL_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/all");
|
||||
public static final String LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL =
|
||||
String.format("%s/%s", BASE_URL, "slices/merged/beforeoron/latest/");
|
||||
public static final String LATEST_SLICES_RANGE_INSTANT_URL =
|
||||
String.format("%s/%s", BASE_URL, "slices/range/latest/");
|
||||
public static final String LATEST_SLICES_RANGE_INSTANT_URL = String.format("%s/%s", BASE_URL, "slices/range/latest/");
|
||||
public static final String LATEST_SLICES_BEFORE_ON_INSTANT_URL =
|
||||
String.format("%s/%s", BASE_URL, "slices/beforeoron/latest/");
|
||||
|
||||
public static final String PENDING_COMPACTION_OPS =
|
||||
String.format("%s/%s", BASE_URL, "compactions/pending/");
|
||||
public static final String PENDING_COMPACTION_OPS = String.format("%s/%s", BASE_URL, "compactions/pending/");
|
||||
|
||||
public static final String LATEST_PARTITION_DATA_FILES_URL = String.format("%s/%s", BASE_URL,
|
||||
"datafiles/latest/partition");
|
||||
public static final String LATEST_PARTITION_DATA_FILE_URL = String.format("%s/%s", BASE_URL,
|
||||
"datafile/latest/partition");
|
||||
public static final String LATEST_PARTITION_DATA_FILES_URL =
|
||||
String.format("%s/%s", BASE_URL, "datafiles/latest/partition");
|
||||
public static final String LATEST_PARTITION_DATA_FILE_URL =
|
||||
String.format("%s/%s", BASE_URL, "datafile/latest/partition");
|
||||
public static final String ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all");
|
||||
public static final String LATEST_ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all/latest/");
|
||||
public static final String LATEST_DATA_FILE_ON_INSTANT_URL =
|
||||
String.format("%s/%s", BASE_URL, "datafile/on/latest/");
|
||||
public static final String LATEST_DATA_FILE_ON_INSTANT_URL = String.format("%s/%s", BASE_URL, "datafile/on/latest/");
|
||||
|
||||
public static final String LATEST_DATA_FILES_RANGE_INSTANT_URL =
|
||||
String.format("%s/%s", BASE_URL, "datafiles/range/latest/");
|
||||
@@ -123,8 +118,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
private boolean closed = false;
|
||||
|
||||
private enum RequestMethod {
|
||||
GET,
|
||||
POST
|
||||
GET, POST
|
||||
}
|
||||
|
||||
public RemoteHoodieTableFileSystemView(String server, int port, HoodieTableMetaClient metaClient) {
|
||||
@@ -140,8 +134,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
RequestMethod method) throws IOException {
|
||||
Preconditions.checkArgument(!closed, "View already closed");
|
||||
|
||||
URIBuilder builder = new URIBuilder().setHost(serverHost).setPort(serverPort).setPath(requestPath)
|
||||
.setScheme("http");
|
||||
URIBuilder builder =
|
||||
new URIBuilder().setHost(serverHost).setPort(serverPort).setPath(requestPath).setScheme("http");
|
||||
|
||||
queryParameters.entrySet().stream().forEach(entry -> {
|
||||
builder.addParameter(entry.getKey(), entry.getValue());
|
||||
@@ -213,8 +207,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_PARTITION_DATA_FILES_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -226,8 +219,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParams();
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_ALL_DATA_FILES, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -239,8 +231,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -250,12 +241,10 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
@Override
|
||||
public Option<HoodieDataFile> getDataFileOn(String partitionPath, String instantTime, String fileId) {
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParams(partitionPath,
|
||||
new String[]{INSTANT_PARAM, FILEID_PARAM},
|
||||
new String[]{instantTime, fileId});
|
||||
new String[] {INSTANT_PARAM, FILEID_PARAM}, new String[] {instantTime, fileId});
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_DATA_FILE_ON_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -264,12 +253,11 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
|
||||
Map<String, String> paramsMap = getParams(INSTANTS_PARAM,
|
||||
StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
|
||||
Map<String, String> paramsMap =
|
||||
getParams(INSTANTS_PARAM, StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_DATA_FILES_RANGE_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -280,9 +268,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
|
||||
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(ALL_DATA_FILES, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
List<DataFileDTO> dataFiles =
|
||||
executeRequest(ALL_DATA_FILES, paramsMap, new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -294,8 +281,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
|
||||
try {
|
||||
List<FileSliceDTO> dataFiles = executeRequest(LATEST_PARTITION_SLICES_URL, paramsMap,
|
||||
new TypeReference<List<FileSliceDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -307,8 +293,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
|
||||
try {
|
||||
List<FileSliceDTO> dataFiles = executeRequest(LATEST_PARTITION_SLICE_URL, paramsMap,
|
||||
new TypeReference<List<FileSliceDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
|
||||
return Option.fromJavaOptional(dataFiles.stream().map(FileSliceDTO::toFileSlice).findFirst());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -320,8 +305,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
|
||||
try {
|
||||
List<FileSliceDTO> dataFiles = executeRequest(LATEST_PARTITION_UNCOMPACTED_SLICES_URL, paramsMap,
|
||||
new TypeReference<List<FileSliceDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -332,12 +316,11 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime,
|
||||
boolean includeFileSlicesInPendingCompaction) {
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParams(partitionPath,
|
||||
new String[]{MAX_INSTANT_PARAM, INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM},
|
||||
new String[]{maxCommitTime, String.valueOf(includeFileSlicesInPendingCompaction)});
|
||||
new String[] {MAX_INSTANT_PARAM, INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM},
|
||||
new String[] {maxCommitTime, String.valueOf(includeFileSlicesInPendingCompaction)});
|
||||
try {
|
||||
List<FileSliceDTO> dataFiles = executeRequest(LATEST_SLICES_BEFORE_ON_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<FileSliceDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -349,8 +332,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxInstantTime);
|
||||
try {
|
||||
List<FileSliceDTO> dataFiles = executeRequest(LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<FileSliceDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -359,12 +341,11 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
|
||||
@Override
|
||||
public Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn) {
|
||||
Map<String, String> paramsMap = getParams(INSTANTS_PARAM,
|
||||
StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
|
||||
Map<String, String> paramsMap =
|
||||
getParams(INSTANTS_PARAM, StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
|
||||
try {
|
||||
List<FileSliceDTO> dataFiles = executeRequest(LATEST_SLICES_RANGE_INSTANT_URL, paramsMap,
|
||||
new TypeReference<List<FileSliceDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -375,9 +356,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public Stream<FileSlice> getAllFileSlices(String partitionPath) {
|
||||
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
|
||||
try {
|
||||
List<FileSliceDTO> dataFiles = executeRequest(ALL_SLICES_URL, paramsMap,
|
||||
new TypeReference<List<FileSliceDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
List<FileSliceDTO> dataFiles =
|
||||
executeRequest(ALL_SLICES_URL, paramsMap, new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
|
||||
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -389,8 +369,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
|
||||
try {
|
||||
List<FileGroupDTO> fileGroups = executeRequest(ALL_FILEGROUPS_FOR_PARTITION_URL, paramsMap,
|
||||
new TypeReference<List<FileGroupDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<FileGroupDTO>>() {}, RequestMethod.GET);
|
||||
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -400,8 +379,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public boolean refresh() {
|
||||
Map<String, String> paramsMap = getParams();
|
||||
try {
|
||||
return executeRequest(REFRESH_DATASET, paramsMap, new TypeReference<Boolean>() {
|
||||
}, RequestMethod.POST);
|
||||
return executeRequest(REFRESH_DATASET, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.POST);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
@@ -412,8 +390,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParams();
|
||||
try {
|
||||
List<CompactionOpDTO> dtos = executeRequest(PENDING_COMPACTION_OPS, paramsMap,
|
||||
new TypeReference<List<CompactionOpDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<CompactionOpDTO>>() {}, RequestMethod.GET);
|
||||
return dtos.stream().map(CompactionOpDTO::toCompactionOperation);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -434,9 +411,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public Option<HoodieInstant> getLastInstant() {
|
||||
Map<String, String> paramsMap = getParams();
|
||||
try {
|
||||
List<InstantDTO> instants = executeRequest(LAST_INSTANT, paramsMap,
|
||||
new TypeReference<List<InstantDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
List<InstantDTO> instants =
|
||||
executeRequest(LAST_INSTANT, paramsMap, new TypeReference<List<InstantDTO>>() {}, RequestMethod.GET);
|
||||
return Option.fromJavaOptional(instants.stream().map(InstantDTO::toInstant).findFirst());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -447,9 +423,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public HoodieTimeline getTimeline() {
|
||||
Map<String, String> paramsMap = getParams();
|
||||
try {
|
||||
TimelineDTO timeline = executeRequest(TIMELINE, paramsMap,
|
||||
new TypeReference<TimelineDTO>() {
|
||||
}, RequestMethod.GET);
|
||||
TimelineDTO timeline =
|
||||
executeRequest(TIMELINE, paramsMap, new TypeReference<TimelineDTO>() {}, RequestMethod.GET);
|
||||
return TimelineDTO.toTimeline(timeline, metaClient);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
@@ -458,7 +433,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
|
||||
@Override
|
||||
public void sync() {
|
||||
//noop
|
||||
// noop
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -466,8 +441,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
|
||||
try {
|
||||
List<DataFileDTO> dataFiles = executeRequest(LATEST_PARTITION_DATA_FILE_URL, paramsMap,
|
||||
new TypeReference<List<DataFileDTO>>() {
|
||||
}, RequestMethod.GET);
|
||||
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
|
||||
return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
|
||||
@@ -44,18 +44,15 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* A file-system view implementation on top of embedded Rocks DB store.
|
||||
* For each DataSet : 3 column Family is added for storing
|
||||
* (1) File-Slices and Data Files for View lookups
|
||||
* (2) Pending compaction operations
|
||||
* (3) Partitions tracked
|
||||
* A file-system view implementation on top of embedded Rocks DB store. For each DataSet : 3 column Family is added for
|
||||
* storing (1) File-Slices and Data Files for View lookups (2) Pending compaction operations (3) Partitions tracked
|
||||
*
|
||||
* Fine-grained retrieval API to fetch latest file-slice and data-file which are common operations
|
||||
* for ingestion/compaction are supported.
|
||||
* Fine-grained retrieval API to fetch latest file-slice and data-file which are common operations for
|
||||
* ingestion/compaction are supported.
|
||||
*
|
||||
* TODO: vb The current implementation works in embedded server mode where each restarts blows away the view stores.
|
||||
* To support view-state preservation across restarts, Hoodie timeline also needs to be stored
|
||||
* inorder to detect changes to timeline across restarts.
|
||||
* TODO: vb The current implementation works in embedded server mode where each restarts blows away the view stores. To
|
||||
* support view-state preservation across restarts, Hoodie timeline also needs to be stored inorder to detect changes to
|
||||
* timeline across restarts.
|
||||
*/
|
||||
public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSystemView {
|
||||
|
||||
@@ -69,8 +66,8 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
|
||||
private boolean closed = false;
|
||||
|
||||
public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient,
|
||||
HoodieTimeline visibleActiveTimeline, FileSystemViewStorageConfig config) {
|
||||
public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
|
||||
FileSystemViewStorageConfig config) {
|
||||
super(config.isIncrementalTimelineSyncEnabled());
|
||||
this.config = config;
|
||||
this.schemaHelper = new RocksDBSchemaHelper(metaClient);
|
||||
@@ -78,8 +75,8 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
init(metaClient, visibleActiveTimeline);
|
||||
}
|
||||
|
||||
public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient,
|
||||
HoodieTimeline visibleActiveTimeline, FileStatus[] fileStatuses, FileSystemViewStorageConfig config) {
|
||||
public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
|
||||
FileStatus[] fileStatuses, FileSystemViewStorageConfig config) {
|
||||
this(metaClient, visibleActiveTimeline, config);
|
||||
addFilesToView(fileStatuses);
|
||||
}
|
||||
@@ -212,9 +209,9 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
Map<String, HoodieLogFile> logFiles = oldSlice.getLogFiles()
|
||||
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
Map<String, HoodieLogFile> deltaLogFiles = fs.getLogFiles()
|
||||
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
Map<String, HoodieLogFile> deltaLogFiles =
|
||||
fs.getLogFiles().map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
|
||||
switch (mode) {
|
||||
case ADD: {
|
||||
@@ -237,7 +234,7 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
});
|
||||
|
||||
deltaLogFiles.keySet().stream().forEach(p -> logFiles.remove(p));
|
||||
//Add remaining log files back
|
||||
// Add remaining log files back
|
||||
logFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
|
||||
if (newFileSlice.getDataFile().isPresent() || (newFileSlice.getLogFiles().count() > 0)) {
|
||||
log.info("Adding back new file-slice after remove FS=" + newFileSlice);
|
||||
@@ -262,15 +259,14 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
|
||||
@Override
|
||||
Stream<Pair<String, CompactionOperation>> fetchPendingCompactionOperations() {
|
||||
return rocksDB.<Pair<String, CompactionOperation>>prefixSearch(
|
||||
schemaHelper.getColFamilyForPendingCompaction(), "").map(Pair::getValue);
|
||||
return rocksDB.<Pair<String, CompactionOperation>>prefixSearch(schemaHelper.getColFamilyForPendingCompaction(), "")
|
||||
.map(Pair::getValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
Stream<HoodieDataFile> fetchAllDataFiles(String partitionPath) {
|
||||
return rocksDB.<HoodieDataFile>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForDataFileViewByPartition(partitionPath))
|
||||
.map(Pair::getValue);
|
||||
schemaHelper.getPrefixForDataFileViewByPartition(partitionPath)).map(Pair::getValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -281,46 +277,50 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
|
||||
@Override
|
||||
Stream<HoodieFileGroup> fetchAllStoredFileGroups() {
|
||||
return getFileGroups(rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForSliceView()).map(Pair::getValue));
|
||||
return getFileGroups(
|
||||
rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(), schemaHelper.getPrefixForSliceView())
|
||||
.map(Pair::getValue));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Option<FileSlice> fetchLatestFileSlice(String partitionPath, String fileId) {
|
||||
// Retries only file-slices of the file and filters for the latest
|
||||
return Option.ofNullable(rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId))
|
||||
.map(Pair::getValue)
|
||||
.reduce(null, (x, y) ->
|
||||
((x == null) ? y : (y == null) ? null : HoodieTimeline.compareTimestamps(x.getBaseInstantTime(),
|
||||
y.getBaseInstantTime(), HoodieTimeline.GREATER) ? x : y)));
|
||||
return Option.ofNullable(rocksDB
|
||||
.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId))
|
||||
.map(Pair::getValue).reduce(null,
|
||||
(x, y) -> ((x == null) ? y
|
||||
: (y == null) ? null
|
||||
: HoodieTimeline.compareTimestamps(x.getBaseInstantTime(), y.getBaseInstantTime(),
|
||||
HoodieTimeline.GREATER) ? x : y)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Option<HoodieDataFile> fetchLatestDataFile(String partitionPath, String fileId) {
|
||||
// Retries only file-slices of the file and filters for the latest
|
||||
return Option.ofNullable(rocksDB.<HoodieDataFile>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForDataFileViewByPartitionFile(partitionPath, fileId))
|
||||
.map(Pair::getValue)
|
||||
.reduce(null, (x, y) ->
|
||||
((x == null) ? y : (y == null) ? null : HoodieTimeline.compareTimestamps(x.getCommitTime(),
|
||||
y.getCommitTime(), HoodieTimeline.GREATER) ? x : y)));
|
||||
return Option
|
||||
.ofNullable(rocksDB
|
||||
.<HoodieDataFile>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForDataFileViewByPartitionFile(partitionPath, fileId))
|
||||
.map(Pair::getValue).reduce(null,
|
||||
(x, y) -> ((x == null) ? y
|
||||
: (y == null) ? null
|
||||
: HoodieTimeline.compareTimestamps(x.getCommitTime(), y.getCommitTime(), HoodieTimeline.GREATER)
|
||||
? x
|
||||
: y)));
|
||||
}
|
||||
|
||||
@Override
|
||||
Option<HoodieFileGroup> fetchHoodieFileGroup(String partitionPath, String fileId) {
|
||||
return Option.fromJavaOptional(
|
||||
getFileGroups(rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId))
|
||||
.map(Pair::getValue)).findFirst());
|
||||
Option<HoodieFileGroup> fetchHoodieFileGroup(String partitionPath, String fileId) {
|
||||
return Option.fromJavaOptional(getFileGroups(rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
|
||||
schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId)).map(Pair::getValue)).findFirst());
|
||||
}
|
||||
|
||||
private Stream<HoodieFileGroup> getFileGroups(Stream<FileSlice> sliceStream) {
|
||||
return sliceStream.map(s -> Pair.of(Pair.of(s.getPartitionPath(), s.getFileId()), s))
|
||||
.collect(Collectors.groupingBy(Pair::getKey)).entrySet().stream().map(slicePair -> {
|
||||
HoodieFileGroup fg =
|
||||
new HoodieFileGroup(slicePair.getKey().getKey(), slicePair.getKey().getValue(),
|
||||
getVisibleCommitsAndCompactionTimeline());
|
||||
HoodieFileGroup fg = new HoodieFileGroup(slicePair.getKey().getKey(), slicePair.getKey().getValue(),
|
||||
getVisibleCommitsAndCompactionTimeline());
|
||||
slicePair.getValue().forEach(e -> fg.addFileSlice(e.getValue()));
|
||||
return fg;
|
||||
});
|
||||
|
||||
@@ -46,8 +46,8 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
|
||||
private final long maxMemoryForPendingCompaction;
|
||||
private final String baseStoreDir;
|
||||
|
||||
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient,
|
||||
HoodieTimeline visibleActiveTimeline, FileSystemViewStorageConfig config) {
|
||||
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
|
||||
FileSystemViewStorageConfig config) {
|
||||
super(config.isIncrementalTimelineSyncEnabled());
|
||||
this.maxMemoryForFileGroupMap = config.getMaxMemoryForFileGroupMap();
|
||||
this.maxMemoryForPendingCompaction = config.getMaxMemoryForPendingCompaction();
|
||||
@@ -55,8 +55,8 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
|
||||
init(metaClient, visibleActiveTimeline);
|
||||
}
|
||||
|
||||
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient,
|
||||
HoodieTimeline visibleActiveTimeline, FileStatus[] fileStatuses, FileSystemViewStorageConfig config) {
|
||||
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
|
||||
FileStatus[] fileStatuses, FileSystemViewStorageConfig config) {
|
||||
this(metaClient, visibleActiveTimeline, config);
|
||||
addFilesToView(fileStatuses);
|
||||
}
|
||||
@@ -64,12 +64,11 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
|
||||
@Override
|
||||
protected Map<String, List<HoodieFileGroup>> createPartitionToFileGroups() {
|
||||
try {
|
||||
log.info("Creating Partition To File groups map using external spillable Map. Max Mem="
|
||||
+ maxMemoryForFileGroupMap + ", BaseDir=" + baseStoreDir);
|
||||
log.info("Creating Partition To File groups map using external spillable Map. Max Mem=" + maxMemoryForFileGroupMap
|
||||
+ ", BaseDir=" + baseStoreDir);
|
||||
new File(baseStoreDir).mkdirs();
|
||||
return (Map<String, List<HoodieFileGroup>>)
|
||||
(new ExternalSpillableMap<>(maxMemoryForFileGroupMap, baseStoreDir, new DefaultSizeEstimator(),
|
||||
new DefaultSizeEstimator<>()));
|
||||
return (Map<String, List<HoodieFileGroup>>) (new ExternalSpillableMap<>(maxMemoryForFileGroupMap, baseStoreDir,
|
||||
new DefaultSizeEstimator(), new DefaultSizeEstimator<>()));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
@@ -78,12 +77,11 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
|
||||
protected Map<HoodieFileGroupId, Pair<String, CompactionOperation>> createFileIdToPendingCompactionMap(
|
||||
Map<HoodieFileGroupId, Pair<String, CompactionOperation>> fgIdToPendingCompaction) {
|
||||
try {
|
||||
log.info("Creating Pending Compaction map using external spillable Map. Max Mem="
|
||||
+ maxMemoryForPendingCompaction + ", BaseDir=" + baseStoreDir);
|
||||
log.info("Creating Pending Compaction map using external spillable Map. Max Mem=" + maxMemoryForPendingCompaction
|
||||
+ ", BaseDir=" + baseStoreDir);
|
||||
new File(baseStoreDir).mkdirs();
|
||||
Map<HoodieFileGroupId, Pair<String, CompactionOperation>> pendingMap =
|
||||
new ExternalSpillableMap<>(maxMemoryForPendingCompaction, baseStoreDir, new DefaultSizeEstimator(),
|
||||
new DefaultSizeEstimator<>());
|
||||
Map<HoodieFileGroupId, Pair<String, CompactionOperation>> pendingMap = new ExternalSpillableMap<>(
|
||||
maxMemoryForPendingCompaction, baseStoreDir, new DefaultSizeEstimator(), new DefaultSizeEstimator<>());
|
||||
pendingMap.putAll(fgIdToPendingCompaction);
|
||||
return pendingMap;
|
||||
} catch (IOException e) {
|
||||
@@ -92,20 +90,20 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
|
||||
}
|
||||
|
||||
public Stream<HoodieFileGroup> getAllFileGroups() {
|
||||
return ((ExternalSpillableMap)partitionToFileGroupsMap).valueStream()
|
||||
.flatMap(fg -> ((List<HoodieFileGroup>)fg).stream());
|
||||
return ((ExternalSpillableMap) partitionToFileGroupsMap).valueStream()
|
||||
.flatMap(fg -> ((List<HoodieFileGroup>) fg).stream());
|
||||
}
|
||||
|
||||
@Override
|
||||
Stream<Pair<String, CompactionOperation>> fetchPendingCompactionOperations() {
|
||||
return ((ExternalSpillableMap)fgIdToPendingCompaction).valueStream();
|
||||
return ((ExternalSpillableMap) fgIdToPendingCompaction).valueStream();
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieFileGroup> fetchAllStoredFileGroups() {
|
||||
return ((ExternalSpillableMap)partitionToFileGroupsMap).valueStream().flatMap(fg -> {
|
||||
return ((List<HoodieFileGroup>)fg).stream();
|
||||
return ((ExternalSpillableMap) partitionToFileGroupsMap).valueStream().flatMap(fg -> {
|
||||
return ((List<HoodieFileGroup>) fg).stream();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,17 +47,15 @@ import org.apache.hudi.common.HoodieRollbackStat;
|
||||
|
||||
public class AvroUtils {
|
||||
|
||||
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
|
||||
Option<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
|
||||
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder =
|
||||
ImmutableMap.builder();
|
||||
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime, Option<Long> durationInMs,
|
||||
List<HoodieCleanStat> cleanStats) {
|
||||
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder = ImmutableMap.builder();
|
||||
int totalDeleted = 0;
|
||||
String earliestCommitToRetain = null;
|
||||
for (HoodieCleanStat stat : cleanStats) {
|
||||
HoodieCleanPartitionMetadata metadata =
|
||||
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
|
||||
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(),
|
||||
stat.getDeletePathPatterns());
|
||||
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), stat.getDeletePathPatterns());
|
||||
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
|
||||
totalDeleted += stat.getSuccessDeleteFiles().size();
|
||||
if (earliestCommitToRetain == null) {
|
||||
@@ -65,78 +63,67 @@ public class AvroUtils {
|
||||
earliestCommitToRetain = stat.getEarliestCommitToRetain();
|
||||
}
|
||||
}
|
||||
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L),
|
||||
totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build());
|
||||
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L), totalDeleted,
|
||||
earliestCommitToRetain, partitionMetadataBuilder.build());
|
||||
}
|
||||
|
||||
public static HoodieRestoreMetadata convertRestoreMetadata(String startRestoreTime,
|
||||
Option<Long> durationInMs, List<String> commits, Map<String, List<HoodieRollbackStat>> commitToStats) {
|
||||
public static HoodieRestoreMetadata convertRestoreMetadata(String startRestoreTime, Option<Long> durationInMs,
|
||||
List<String> commits, Map<String, List<HoodieRollbackStat>> commitToStats) {
|
||||
ImmutableMap.Builder<String, List<HoodieRollbackMetadata>> commitToStatBuilder = ImmutableMap.builder();
|
||||
for (Map.Entry<String, List<HoodieRollbackStat>> commitToStat : commitToStats.entrySet()) {
|
||||
commitToStatBuilder.put(commitToStat.getKey(), Arrays.asList(convertRollbackMetadata(startRestoreTime,
|
||||
durationInMs, commits, commitToStat.getValue())));
|
||||
commitToStatBuilder.put(commitToStat.getKey(),
|
||||
Arrays.asList(convertRollbackMetadata(startRestoreTime, durationInMs, commits, commitToStat.getValue())));
|
||||
}
|
||||
return new HoodieRestoreMetadata(startRestoreTime, durationInMs.orElseGet(() -> -1L), commits,
|
||||
commitToStatBuilder.build());
|
||||
}
|
||||
|
||||
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime,
|
||||
Option<Long> durationInMs, List<String> commits, List<HoodieRollbackStat> rollbackStats) {
|
||||
ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder =
|
||||
ImmutableMap.builder();
|
||||
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime, Option<Long> durationInMs,
|
||||
List<String> commits, List<HoodieRollbackStat> rollbackStats) {
|
||||
ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder = ImmutableMap.builder();
|
||||
int totalDeleted = 0;
|
||||
for (HoodieRollbackStat stat : rollbackStats) {
|
||||
HoodieRollbackPartitionMetadata metadata =
|
||||
new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
|
||||
stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
|
||||
partitionMetadataBuilder
|
||||
.put(stat.getPartitionPath(), metadata);
|
||||
HoodieRollbackPartitionMetadata metadata = new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
|
||||
stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
|
||||
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
|
||||
totalDeleted += stat.getSuccessDeleteFiles().size();
|
||||
}
|
||||
return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L),
|
||||
totalDeleted, commits, partitionMetadataBuilder.build());
|
||||
return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L), totalDeleted, commits,
|
||||
partitionMetadataBuilder.build());
|
||||
}
|
||||
|
||||
public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment,
|
||||
Map<String, List<String>> latestFiles) {
|
||||
ImmutableMap.Builder<String, HoodieSavepointPartitionMetadata> partitionMetadataBuilder =
|
||||
ImmutableMap.builder();
|
||||
ImmutableMap.Builder<String, HoodieSavepointPartitionMetadata> partitionMetadataBuilder = ImmutableMap.builder();
|
||||
for (Map.Entry<String, List<String>> stat : latestFiles.entrySet()) {
|
||||
HoodieSavepointPartitionMetadata metadata =
|
||||
new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue());
|
||||
HoodieSavepointPartitionMetadata metadata = new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue());
|
||||
partitionMetadataBuilder.put(stat.getKey(), metadata);
|
||||
}
|
||||
return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment,
|
||||
partitionMetadataBuilder.build());
|
||||
return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment, partitionMetadataBuilder.build());
|
||||
}
|
||||
|
||||
public static Option<byte[]> serializeCompactionPlan(HoodieCompactionPlan compactionWorkload)
|
||||
throws IOException {
|
||||
public static Option<byte[]> serializeCompactionPlan(HoodieCompactionPlan compactionWorkload) throws IOException {
|
||||
return serializeAvroMetadata(compactionWorkload, HoodieCompactionPlan.class);
|
||||
}
|
||||
|
||||
public static Option<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata)
|
||||
throws IOException {
|
||||
public static Option<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata) throws IOException {
|
||||
return serializeAvroMetadata(metadata, HoodieCleanMetadata.class);
|
||||
}
|
||||
|
||||
public static Option<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata)
|
||||
throws IOException {
|
||||
public static Option<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata) throws IOException {
|
||||
return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class);
|
||||
}
|
||||
|
||||
public static Option<byte[]> serializeRollbackMetadata(
|
||||
HoodieRollbackMetadata rollbackMetadata) throws IOException {
|
||||
public static Option<byte[]> serializeRollbackMetadata(HoodieRollbackMetadata rollbackMetadata) throws IOException {
|
||||
return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class);
|
||||
}
|
||||
|
||||
public static Option<byte[]> serializeRestoreMetadata(
|
||||
HoodieRestoreMetadata restoreMetadata) throws IOException {
|
||||
public static Option<byte[]> serializeRestoreMetadata(HoodieRestoreMetadata restoreMetadata) throws IOException {
|
||||
return serializeAvroMetadata(restoreMetadata, HoodieRestoreMetadata.class);
|
||||
}
|
||||
|
||||
public static <T extends SpecificRecordBase> Option<byte[]> serializeAvroMetadata(T metadata,
|
||||
Class<T> clazz) throws IOException {
|
||||
public static <T extends SpecificRecordBase> Option<byte[]> serializeAvroMetadata(T metadata, Class<T> clazz)
|
||||
throws IOException {
|
||||
DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
|
||||
DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
@@ -146,28 +133,23 @@ public class AvroUtils {
|
||||
return Option.of(baos.toByteArray());
|
||||
}
|
||||
|
||||
public static HoodieCompactionPlan deserializeCompactionPlan(byte[] bytes)
|
||||
throws IOException {
|
||||
public static HoodieCompactionPlan deserializeCompactionPlan(byte[] bytes) throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieCompactionPlan.class);
|
||||
}
|
||||
|
||||
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes)
|
||||
throws IOException {
|
||||
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes) throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class);
|
||||
}
|
||||
|
||||
public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes)
|
||||
throws IOException {
|
||||
public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes) throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
|
||||
}
|
||||
|
||||
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes,
|
||||
Class<T> clazz) throws IOException {
|
||||
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
|
||||
throws IOException {
|
||||
DatumReader<T> reader = new SpecificDatumReader<>(clazz);
|
||||
FileReader<T> fileReader =
|
||||
DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
|
||||
Preconditions
|
||||
.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
|
||||
FileReader<T> fileReader = DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
|
||||
Preconditions.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
|
||||
return fileReader.next();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,8 +48,8 @@ public class CompactionUtils {
|
||||
/**
|
||||
* Generate compaction operation from file-slice
|
||||
*
|
||||
* @param partitionPath Partition path
|
||||
* @param fileSlice File Slice
|
||||
* @param partitionPath Partition path
|
||||
* @param fileSlice File Slice
|
||||
* @param metricsCaptureFunction Metrics Capture function
|
||||
* @return Compaction Operation
|
||||
*/
|
||||
@@ -74,17 +74,17 @@ public class CompactionUtils {
|
||||
* Generate compaction plan from file-slices
|
||||
*
|
||||
* @param partitionFileSlicePairs list of partition file-slice pairs
|
||||
* @param extraMetadata Extra Metadata
|
||||
* @param metricsCaptureFunction Metrics Capture function
|
||||
* @param extraMetadata Extra Metadata
|
||||
* @param metricsCaptureFunction Metrics Capture function
|
||||
*/
|
||||
public static HoodieCompactionPlan buildFromFileSlices(
|
||||
List<Pair<String, FileSlice>> partitionFileSlicePairs,
|
||||
public static HoodieCompactionPlan buildFromFileSlices(List<Pair<String, FileSlice>> partitionFileSlicePairs,
|
||||
Option<Map<String, String>> extraMetadata,
|
||||
Option<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFunction) {
|
||||
HoodieCompactionPlan.Builder builder = HoodieCompactionPlan.newBuilder();
|
||||
extraMetadata.ifPresent(m -> builder.setExtraMetadata(m));
|
||||
builder.setOperations(partitionFileSlicePairs.stream().map(pfPair ->
|
||||
buildFromFileSlice(pfPair.getKey(), pfPair.getValue(), metricsCaptureFunction)).collect(Collectors.toList()));
|
||||
builder.setOperations(partitionFileSlicePairs.stream()
|
||||
.map(pfPair -> buildFromFileSlice(pfPair.getKey(), pfPair.getValue(), metricsCaptureFunction))
|
||||
.collect(Collectors.toList()));
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
@@ -92,12 +92,10 @@ public class CompactionUtils {
|
||||
* Build Avro generated Compaction operation payload from compaction operation POJO for serialization
|
||||
*/
|
||||
public static HoodieCompactionOperation buildHoodieCompactionOperation(CompactionOperation op) {
|
||||
return HoodieCompactionOperation.newBuilder().setFileId(op.getFileId())
|
||||
.setBaseInstantTime(op.getBaseInstantTime())
|
||||
return HoodieCompactionOperation.newBuilder().setFileId(op.getFileId()).setBaseInstantTime(op.getBaseInstantTime())
|
||||
.setPartitionPath(op.getPartitionPath())
|
||||
.setDataFilePath(op.getDataFilePath().isPresent() ? op.getDataFilePath().get() : null)
|
||||
.setDeltaFilePaths(op.getDeltaFilePaths())
|
||||
.setMetrics(op.getMetrics()).build();
|
||||
.setDeltaFilePaths(op.getDeltaFilePaths()).setMetrics(op.getMetrics()).build();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -127,11 +125,10 @@ public class CompactionUtils {
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient,
|
||||
String compactionInstant) throws IOException {
|
||||
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(
|
||||
metaClient.getActiveTimeline().getInstantAuxiliaryDetails(
|
||||
HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
|
||||
public static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant)
|
||||
throws IOException {
|
||||
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(metaClient.getActiveTimeline()
|
||||
.getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
|
||||
return compactionPlan;
|
||||
}
|
||||
|
||||
@@ -184,6 +181,7 @@ public class CompactionUtils {
|
||||
|
||||
/**
|
||||
* Return all pending compaction instant times
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static List<HoodieInstant> getPendingCompactionInstantTimes(HoodieTableMetaClient metaClient) {
|
||||
|
||||
@@ -32,12 +32,12 @@ public interface ConsistencyGuard {
|
||||
* File Visibility
|
||||
*/
|
||||
enum FileVisibility {
|
||||
APPEAR,
|
||||
DISAPPEAR,
|
||||
APPEAR, DISAPPEAR,
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for file to be listable based on configurable timeout
|
||||
*
|
||||
* @param filePath
|
||||
* @throws IOException when having trouble listing the path
|
||||
* @throws TimeoutException when retries exhausted
|
||||
@@ -46,6 +46,7 @@ public interface ConsistencyGuard {
|
||||
|
||||
/**
|
||||
* Wait for file to be listable based on configurable timeout
|
||||
*
|
||||
* @param filePath
|
||||
* @throws IOException when having trouble listing the path
|
||||
* @throws TimeoutException when retries exhausted
|
||||
@@ -65,8 +66,9 @@ public interface ConsistencyGuard {
|
||||
|
||||
/**
|
||||
* Wait Till target visibility is reached
|
||||
* @param dirPath Directory Path
|
||||
* @param files Files
|
||||
*
|
||||
* @param dirPath Directory Path
|
||||
* @param files Files
|
||||
* @param targetVisibility Target Visibitlity
|
||||
* @throws IOException
|
||||
* @throws TimeoutException
|
||||
|
||||
@@ -106,14 +106,14 @@ public class ConsistencyGuardConfig extends DefaultHoodieConfig {
|
||||
}
|
||||
|
||||
public ConsistencyGuardConfig build() {
|
||||
setDefaultOnCondition(props, !props.containsKey(CONSISTENCY_CHECK_ENABLED_PROP),
|
||||
CONSISTENCY_CHECK_ENABLED_PROP, DEFAULT_CONSISTENCY_CHECK_ENABLED);
|
||||
setDefaultOnCondition(props, !props.containsKey(CONSISTENCY_CHECK_ENABLED_PROP), CONSISTENCY_CHECK_ENABLED_PROP,
|
||||
DEFAULT_CONSISTENCY_CHECK_ENABLED);
|
||||
setDefaultOnCondition(props, !props.containsKey(INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP),
|
||||
INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(DEFAULT_INITIAL_CONSISTENCY_CHECK_INTERVAL_MS));
|
||||
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP),
|
||||
MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECK_INTERVAL_MS));
|
||||
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECKS_PROP),
|
||||
MAX_CONSISTENCY_CHECKS_PROP, String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECKS));
|
||||
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECKS_PROP), MAX_CONSISTENCY_CHECKS_PROP,
|
||||
String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECKS));
|
||||
|
||||
return new ConsistencyGuardConfig(props);
|
||||
}
|
||||
|
||||
@@ -63,7 +63,7 @@ public class DFSPropertiesConfiguration {
|
||||
int ind = line.indexOf('=');
|
||||
String k = line.substring(0, ind).trim();
|
||||
String v = line.substring(ind + 1).trim();
|
||||
return new String[]{k, v};
|
||||
return new String[] {k, v};
|
||||
}
|
||||
|
||||
private void visitFile(Path file) {
|
||||
@@ -82,6 +82,7 @@ public class DFSPropertiesConfiguration {
|
||||
|
||||
/**
|
||||
* Add properties from input stream
|
||||
*
|
||||
* @param reader Buffered Reader
|
||||
* @throws IOException
|
||||
*/
|
||||
|
||||
@@ -20,12 +20,13 @@ package org.apache.hudi.common.util;
|
||||
|
||||
/**
|
||||
* Default implementation of size-estimator that uses Twitter's ObjectSizeCalculator
|
||||
*
|
||||
* @param <T>
|
||||
*/
|
||||
public class DefaultSizeEstimator<T> implements SizeEstimator<T> {
|
||||
|
||||
@Override
|
||||
public long sizeEstimate(T t) {
|
||||
public long sizeEstimate(T t) {
|
||||
return ObjectSizeCalculator.getObjectSize(t);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,10 +83,7 @@ public class FSUtils {
|
||||
for (Entry<String, String> prop : System.getenv().entrySet()) {
|
||||
if (prop.getKey().startsWith(HOODIE_ENV_PROPS_PREFIX)) {
|
||||
LOG.info("Picking up value for hoodie env var :" + prop.getKey());
|
||||
conf.set(prop.getKey()
|
||||
.replace(HOODIE_ENV_PROPS_PREFIX, "")
|
||||
.replaceAll("_DOT_", "."),
|
||||
prop.getValue());
|
||||
conf.set(prop.getKey().replace(HOODIE_ENV_PROPS_PREFIX, "").replaceAll("_DOT_", "."), prop.getValue());
|
||||
}
|
||||
}
|
||||
return conf;
|
||||
@@ -98,12 +95,10 @@ public class FSUtils {
|
||||
try {
|
||||
fs = new Path(path).getFileSystem(conf);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(),
|
||||
e);
|
||||
throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(), e);
|
||||
}
|
||||
LOG.info(
|
||||
String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]",
|
||||
conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString()));
|
||||
LOG.info(String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]",
|
||||
conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString()));
|
||||
return fs;
|
||||
}
|
||||
|
||||
@@ -125,11 +120,11 @@ public class FSUtils {
|
||||
|
||||
public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs) {
|
||||
Preconditions.checkArgument(markerPath.endsWith(HoodieTableMetaClient.MARKER_EXTN));
|
||||
String markerRootPath = Path.getPathWithoutSchemeAndAuthority(new Path(
|
||||
String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs))).toString();
|
||||
String markerRootPath = Path.getPathWithoutSchemeAndAuthority(
|
||||
new Path(String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs))).toString();
|
||||
int begin = markerPath.indexOf(markerRootPath);
|
||||
Preconditions.checkArgument(begin >= 0, "Not in marker dir. Marker Path=" + markerPath
|
||||
+ ", Expected Marker Root=" + markerRootPath);
|
||||
Preconditions.checkArgument(begin >= 0,
|
||||
"Not in marker dir. Marker Path=" + markerPath + ", Expected Marker Root=" + markerRootPath);
|
||||
String rPath = markerPath.substring(begin + markerRootPath.length() + 1);
|
||||
return String.format("%s/%s%s", basePath, rPath.replace(HoodieTableMetaClient.MARKER_EXTN, ""),
|
||||
HoodieFileFormat.PARQUET.getFileExtension());
|
||||
@@ -159,42 +154,38 @@ public class FSUtils {
|
||||
/**
|
||||
* Gets all partition paths assuming date partitioning (year, month, day) three levels down.
|
||||
*/
|
||||
public static List<String> getAllPartitionFoldersThreeLevelsDown(FileSystem fs, String basePath)
|
||||
throws IOException {
|
||||
public static List<String> getAllPartitionFoldersThreeLevelsDown(FileSystem fs, String basePath) throws IOException {
|
||||
List<String> datePartitions = new ArrayList<>();
|
||||
// Avoid listing and including any folders under the metafolder
|
||||
PathFilter filter = getExcludeMetaPathFilter();
|
||||
FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"), filter);
|
||||
for (FileStatus status : folders) {
|
||||
Path path = status.getPath();
|
||||
datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(),
|
||||
path.getParent().getName(), path.getName()));
|
||||
datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(), path.getParent().getName(),
|
||||
path.getName()));
|
||||
}
|
||||
return datePartitions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a base partition and a partition path, return
|
||||
* relative path of partition path to the base path
|
||||
* Given a base partition and a partition path, return relative path of partition path to the base path
|
||||
*/
|
||||
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
|
||||
basePath = Path.getPathWithoutSchemeAndAuthority(basePath);
|
||||
partitionPath = Path.getPathWithoutSchemeAndAuthority(partitionPath);
|
||||
String partitionFullPath = partitionPath.toString();
|
||||
int partitionStartIndex = partitionFullPath.indexOf(
|
||||
basePath.getName(),
|
||||
int partitionStartIndex = partitionFullPath.indexOf(basePath.getName(),
|
||||
basePath.getParent() == null ? 0 : basePath.getParent().toString().length());
|
||||
// Partition-Path could be empty for non-partitioned tables
|
||||
return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? "" :
|
||||
partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
|
||||
return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? ""
|
||||
: partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain all the partition paths, that are present in this table, denoted by presence of {@link
|
||||
* HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}
|
||||
* Obtain all the partition paths, that are present in this table, denoted by presence of
|
||||
* {@link HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}
|
||||
*/
|
||||
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr)
|
||||
throws IOException {
|
||||
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr) throws IOException {
|
||||
final Path basePath = new Path(basePathStr);
|
||||
final List<String> partitions = new ArrayList<>();
|
||||
processFiles(fs, basePathStr, (locatedFileStatus) -> {
|
||||
@@ -221,17 +212,18 @@ public class FSUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its
|
||||
* subdirs are skipped
|
||||
* @param fs File System
|
||||
* @param basePathStr Base-Path
|
||||
* @param consumer Callback for processing
|
||||
* Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its subdirs
|
||||
* are skipped
|
||||
*
|
||||
* @param fs File System
|
||||
* @param basePathStr Base-Path
|
||||
* @param consumer Callback for processing
|
||||
* @param excludeMetaFolder Exclude .hoodie folder
|
||||
* @throws IOException
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static void processFiles(FileSystem fs, String basePathStr,
|
||||
Function<FileStatus, Boolean> consumer, boolean excludeMetaFolder) throws IOException {
|
||||
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
|
||||
boolean excludeMetaFolder) throws IOException {
|
||||
PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER;
|
||||
FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
|
||||
for (int i = 0; i < topLevelStatuses.length; i++) {
|
||||
@@ -254,8 +246,7 @@ public class FSUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr,
|
||||
boolean assumeDatePartitioning)
|
||||
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr, boolean assumeDatePartitioning)
|
||||
throws IOException {
|
||||
if (assumeDatePartitioning) {
|
||||
return getAllPartitionFoldersThreeLevelsDown(fs, basePathStr);
|
||||
@@ -304,8 +295,8 @@ public class FSUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
|
||||
* have commitTime in the file name.
|
||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not have commitTime in
|
||||
* the file name.
|
||||
*/
|
||||
public static String getFileIdFromLogPath(Path path) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||
@@ -326,8 +317,8 @@ public class FSUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
|
||||
* have commitTime in the file name.
|
||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not have commitTime in
|
||||
* the file name.
|
||||
*/
|
||||
public static String getBaseCommitTimeFromLogPath(Path path) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||
@@ -395,10 +386,11 @@ public class FSUtils {
|
||||
return Integer.parseInt(matcher.group(4));
|
||||
}
|
||||
|
||||
public static String makeLogFileName(String fileId, String logFileExtension,
|
||||
String baseCommitTime, int version, String writeToken) {
|
||||
String suffix = (writeToken == null) ? String.format("%s_%s%s.%d",fileId, baseCommitTime, logFileExtension, version)
|
||||
: String.format("%s_%s%s.%d_%s", fileId, baseCommitTime, logFileExtension, version, writeToken);
|
||||
public static String makeLogFileName(String fileId, String logFileExtension, String baseCommitTime, int version,
|
||||
String writeToken) {
|
||||
String suffix =
|
||||
(writeToken == null) ? String.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version)
|
||||
: String.format("%s_%s%s.%d_%s", fileId, baseCommitTime, logFileExtension, version, writeToken);
|
||||
return LOG_FILE_PREFIX + suffix;
|
||||
}
|
||||
|
||||
@@ -420,12 +412,11 @@ public class FSUtils {
|
||||
/**
|
||||
* Get all the log files for the passed in FileId in the partition path
|
||||
*/
|
||||
public static Stream<HoodieLogFile> getAllLogFiles(FileSystem fs, Path partitionPath,
|
||||
final String fileId, final String logFileExtension, final String baseCommitTime)
|
||||
throws IOException {
|
||||
return Arrays.stream(fs.listStatus(partitionPath,
|
||||
path -> path.getName().startsWith("." + fileId) && path.getName()
|
||||
.contains(logFileExtension)))
|
||||
public static Stream<HoodieLogFile> getAllLogFiles(FileSystem fs, Path partitionPath, final String fileId,
|
||||
final String logFileExtension, final String baseCommitTime) throws IOException {
|
||||
return Arrays
|
||||
.stream(fs.listStatus(partitionPath,
|
||||
path -> path.getName().startsWith("." + fileId) && path.getName().contains(logFileExtension)))
|
||||
.map(HoodieLogFile::new).filter(s -> s.getBaseCommitTime().equals(baseCommitTime));
|
||||
}
|
||||
|
||||
@@ -433,14 +424,12 @@ public class FSUtils {
|
||||
* Get the latest log version for the fileId in the partition path
|
||||
*/
|
||||
public static Option<Pair<Integer, String>> getLatestLogVersion(FileSystem fs, Path partitionPath,
|
||||
final String fileId, final String logFileExtension, final String baseCommitTime)
|
||||
throws IOException {
|
||||
final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException {
|
||||
Option<HoodieLogFile> latestLogFile =
|
||||
getLatestLogFile(
|
||||
getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime));
|
||||
getLatestLogFile(getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime));
|
||||
if (latestLogFile.isPresent()) {
|
||||
return Option.of(Pair.of(latestLogFile.get().getLogVersion(),
|
||||
getWriteTokenFromLogPath(latestLogFile.get().getPath())));
|
||||
return Option
|
||||
.of(Pair.of(latestLogFile.get().getLogVersion(), getWriteTokenFromLogPath(latestLogFile.get().getPath())));
|
||||
}
|
||||
return Option.empty();
|
||||
}
|
||||
@@ -450,7 +439,7 @@ public class FSUtils {
|
||||
*/
|
||||
public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId,
|
||||
final String logFileExtension, final String baseCommitTime) throws IOException {
|
||||
Option<Pair<Integer, String>> currentVersionWithWriteToken =
|
||||
Option<Pair<Integer, String>> currentVersionWithWriteToken =
|
||||
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
|
||||
// handle potential overflow
|
||||
return (currentVersionWithWriteToken.isPresent()) ? currentVersionWithWriteToken.get().getKey() + 1
|
||||
@@ -466,10 +455,9 @@ public class FSUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* When a file was opened and the task died without closing the stream, another task executor
|
||||
* cannot open because the existing lease will be active. We will try to recover the lease, from
|
||||
* HDFS. If a data node went down, it takes about 10 minutes for the lease to be rocovered. But if
|
||||
* the client dies, this should be instant.
|
||||
* When a file was opened and the task died without closing the stream, another task executor cannot open because the
|
||||
* existing lease will be active. We will try to recover the lease, from HDFS. If a data node went down, it takes
|
||||
* about 10 minutes for the lease to be rocovered. But if the client dies, this should be instant.
|
||||
*/
|
||||
public static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p)
|
||||
throws IOException, InterruptedException {
|
||||
@@ -489,44 +477,38 @@ public class FSUtils {
|
||||
return recovered;
|
||||
}
|
||||
|
||||
public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath,
|
||||
Stream<HoodieInstant> instants) {
|
||||
//TODO - this should be archived when archival is made general for all meta-data
|
||||
public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath, Stream<HoodieInstant> instants) {
|
||||
// TODO - this should be archived when archival is made general for all meta-data
|
||||
// skip MIN_CLEAN_TO_KEEP and delete rest
|
||||
instants.skip(MIN_CLEAN_TO_KEEP).map(s -> {
|
||||
try {
|
||||
return fs.delete(new Path(metaPath, s.getFileName()), false);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(),
|
||||
e);
|
||||
throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(), e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath,
|
||||
Stream<HoodieInstant> instants) {
|
||||
//TODO - this should be archived when archival is made general for all meta-data
|
||||
public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath, Stream<HoodieInstant> instants) {
|
||||
// TODO - this should be archived when archival is made general for all meta-data
|
||||
// skip MIN_ROLLBACK_TO_KEEP and delete rest
|
||||
instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> {
|
||||
try {
|
||||
return fs.delete(new Path(metaPath, s.getFileName()), false);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not delete rollback meta files " + s.getFileName(), e);
|
||||
throw new HoodieIOException("Could not delete rollback meta files " + s.getFileName(), e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static void deleteOlderRestoreMetaFiles(FileSystem fs, String metaPath,
|
||||
Stream<HoodieInstant> instants) {
|
||||
//TODO - this should be archived when archival is made general for all meta-data
|
||||
public static void deleteOlderRestoreMetaFiles(FileSystem fs, String metaPath, Stream<HoodieInstant> instants) {
|
||||
// TODO - this should be archived when archival is made general for all meta-data
|
||||
// skip MIN_ROLLBACK_TO_KEEP and delete rest
|
||||
instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> {
|
||||
try {
|
||||
return fs.delete(new Path(metaPath, s.getFileName()), false);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not delete restore meta files " + s.getFileName(), e);
|
||||
throw new HoodieIOException("Could not delete restore meta files " + s.getFileName(), e);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -547,18 +529,18 @@ public class FSUtils {
|
||||
|
||||
public static Path getPartitionPath(Path basePath, String partitionPath) {
|
||||
// FOr non-partitioned table, return only base-path
|
||||
return ((partitionPath == null) || (partitionPath.isEmpty())) ? basePath :
|
||||
new Path(basePath, partitionPath);
|
||||
return ((partitionPath == null) || (partitionPath.isEmpty())) ? basePath : new Path(basePath, partitionPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is due to HUDI-140 GCS has a different behavior for detecting EOF during seek().
|
||||
*
|
||||
* @param inputStream FSDataInputStream
|
||||
* @return true if the inputstream or the wrapped one is of type GoogleHadoopFSInputStream
|
||||
*/
|
||||
public static boolean isGCSInputStream(FSDataInputStream inputStream) {
|
||||
return inputStream.getClass().getCanonicalName().equals("com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream")
|
||||
|| inputStream.getWrappedStream().getClass().getCanonicalName()
|
||||
.equals("com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream");
|
||||
.equals("com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,12 +51,11 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
|
||||
|
||||
@Override
|
||||
public void waitTillFileAppears(Path filePath) throws TimeoutException {
|
||||
waitForFileVisibility(filePath, FileVisibility.APPEAR);
|
||||
waitForFileVisibility(filePath, FileVisibility.APPEAR);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void waitTillFileDisappears(Path filePath)
|
||||
throws TimeoutException {
|
||||
public void waitTillFileDisappears(Path filePath) throws TimeoutException {
|
||||
waitForFileVisibility(filePath, FileVisibility.DISAPPEAR);
|
||||
}
|
||||
|
||||
@@ -72,13 +71,13 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
|
||||
|
||||
/**
|
||||
* Helper function to wait for all files belonging to single directory to appear
|
||||
*
|
||||
* @param dirPath Dir Path
|
||||
* @param files Files to appear/disappear
|
||||
* @param event Appear/Disappear
|
||||
* @throws TimeoutException
|
||||
*/
|
||||
public void waitForFilesVisibility(String dirPath, List<String> files, FileVisibility event)
|
||||
throws TimeoutException {
|
||||
public void waitForFilesVisibility(String dirPath, List<String> files, FileVisibility event) throws TimeoutException {
|
||||
Path dir = new Path(dirPath);
|
||||
List<String> filesWithoutSchemeAndAuthority =
|
||||
files.stream().map(f -> Path.getPathWithoutSchemeAndAuthority(new Path(f))).map(p -> p.toString())
|
||||
@@ -112,6 +111,7 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
|
||||
|
||||
/**
|
||||
* Helper to check of file visibility
|
||||
*
|
||||
* @param filePath File Path
|
||||
* @param visibility Visibility
|
||||
* @return
|
||||
@@ -140,6 +140,7 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
|
||||
|
||||
/**
|
||||
* Helper function to wait till file either appears/disappears
|
||||
*
|
||||
* @param filePath File Path
|
||||
* @param visibility
|
||||
* @throws TimeoutException
|
||||
@@ -166,6 +167,7 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
|
||||
|
||||
/**
|
||||
* Retries the predicate for condfigurable number of times till we the predicate returns success
|
||||
*
|
||||
* @param predicate Predicate Function
|
||||
* @param timedOutMessage Timed-Out message for logging
|
||||
* @throws TimeoutException when retries are exhausted
|
||||
|
||||
@@ -39,10 +39,7 @@ public class FileIOUtils {
|
||||
|
||||
public static void deleteDirectory(File directory) throws IOException {
|
||||
if (directory.exists()) {
|
||||
Files.walk(directory.toPath())
|
||||
.sorted(Comparator.reverseOrder())
|
||||
.map(Path::toFile)
|
||||
.forEach(File::delete);
|
||||
Files.walk(directory.toPath()).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
|
||||
directory.delete();
|
||||
if (directory.exists()) {
|
||||
throw new IOException("Unable to delete directory " + directory);
|
||||
|
||||
@@ -56,9 +56,8 @@ public class HoodieAvroUtils {
|
||||
private static ThreadLocal<BinaryDecoder> reuseDecoder = ThreadLocal.withInitial(() -> null);
|
||||
|
||||
// All metadata fields are optional strings.
|
||||
private static final Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList(
|
||||
Schema.create(Schema.Type.NULL),
|
||||
Schema.create(Schema.Type.STRING)));
|
||||
private static final Schema METADATA_FIELD_SCHEMA =
|
||||
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)));
|
||||
|
||||
private static final Schema RECORD_KEY_SCHEMA = initRecordKeySchema();
|
||||
|
||||
@@ -66,8 +65,7 @@ public class HoodieAvroUtils {
|
||||
* Convert a given avro record to bytes
|
||||
*/
|
||||
public static byte[] avroToBytes(GenericRecord record) throws IOException {
|
||||
GenericDatumWriter<GenericRecord> writer =
|
||||
new GenericDatumWriter<>(record.getSchema());
|
||||
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(record.getSchema());
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, reuseEncoder.get());
|
||||
reuseEncoder.set(encoder);
|
||||
@@ -101,16 +99,16 @@ public class HoodieAvroUtils {
|
||||
public static Schema addMetadataFields(Schema schema) {
|
||||
List<Schema.Field> parentFields = new ArrayList<>();
|
||||
|
||||
Schema.Field commitTimeField = new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field commitSeqnoField = new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field fileNameField = new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field commitTimeField =
|
||||
new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field commitSeqnoField =
|
||||
new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field recordKeyField =
|
||||
new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field partitionPathField =
|
||||
new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field fileNameField =
|
||||
new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
|
||||
parentFields.add(commitTimeField);
|
||||
parentFields.add(commitSeqnoField);
|
||||
@@ -127,15 +125,14 @@ public class HoodieAvroUtils {
|
||||
}
|
||||
}
|
||||
|
||||
Schema mergedSchema = Schema
|
||||
.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
|
||||
Schema mergedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
|
||||
mergedSchema.setFields(parentFields);
|
||||
return mergedSchema;
|
||||
}
|
||||
|
||||
private static Schema initRecordKeySchema() {
|
||||
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema.Field recordKeyField =
|
||||
new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
|
||||
Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false);
|
||||
recordKeySchema.setFields(Arrays.asList(recordKeyField));
|
||||
return recordKeySchema;
|
||||
@@ -145,8 +142,8 @@ public class HoodieAvroUtils {
|
||||
return RECORD_KEY_SCHEMA;
|
||||
}
|
||||
|
||||
public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey,
|
||||
String partitionPath, String fileName) {
|
||||
public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey, String partitionPath,
|
||||
String fileName) {
|
||||
record.put(HoodieRecord.FILENAME_METADATA_FIELD, fileName);
|
||||
record.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, partitionPath);
|
||||
record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recordKey);
|
||||
@@ -154,9 +151,9 @@ public class HoodieAvroUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* Add null fields to passed in schema. Caller is responsible for ensuring there is no duplicates.
|
||||
* As different query engines have varying constraints regarding treating the case-sensitivity of fields, its best
|
||||
* to let caller determine that.
|
||||
* Add null fields to passed in schema. Caller is responsible for ensuring there is no duplicates. As different query
|
||||
* engines have varying constraints regarding treating the case-sensitivity of fields, its best to let caller
|
||||
* determine that.
|
||||
*
|
||||
* @param schema Passed in schema
|
||||
* @param newFieldNames Null Field names to be added
|
||||
@@ -176,8 +173,7 @@ public class HoodieAvroUtils {
|
||||
/**
|
||||
* Adds the Hoodie commit metadata into the provided Generic Record.
|
||||
*/
|
||||
public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime,
|
||||
String commitSeqno) {
|
||||
public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime, String commitSeqno) {
|
||||
record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
|
||||
record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno);
|
||||
return record;
|
||||
@@ -207,8 +203,7 @@ public class HoodieAvroUtils {
|
||||
}
|
||||
if (!GenericData.get().validate(newSchema, newRecord)) {
|
||||
throw new SchemaCompatabilityException(
|
||||
"Unable to validate the rewritten record " + record + " against schema "
|
||||
+ newSchema);
|
||||
"Unable to validate the rewritten record " + record + " against schema " + newSchema);
|
||||
}
|
||||
return newRecord;
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Size Estimator for Hoodie record payload
|
||||
*
|
||||
* @param <T>
|
||||
*/
|
||||
public class HoodieRecordSizeEstimator<T extends HoodieRecordPayload> implements SizeEstimator<HoodieRecord<T>> {
|
||||
|
||||
@@ -23,9 +23,8 @@ import java.util.Deque;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
|
||||
/**
|
||||
* Timing utility to help keep track of execution times of code blocks. This class helps to allow multiple
|
||||
* timers started at the same time and automatically returns the execution time in the order in which the
|
||||
* timers are stopped.
|
||||
* Timing utility to help keep track of execution times of code blocks. This class helps to allow multiple timers
|
||||
* started at the same time and automatically returns the execution time in the order in which the timers are stopped.
|
||||
*/
|
||||
public class HoodieTimer {
|
||||
|
||||
|
||||
@@ -49,8 +49,8 @@ public class LogReaderUtils {
|
||||
HoodieLogBlock block = reader.prev();
|
||||
if (block instanceof HoodieAvroDataBlock && block != null) {
|
||||
HoodieAvroDataBlock lastBlock = (HoodieAvroDataBlock) block;
|
||||
if (completedTimeline.containsOrBeforeTimelineStarts(lastBlock.getLogBlockHeader().get(HeaderMetadataType
|
||||
.INSTANT_TIME))) {
|
||||
if (completedTimeline
|
||||
.containsOrBeforeTimelineStarts(lastBlock.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME))) {
|
||||
writerSchema = Schema.parse(lastBlock.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -27,12 +27,10 @@ import org.apache.hadoop.fs.Path;
|
||||
public class NoOpConsistencyGuard implements ConsistencyGuard {
|
||||
|
||||
@Override
|
||||
public void waitTillFileAppears(Path filePath) {
|
||||
}
|
||||
public void waitTillFileAppears(Path filePath) {}
|
||||
|
||||
@Override
|
||||
public void waitTillFileDisappears(Path filePath) {
|
||||
}
|
||||
public void waitTillFileDisappears(Path filePath) {}
|
||||
|
||||
@Override
|
||||
public void waitTillAllFilesAppear(String dirPath, List<String> files) {
|
||||
|
||||
@@ -54,17 +54,13 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Contains utility methods for calculating the memory usage of objects. It
|
||||
* only works on the HotSpot JVM, and infers the actual memory layout (32 bit
|
||||
* vs. 64 bit word size, compressed object pointers vs. uncompressed) from
|
||||
* best available indicators. It can reliably detect a 32 bit vs. 64 bit JVM.
|
||||
* It can only make an educated guess at whether compressed OOPs are used,
|
||||
* though; specifically, it knows what the JVM's default choice of OOP
|
||||
* compression would be based on HotSpot version and maximum heap sizes, but if
|
||||
* the choice is explicitly overridden with the <tt>-XX:{+|-}UseCompressedOops</tt> command line
|
||||
* switch, it can not detect
|
||||
* this fact and will report incorrect sizes, as it will presume the default JVM
|
||||
* behavior.
|
||||
* Contains utility methods for calculating the memory usage of objects. It only works on the HotSpot JVM, and infers
|
||||
* the actual memory layout (32 bit vs. 64 bit word size, compressed object pointers vs. uncompressed) from best
|
||||
* available indicators. It can reliably detect a 32 bit vs. 64 bit JVM. It can only make an educated guess at whether
|
||||
* compressed OOPs are used, though; specifically, it knows what the JVM's default choice of OOP compression would be
|
||||
* based on HotSpot version and maximum heap sizes, but if the choice is explicitly overridden with the
|
||||
* <tt>-XX:{+|-}UseCompressedOops</tt> command line switch, it can not detect this fact and will report incorrect sizes,
|
||||
* as it will presume the default JVM behavior.
|
||||
*
|
||||
* @author Attila Szegedi
|
||||
*/
|
||||
@@ -104,8 +100,7 @@ public class ObjectSizeCalculator {
|
||||
int getReferenceSize();
|
||||
|
||||
/**
|
||||
* Returns the quantum field size for a field owned by one of an object's ancestor superclasses
|
||||
* in this JVM.
|
||||
* Returns the quantum field size for a field owned by one of an object's ancestor superclasses in this JVM.
|
||||
*
|
||||
* @return the quantum field size for a superclass field.
|
||||
*/
|
||||
@@ -114,24 +109,18 @@ public class ObjectSizeCalculator {
|
||||
|
||||
private static class CurrentLayout {
|
||||
|
||||
private static final MemoryLayoutSpecification SPEC =
|
||||
getEffectiveMemoryLayoutSpecification();
|
||||
private static final MemoryLayoutSpecification SPEC = getEffectiveMemoryLayoutSpecification();
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an object, returns the total allocated size, in bytes, of the object
|
||||
* and all other objects reachable from it. Attempts to detect the current JVM memory layout,
|
||||
* but may fail with {@link UnsupportedOperationException};
|
||||
* Given an object, returns the total allocated size, in bytes, of the object and all other objects reachable from it.
|
||||
* Attempts to detect the current JVM memory layout, but may fail with {@link UnsupportedOperationException};
|
||||
*
|
||||
* @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do
|
||||
* anything special, it measures the size of all objects
|
||||
* reachable through it (which will include its class loader, and by
|
||||
* extension, all other Class objects loaded by
|
||||
* the same loader, and all the parent class loaders). It doesn't provide the
|
||||
* size of the static fields in the JVM class that the Class object
|
||||
* represents.
|
||||
* @return the total allocated size of the object and all other objects it
|
||||
* retains.
|
||||
* @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do anything special, it
|
||||
* measures the size of all objects reachable through it (which will include its class loader, and by
|
||||
* extension, all other Class objects loaded by the same loader, and all the parent class loaders). It doesn't
|
||||
* provide the size of the static fields in the JVM class that the Class object represents.
|
||||
* @return the total allocated size of the object and all other objects it retains.
|
||||
* @throws UnsupportedOperationException if the current vm memory layout cannot be detected.
|
||||
*/
|
||||
public static long getObjectSize(Object obj) throws UnsupportedOperationException {
|
||||
@@ -164,8 +153,7 @@ public class ObjectSizeCalculator {
|
||||
private long size;
|
||||
|
||||
/**
|
||||
* Creates an object size calculator that can calculate object sizes for a given
|
||||
* {@code memoryLayoutSpecification}.
|
||||
* Creates an object size calculator that can calculate object sizes for a given {@code memoryLayoutSpecification}.
|
||||
*
|
||||
* @param memoryLayoutSpecification a description of the JVM memory layout.
|
||||
*/
|
||||
@@ -179,24 +167,19 @@ public class ObjectSizeCalculator {
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an object, returns the total allocated size, in bytes, of the object
|
||||
* and all other objects reachable from it.
|
||||
* Given an object, returns the total allocated size, in bytes, of the object and all other objects reachable from it.
|
||||
*
|
||||
* @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do
|
||||
* anything special, it measures the size of all objects
|
||||
* reachable through it (which will include its class loader, and by
|
||||
* extension, all other Class objects loaded by
|
||||
* the same loader, and all the parent class loaders). It doesn't provide the
|
||||
* size of the static fields in the JVM class that the Class object
|
||||
* represents.
|
||||
* @return the total allocated size of the object and all other objects it
|
||||
* retains.
|
||||
* @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do anything special, it
|
||||
* measures the size of all objects reachable through it (which will include its class loader, and by
|
||||
* extension, all other Class objects loaded by the same loader, and all the parent class loaders). It doesn't
|
||||
* provide the size of the static fields in the JVM class that the Class object represents.
|
||||
* @return the total allocated size of the object and all other objects it retains.
|
||||
*/
|
||||
public synchronized long calculateObjectSize(Object obj) {
|
||||
// Breadth-first traversal instead of naive depth-first with recursive
|
||||
// implementation, so we don't blow the stack traversing long linked lists.
|
||||
try {
|
||||
for (; ; ) {
|
||||
for (;;) {
|
||||
visit(obj);
|
||||
if (pending.isEmpty()) {
|
||||
return size;
|
||||
@@ -324,8 +307,7 @@ public class ObjectSizeCalculator {
|
||||
}
|
||||
this.fieldsSize = fieldsSize;
|
||||
this.objectSize = roundTo(objectHeaderSize + fieldsSize, objectPadding);
|
||||
this.referenceFields = referenceFields.toArray(
|
||||
new Field[referenceFields.size()]);
|
||||
this.referenceFields = referenceFields.toArray(new Field[referenceFields.size()]);
|
||||
}
|
||||
|
||||
void visit(Object obj, ObjectSizeCalculator calc) {
|
||||
@@ -338,8 +320,7 @@ public class ObjectSizeCalculator {
|
||||
try {
|
||||
calc.enqueue(f.get(obj));
|
||||
} catch (IllegalAccessException e) {
|
||||
final AssertionError ae = new AssertionError(
|
||||
"Unexpected denial of access to " + f);
|
||||
final AssertionError ae = new AssertionError("Unexpected denial of access to " + f);
|
||||
ae.initCause(e);
|
||||
throw ae;
|
||||
}
|
||||
@@ -360,17 +341,15 @@ public class ObjectSizeCalculator {
|
||||
if (type == long.class || type == double.class) {
|
||||
return 8;
|
||||
}
|
||||
throw new AssertionError("Encountered unexpected primitive type "
|
||||
+ type.getName());
|
||||
throw new AssertionError("Encountered unexpected primitive type " + type.getName());
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
static MemoryLayoutSpecification getEffectiveMemoryLayoutSpecification() {
|
||||
final String vmName = System.getProperty("java.vm.name");
|
||||
if (vmName == null || !(vmName.startsWith("Java HotSpot(TM) ")
|
||||
|| vmName.startsWith("OpenJDK") || vmName.startsWith("TwitterJDK"))) {
|
||||
throw new UnsupportedOperationException(
|
||||
"ObjectSizeCalculator only supported on HotSpot VM");
|
||||
if (vmName == null || !(vmName.startsWith("Java HotSpot(TM) ") || vmName.startsWith("OpenJDK")
|
||||
|| vmName.startsWith("TwitterJDK"))) {
|
||||
throw new UnsupportedOperationException("ObjectSizeCalculator only supported on HotSpot VM");
|
||||
}
|
||||
|
||||
final String dataModel = System.getProperty("sun.arch.data.model");
|
||||
@@ -403,13 +382,12 @@ public class ObjectSizeCalculator {
|
||||
}
|
||||
};
|
||||
} else if (!"64".equals(dataModel)) {
|
||||
throw new UnsupportedOperationException("Unrecognized value '"
|
||||
+ dataModel + "' of sun.arch.data.model system property");
|
||||
throw new UnsupportedOperationException(
|
||||
"Unrecognized value '" + dataModel + "' of sun.arch.data.model system property");
|
||||
}
|
||||
|
||||
final String strVmVersion = System.getProperty("java.vm.version");
|
||||
final int vmVersion = Integer.parseInt(strVmVersion.substring(0,
|
||||
strVmVersion.indexOf('.')));
|
||||
final int vmVersion = Integer.parseInt(strVmVersion.substring(0, strVmVersion.indexOf('.')));
|
||||
if (vmVersion >= 17) {
|
||||
long maxMemory = 0;
|
||||
for (MemoryPoolMXBean mp : ManagementFactory.getMemoryPoolMXBeans()) {
|
||||
|
||||
@@ -54,13 +54,13 @@ public final class Option<T> implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an empty {@code Option} instance. No value is present for this Option.
|
||||
* Returns an empty {@code Option} instance. No value is present for this Option.
|
||||
*
|
||||
* @param <T> Type of the non-existent value
|
||||
* @return an empty {@code Option}
|
||||
* @apiNote Though it may be tempting to do so, avoid testing if an object is empty by comparing with {@code ==}
|
||||
* against instances returned by {@code Option.empty()}. There is no guarantee that it is a singleton. Instead, use
|
||||
* {@link #isPresent()}.
|
||||
* against instances returned by {@code Option.empty()}. There is no guarantee that it is a singleton.
|
||||
* Instead, use {@link #isPresent()}.
|
||||
*/
|
||||
public static <T> Option<T> empty() {
|
||||
@SuppressWarnings("unchecked")
|
||||
@@ -143,7 +143,7 @@ public final class Option<T> implements Serializable {
|
||||
*
|
||||
* @param predicate a predicate to apply to the value, if present
|
||||
* @return an {@code Option} describing the value of this {@code Option} if a value is present and the value matches
|
||||
* the given predicate, otherwise an empty {@code Option}
|
||||
* the given predicate, otherwise an empty {@code Option}
|
||||
* @throws NullPointerException if the predicate is null
|
||||
*/
|
||||
public Option<T> filter(Predicate<? super T> predicate) {
|
||||
@@ -157,25 +157,27 @@ public final class Option<T> implements Serializable {
|
||||
|
||||
/**
|
||||
* If a value is present, apply the provided mapping function to it, and if the result is non-null, return an {@code
|
||||
* Option} describing the result. Otherwise return an empty {@code Option}.
|
||||
* Option} describing the result. Otherwise return an empty {@code Option}.
|
||||
*
|
||||
* @param <U> The type of the result of the mapping function
|
||||
* @param mapper a mapping function to apply to the value, if present
|
||||
* @return an {@code Option} describing the result of applying a mapping function to the value of this {@code Option},
|
||||
* if a value is present, otherwise an empty {@code Option}
|
||||
* if a value is present, otherwise an empty {@code Option}
|
||||
* @throws NullPointerException if the mapping function is null
|
||||
* @apiNote This method supports post-processing on optional values, without the need to explicitly check for a return
|
||||
* status. For example, the following code traverses a stream of file names, selects one that has not yet been
|
||||
* processed, and then opens that file, returning an {@code Option<FileInputStream>}:
|
||||
* status. For example, the following code traverses a stream of file names, selects one that has not yet
|
||||
* been processed, and then opens that file, returning an {@code Option<FileInputStream>}:
|
||||
*
|
||||
* <pre>{@code
|
||||
* <pre>
|
||||
* {@code
|
||||
* Option<FileInputStream> fis =
|
||||
* names.stream().filter(name -> !isProcessedYet(name))
|
||||
* .findFirst()
|
||||
* .map(name -> new FileInputStream(name));
|
||||
* }</pre>
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* Here, {@code findFirst} returns an {@code Option<String>}, and then {@code map} returns an {@code
|
||||
* Here, {@code findFirst} returns an {@code Option<String>}, and then {@code map} returns an {@code
|
||||
* Option<FileInputStream>} for the desired file if one exists.
|
||||
*/
|
||||
public <U> Option<U> map(Function<? super T, ? extends U> mapper) {
|
||||
@@ -189,14 +191,14 @@ public final class Option<T> implements Serializable {
|
||||
|
||||
/**
|
||||
* If a value is present, apply the provided {@code Option}-bearing mapping function to it, return that result,
|
||||
* otherwise return an empty {@code Option}. This method is similar to {@link #map(Function)}, but the provided
|
||||
* mapper is one whose result is already an {@code Option}, and if invoked, {@code flatMap} does not wrap it with an
|
||||
* otherwise return an empty {@code Option}. This method is similar to {@link #map(Function)}, but the provided mapper
|
||||
* is one whose result is already an {@code Option}, and if invoked, {@code flatMap} does not wrap it with an
|
||||
* additional {@code Option}.
|
||||
*
|
||||
* @param <U> The type parameter to the {@code Option} returned by the mapping function
|
||||
* @param mapper a mapping function to apply to the value, if present
|
||||
* @return the result of applying an {@code Option}-bearing mapping function to the value of this {@code Option}, if a
|
||||
* value is present, otherwise an empty {@code Option}
|
||||
* value is present, otherwise an empty {@code Option}
|
||||
* @throws NullPointerException if the mapping function is null or returns a null result
|
||||
*/
|
||||
public <U> Option<U> flatMap(Function<? super T, Option<U>> mapper) {
|
||||
@@ -238,7 +240,7 @@ public final class Option<T> implements Serializable {
|
||||
* @throws X if there is no value present
|
||||
* @throws NullPointerException if no value is present and {@code exceptionSupplier} is null
|
||||
* @apiNote A method reference to the exception constructor with an empty argument list can be used as the supplier.
|
||||
* For example, {@code IllegalStateException::new}
|
||||
* For example, {@code IllegalStateException::new}
|
||||
*/
|
||||
public <X extends Throwable> T orElseThrow(Supplier<? extends X> exceptionSupplier) throws X {
|
||||
if (value != null) {
|
||||
@@ -289,13 +291,11 @@ public final class Option<T> implements Serializable {
|
||||
*
|
||||
* @return the string representation of this instance
|
||||
* @implSpec If a value is present the result must include its string representation in the result. Empty and present
|
||||
* Optionals must be unambiguously differentiable.
|
||||
* Optionals must be unambiguously differentiable.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return value != null
|
||||
? String.format("Option[%s]", value)
|
||||
: "Option.empty";
|
||||
return value != null ? String.format("Option[%s]", value) : "Option.empty";
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -51,22 +51,22 @@ public class ParquetUtils {
|
||||
/**
|
||||
* Read the rowKey list from the given parquet file.
|
||||
*
|
||||
* @param filePath The parquet file path.
|
||||
* @param filePath The parquet file path.
|
||||
* @param configuration configuration to build fs object
|
||||
* @return Set Set of row keys
|
||||
* @return Set Set of row keys
|
||||
*/
|
||||
public static Set<String> readRowKeysFromParquet(Configuration configuration, Path filePath) {
|
||||
return filterParquetRowKeys(configuration, filePath, new HashSet<>());
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the rowKey list matching the given filter, from the given parquet file. If the filter is empty,
|
||||
* then this will return all the rowkeys.
|
||||
* Read the rowKey list matching the given filter, from the given parquet file. If the filter is empty, then this will
|
||||
* return all the rowkeys.
|
||||
*
|
||||
* @param filePath The parquet file path.
|
||||
* @param configuration configuration to build fs object
|
||||
* @param filter record keys filter
|
||||
* @return Set Set of row keys matching candidateRecordKeys
|
||||
* @param filePath The parquet file path.
|
||||
* @param configuration configuration to build fs object
|
||||
* @param filter record keys filter
|
||||
* @return Set Set of row keys matching candidateRecordKeys
|
||||
*/
|
||||
public static Set<String> filterParquetRowKeys(Configuration configuration, Path filePath, Set<String> filter) {
|
||||
Option<RecordKeysFilterFunction> filterFunction = Option.empty();
|
||||
@@ -102,11 +102,9 @@ public class ParquetUtils {
|
||||
ParquetMetadata footer;
|
||||
try {
|
||||
// TODO(vc): Should we use the parallel reading version here?
|
||||
footer = ParquetFileReader
|
||||
.readFooter(FSUtils.getFs(parquetFilePath.toString(), conf).getConf(), parquetFilePath);
|
||||
footer = ParquetFileReader.readFooter(FSUtils.getFs(parquetFilePath.toString(), conf).getConf(), parquetFilePath);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath,
|
||||
e);
|
||||
throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e);
|
||||
}
|
||||
return footer;
|
||||
}
|
||||
@@ -127,8 +125,8 @@ public class ParquetUtils {
|
||||
if (metadata.containsKey(footerName)) {
|
||||
footerVals.add(metadata.get(footerName));
|
||||
} else {
|
||||
throw new MetadataNotFoundException("Could not find index in Parquet footer. "
|
||||
+ "Looked for key " + footerName + " in " + parquetFilePath);
|
||||
throw new MetadataNotFoundException(
|
||||
"Could not find index in Parquet footer. " + "Looked for key " + footerName + " in " + parquetFilePath);
|
||||
}
|
||||
}
|
||||
return footerVals;
|
||||
@@ -141,21 +139,20 @@ public class ParquetUtils {
|
||||
/**
|
||||
* Read out the bloom filter from the parquet file meta data.
|
||||
*/
|
||||
public static BloomFilter readBloomFilterFromParquetMetadata(Configuration configuration,
|
||||
Path parquetFilePath) {
|
||||
String footerVal = readParquetFooter(configuration, parquetFilePath,
|
||||
HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY).get(0);
|
||||
public static BloomFilter readBloomFilterFromParquetMetadata(Configuration configuration, Path parquetFilePath) {
|
||||
String footerVal =
|
||||
readParquetFooter(configuration, parquetFilePath, HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY)
|
||||
.get(0);
|
||||
return new BloomFilter(footerVal);
|
||||
}
|
||||
|
||||
public static String[] readMinMaxRecordKeys(Configuration configuration, Path parquetFilePath) {
|
||||
List<String> minMaxKeys = readParquetFooter(configuration, parquetFilePath,
|
||||
HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER,
|
||||
HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER);
|
||||
HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER, HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER);
|
||||
if (minMaxKeys.size() != 2) {
|
||||
throw new HoodieException(String.format(
|
||||
"Could not read min/max record key out of footer correctly from %s. read) : %s",
|
||||
parquetFilePath, minMaxKeys));
|
||||
throw new HoodieException(
|
||||
String.format("Could not read min/max record key out of footer correctly from %s. read) : %s",
|
||||
parquetFilePath, minMaxKeys));
|
||||
}
|
||||
return new String[] {minMaxKeys.get(0), minMaxKeys.get(1)};
|
||||
}
|
||||
|
||||
@@ -56,14 +56,11 @@ public class ReflectionUtils {
|
||||
/**
|
||||
* Instantiate a given class with a generic record payload
|
||||
*/
|
||||
public static <T extends HoodieRecordPayload> T loadPayload(String recordPayloadClass,
|
||||
Object[] payloadArgs,
|
||||
public static <T extends HoodieRecordPayload> T loadPayload(String recordPayloadClass, Object[] payloadArgs,
|
||||
Class<?>... constructorArgTypes) {
|
||||
try {
|
||||
return (T) getClass(recordPayloadClass).getConstructor(constructorArgTypes)
|
||||
.newInstance(payloadArgs);
|
||||
} catch (InstantiationException | IllegalAccessException
|
||||
| InvocationTargetException | NoSuchMethodException e) {
|
||||
return (T) getClass(recordPayloadClass).getConstructor(constructorArgTypes).newInstance(payloadArgs);
|
||||
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
|
||||
throw new HoodieException("Unable to instantiate payload class ", e);
|
||||
}
|
||||
}
|
||||
@@ -74,8 +71,7 @@ public class ReflectionUtils {
|
||||
public static Object loadClass(String clazz, Class<?>[] constructorArgTypes, Object... constructorArgs) {
|
||||
try {
|
||||
return getClass(clazz).getConstructor(constructorArgTypes).newInstance(constructorArgs);
|
||||
} catch (InstantiationException | IllegalAccessException
|
||||
| InvocationTargetException | NoSuchMethodException e) {
|
||||
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
|
||||
throw new HoodieException("Unable to instantiate class ", e);
|
||||
}
|
||||
}
|
||||
@@ -84,13 +80,13 @@ public class ReflectionUtils {
|
||||
* Creates an instance of the given class. Constructor arg types are inferred.
|
||||
*/
|
||||
public static Object loadClass(String clazz, Object... constructorArgs) {
|
||||
Class<?>[] constructorArgTypes = Arrays.stream(constructorArgs)
|
||||
.map(Object::getClass).toArray(Class<?>[]::new);
|
||||
Class<?>[] constructorArgTypes = Arrays.stream(constructorArgs).map(Object::getClass).toArray(Class<?>[]::new);
|
||||
return loadClass(clazz, constructorArgTypes, constructorArgs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return stream of top level class names in the same class path as passed-in class
|
||||
*
|
||||
* @param clazz
|
||||
*/
|
||||
public static Stream<String> getTopLevelClassesInClasspath(Class clazz) {
|
||||
|
||||
@@ -64,8 +64,8 @@ public class RocksDBDAO {
|
||||
|
||||
public RocksDBDAO(String basePath, String rocksDBBasePath) {
|
||||
this.basePath = basePath;
|
||||
this.rocksDBBasePath = String.format("%s/%s/%s", rocksDBBasePath,
|
||||
this.basePath.replace("/", "_"), UUID.randomUUID().toString());
|
||||
this.rocksDBBasePath =
|
||||
String.format("%s/%s/%s", rocksDBBasePath, this.basePath.replace("/", "_"), UUID.randomUUID().toString());
|
||||
init();
|
||||
}
|
||||
|
||||
@@ -137,8 +137,8 @@ public class RocksDBDAO {
|
||||
managedColumnFamilies.add(getColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));
|
||||
} else {
|
||||
log.info("Loading column families :" + existing.stream().map(String::new).collect(Collectors.toList()));
|
||||
managedColumnFamilies.addAll(existing.stream()
|
||||
.map(RocksDBDAO::getColumnFamilyDescriptor).collect(Collectors.toList()));
|
||||
managedColumnFamilies
|
||||
.addAll(existing.stream().map(RocksDBDAO::getColumnFamilyDescriptor).collect(Collectors.toList()));
|
||||
}
|
||||
return managedColumnFamilies;
|
||||
}
|
||||
@@ -350,9 +350,8 @@ public class RocksDBDAO {
|
||||
}
|
||||
}
|
||||
|
||||
log.info("Prefix Search for (query=" + prefix + ") on " + columnFamilyName
|
||||
+ ". Total Time Taken (msec)=" + timer.endTimer()
|
||||
+ ". Serialization Time taken(micro)=" + timeTakenMicro + ", num entries=" + results.size());
|
||||
log.info("Prefix Search for (query=" + prefix + ") on " + columnFamilyName + ". Total Time Taken (msec)="
|
||||
+ timer.endTimer() + ". Serialization Time taken(micro)=" + timeTakenMicro + ", num entries=" + results.size());
|
||||
return results.stream();
|
||||
}
|
||||
|
||||
@@ -368,7 +367,7 @@ public class RocksDBDAO {
|
||||
log.info("Prefix DELETE (query=" + prefix + ") on " + columnFamilyName);
|
||||
final RocksIterator it = getRocksDB().newIterator(managedHandlesMap.get(columnFamilyName));
|
||||
it.seek(prefix.getBytes());
|
||||
//Find first and last keys to be deleted
|
||||
// Find first and last keys to be deleted
|
||||
String firstEntry = null;
|
||||
String lastEntry = null;
|
||||
while (it.isValid() && new String(it.key()).startsWith(prefix)) {
|
||||
@@ -384,9 +383,8 @@ public class RocksDBDAO {
|
||||
if (null != firstEntry) {
|
||||
try {
|
||||
// This will not delete the last entry
|
||||
getRocksDB().deleteRange(managedHandlesMap.get(columnFamilyName), firstEntry.getBytes(),
|
||||
lastEntry.getBytes());
|
||||
//Delete the last entry
|
||||
getRocksDB().deleteRange(managedHandlesMap.get(columnFamilyName), firstEntry.getBytes(), lastEntry.getBytes());
|
||||
// Delete the last entry
|
||||
getRocksDB().delete(lastEntry.getBytes());
|
||||
} catch (RocksDBException e) {
|
||||
log.error("Got exception performing range delete");
|
||||
|
||||
@@ -28,27 +28,17 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
/**
|
||||
* Helper class to generate Key and column names for rocksdb based view
|
||||
*
|
||||
* For RocksDB, 3 colFamilies are used for storing file-system view for each dataset.
|
||||
* (a) View
|
||||
* (b) Partitions Cached
|
||||
* (c) Pending Compactions
|
||||
*
|
||||
*
|
||||
* View : Key : Store both slice and Data file stored.
|
||||
* Slice :
|
||||
* Key = "type=slice,part=<PartitionPath>,id=<FileId>,instant=<Timestamp>"
|
||||
* Value = Serialized FileSlice
|
||||
* Data File :
|
||||
* Key = "type=df,part=<PartitionPath>,id=<FileId>,instant=<Timestamp>"
|
||||
* Value = Serialized DataFile
|
||||
*
|
||||
* Partitions :
|
||||
* Key = "part=<PartitionPath>"
|
||||
* Value = Boolean
|
||||
*
|
||||
* For RocksDB, 3 colFamilies are used for storing file-system view for each dataset. (a) View (b) Partitions Cached (c)
|
||||
* Pending Compactions
|
||||
* Key = "part=<PartitionPath>,id=<FileId>"
|
||||
* Value = Pair<CompactionTime, CompactionOperation>
|
||||
*
|
||||
*
|
||||
* View : Key : Store both slice and Data file stored. Slice : Key =
|
||||
* "type=slice,part=<PartitionPath>,id=<FileId>,instant=<Timestamp>" Value = Serialized FileSlice Data File : Key =
|
||||
* "type=df,part=<PartitionPath>,id=<FileId>,instant=<Timestamp>" Value = Serialized DataFile
|
||||
*
|
||||
* Partitions : Key = "part=<PartitionPath>" Value = Boolean
|
||||
*
|
||||
* Pending Compactions Key = "part=<PartitionPath>,id=<FileId>" Value = Pair<CompactionTime, CompactionOperation>
|
||||
*/
|
||||
public class RocksDBSchemaHelper {
|
||||
|
||||
@@ -80,15 +70,15 @@ public class RocksDBSchemaHelper {
|
||||
}
|
||||
|
||||
public String getKeyForSliceView(String partitionPath, String fileId, String instantTime) {
|
||||
return String.format("type=slice,part=%s,id=%s,instant=%s",partitionPath, fileId, instantTime);
|
||||
return String.format("type=slice,part=%s,id=%s,instant=%s", partitionPath, fileId, instantTime);
|
||||
}
|
||||
|
||||
public String getPrefixForSliceViewByPartitionFile(String partitionPath, String fileId) {
|
||||
return String.format("type=slice,part=%s,id=%s,instant=",partitionPath, fileId);
|
||||
return String.format("type=slice,part=%s,id=%s,instant=", partitionPath, fileId);
|
||||
}
|
||||
|
||||
public String getPrefixForDataFileViewByPartitionFile(String partitionPath, String fileId) {
|
||||
return String.format("type=df,part=%s,id=%s,instant=",partitionPath, fileId);
|
||||
return String.format("type=df,part=%s,id=%s,instant=", partitionPath, fileId);
|
||||
}
|
||||
|
||||
public String getKeyForDataFileView(HoodieFileGroup fileGroup, FileSlice slice) {
|
||||
|
||||
@@ -34,8 +34,7 @@ import org.objenesis.instantiator.ObjectInstantiator;
|
||||
|
||||
|
||||
/**
|
||||
* {@link SerializationUtils} class internally uses {@link Kryo} serializer for serializing /
|
||||
* deserializing objects.
|
||||
* {@link SerializationUtils} class internally uses {@link Kryo} serializer for serializing / deserializing objects.
|
||||
*/
|
||||
public class SerializationUtils {
|
||||
|
||||
@@ -44,10 +43,12 @@ public class SerializationUtils {
|
||||
ThreadLocal.withInitial(() -> new KryoSerializerInstance());
|
||||
|
||||
// Serialize
|
||||
//-----------------------------------------------------------------------
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Serializes an {@code Object} to a byte array for storage/serialization.</p>
|
||||
* <p>
|
||||
* Serializes an {@code Object} to a byte array for storage/serialization.
|
||||
* </p>
|
||||
*
|
||||
* @param obj the object to serialize to bytes
|
||||
* @return a byte[] with the converted Serializable
|
||||
@@ -58,15 +59,18 @@ public class SerializationUtils {
|
||||
}
|
||||
|
||||
// Deserialize
|
||||
//-----------------------------------------------------------------------
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p> Deserializes a single {@code Object} from an array of bytes. </p>
|
||||
* <p>
|
||||
* Deserializes a single {@code Object} from an array of bytes.
|
||||
* </p>
|
||||
*
|
||||
* <p> If the call site incorrectly types the return value, a {@link ClassCastException} is thrown
|
||||
* from the call site. Without Generics in this declaration, the call site must type cast and can
|
||||
* cause the same ClassCastException. Note that in both cases, the ClassCastException is in the
|
||||
* call site, not in this method. </p>
|
||||
* <p>
|
||||
* If the call site incorrectly types the return value, a {@link ClassCastException} is thrown from the call site.
|
||||
* Without Generics in this declaration, the call site must type cast and can cause the same ClassCastException. Note
|
||||
* that in both cases, the ClassCastException is in the call site, not in this method.
|
||||
* </p>
|
||||
*
|
||||
* @param <T> the object type to be deserialized
|
||||
* @param objectData the serialized object, must not be null
|
||||
@@ -109,8 +113,8 @@ public class SerializationUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* This class has a no-arg constructor, suitable for use with reflection instantiation.
|
||||
* For details, check out com.twitter.chill.KryoBase.
|
||||
* This class has a no-arg constructor, suitable for use with reflection instantiation. For details, check out
|
||||
* com.twitter.chill.KryoBase.
|
||||
*/
|
||||
private static class KryoInstantiator implements Serializable {
|
||||
|
||||
@@ -153,8 +157,8 @@ public class SerializationUtils {
|
||||
final Constructor constructor = type.getConstructor();
|
||||
constructor.setAccessible(true);
|
||||
return constructor.newInstance();
|
||||
} catch (NoSuchMethodException | IllegalAccessException
|
||||
| InstantiationException | InvocationTargetException e) {
|
||||
} catch (NoSuchMethodException | IllegalAccessException | InstantiationException
|
||||
| InvocationTargetException e) {
|
||||
// ignore this exception. we will fall back to default instantiation strategy.
|
||||
}
|
||||
return super.getInstantiatorStrategy().newInstantiatorOf(type).newInstance();
|
||||
|
||||
@@ -20,14 +20,14 @@ package org.apache.hudi.common.util;
|
||||
|
||||
/**
|
||||
* An interface to estimate the size of payload in memory
|
||||
*
|
||||
* @param <T>
|
||||
*/
|
||||
public interface SizeEstimator<T> {
|
||||
|
||||
/**
|
||||
* This method is used to estimate the size of a payload in memory.
|
||||
* The default implementation returns the total allocated size, in bytes, of the object
|
||||
* and all other objects reachable from it
|
||||
* This method is used to estimate the size of a payload in memory. The default implementation returns the total
|
||||
* allocated size, in bytes, of the object and all other objects reachable from it
|
||||
*/
|
||||
long sizeEstimate(T t);
|
||||
}
|
||||
|
||||
@@ -43,8 +43,7 @@ public class SpillableMapUtils {
|
||||
/**
|
||||
* |crc|timestamp|sizeOfKey|SizeOfValue|key|value|
|
||||
*/
|
||||
private static FileEntry readInternal(RandomAccessFile file, long valuePosition,
|
||||
int valueLength) throws IOException {
|
||||
private static FileEntry readInternal(RandomAccessFile file, long valuePosition, int valueLength) throws IOException {
|
||||
file.seek(valuePosition);
|
||||
long crc = file.readLong();
|
||||
long timestamp = file.readLong();
|
||||
@@ -59,24 +58,22 @@ public class SpillableMapUtils {
|
||||
file.read(value, 0, valueSize);
|
||||
long crcOfReadValue = generateChecksum(value);
|
||||
if (!(crc == crcOfReadValue)) {
|
||||
throw new HoodieCorruptedDataException("checksum of payload written to external disk does not match, "
|
||||
+ "data may be corrupted");
|
||||
throw new HoodieCorruptedDataException(
|
||||
"checksum of payload written to external disk does not match, " + "data may be corrupted");
|
||||
}
|
||||
return new FileEntry(crc, keySize, valueSize, key, value, timestamp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write Value and other metadata necessary to disk. Each entry has the following sequence of data <p>
|
||||
* Write Value and other metadata necessary to disk. Each entry has the following sequence of data
|
||||
* <p>
|
||||
* |crc|timestamp|sizeOfKey|SizeOfValue|key|value|
|
||||
*/
|
||||
public static long spillToDisk(SizeAwareDataOutputStream outputStream,
|
||||
FileEntry fileEntry) throws IOException {
|
||||
public static long spillToDisk(SizeAwareDataOutputStream outputStream, FileEntry fileEntry) throws IOException {
|
||||
return spill(outputStream, fileEntry);
|
||||
}
|
||||
|
||||
private static long spill(SizeAwareDataOutputStream outputStream,
|
||||
FileEntry fileEntry)
|
||||
throws IOException {
|
||||
private static long spill(SizeAwareDataOutputStream outputStream, FileEntry fileEntry) throws IOException {
|
||||
outputStream.writeLong(fileEntry.getCrc());
|
||||
outputStream.writeLong(fileEntry.getTimestamp());
|
||||
outputStream.writeInt(fileEntry.getSizeOfKey());
|
||||
@@ -107,15 +104,10 @@ public class SpillableMapUtils {
|
||||
* Utility method to convert bytes to HoodieRecord using schema and payload class
|
||||
*/
|
||||
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz) {
|
||||
String recKey = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)
|
||||
.toString();
|
||||
String partitionPath =
|
||||
rec.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD)
|
||||
.toString();
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(
|
||||
new HoodieKey(recKey, partitionPath),
|
||||
ReflectionUtils
|
||||
.loadPayload(payloadClazz, new Object[]{Option.of(rec)}, Option.class));
|
||||
String recKey = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
String partitionPath = rec.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(new HoodieKey(recKey, partitionPath),
|
||||
ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.of(rec)}, Option.class));
|
||||
return (R) hoodieRecord;
|
||||
}
|
||||
|
||||
@@ -123,10 +115,8 @@ public class SpillableMapUtils {
|
||||
* Utility method to convert bytes to HoodieRecord using schema and payload class
|
||||
*/
|
||||
public static <R> R generateEmptyPayload(String recKey, String partitionPath, String payloadClazz) {
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(
|
||||
new HoodieKey(recKey, partitionPath),
|
||||
ReflectionUtils
|
||||
.loadPayload(payloadClazz, new Object[]{Option.empty()}, Option.class));
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(new HoodieKey(recKey, partitionPath),
|
||||
ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.empty()}, Option.class));
|
||||
return (R) hoodieRecord;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,12 +24,14 @@ package org.apache.hudi.common.util;
|
||||
public class StringUtils {
|
||||
|
||||
/**
|
||||
* <p>Joins the elements of the provided array into a single String
|
||||
* containing the provided list of elements.</p>
|
||||
* <p>
|
||||
* Joins the elements of the provided array into a single String containing the provided list of elements.
|
||||
* </p>
|
||||
*
|
||||
* <p>No separator is added to the joined String.
|
||||
* Null objects or empty strings within the array are represented by
|
||||
* empty strings.</p>
|
||||
* <p>
|
||||
* No separator is added to the joined String. Null objects or empty strings within the array are represented by empty
|
||||
* strings.
|
||||
* </p>
|
||||
*
|
||||
* <pre>
|
||||
* StringUtils.join(null) = null
|
||||
@@ -56,7 +58,7 @@ public class StringUtils {
|
||||
|
||||
public static String toHexString(byte[] bytes) {
|
||||
StringBuilder sb = new StringBuilder(bytes.length * 2);
|
||||
for (byte b: bytes) {
|
||||
for (byte b : bytes) {
|
||||
sb.append(String.format("%02x", b));
|
||||
}
|
||||
return sb.toString();
|
||||
|
||||
@@ -55,19 +55,19 @@ public class TimelineDiffHelper {
|
||||
|
||||
// Check If any pending compaction is lost. If so, do not allow incremental timeline sync
|
||||
List<Pair<HoodieInstant, HoodieInstant>> compactionInstants = getPendingCompactionTransitions(oldT, newT);
|
||||
List<HoodieInstant> lostPendingCompactions =
|
||||
compactionInstants.stream().filter(instantPair -> instantPair.getValue() == null).map(Pair::getKey)
|
||||
.collect(Collectors.toList());
|
||||
List<HoodieInstant> lostPendingCompactions = compactionInstants.stream()
|
||||
.filter(instantPair -> instantPair.getValue() == null).map(Pair::getKey).collect(Collectors.toList());
|
||||
if (!lostPendingCompactions.isEmpty()) {
|
||||
// If a compaction is unscheduled, fall back to complete refresh of fs view since some log files could have been
|
||||
// moved. It's unsafe to incrementally sync in that case.
|
||||
log.warn("Some pending compactions are no longer in new timeline (unscheduled ?)."
|
||||
+ "They are :" + lostPendingCompactions);
|
||||
log.warn("Some pending compactions are no longer in new timeline (unscheduled ?)." + "They are :"
|
||||
+ lostPendingCompactions);
|
||||
return TimelineDiffResult.UNSAFE_SYNC_RESULT;
|
||||
}
|
||||
List<HoodieInstant> finishedCompactionInstants = compactionInstants.stream().filter(instantPair ->
|
||||
instantPair.getValue().getAction().equals(HoodieTimeline.COMMIT_ACTION)
|
||||
&& instantPair.getValue().isCompleted()).map(Pair::getKey).collect(Collectors.toList());
|
||||
List<HoodieInstant> finishedCompactionInstants = compactionInstants.stream()
|
||||
.filter(instantPair -> instantPair.getValue().getAction().equals(HoodieTimeline.COMMIT_ACTION)
|
||||
&& instantPair.getValue().isCompleted())
|
||||
.map(Pair::getKey).collect(Collectors.toList());
|
||||
|
||||
newT.getInstants().filter(instant -> !oldTimelineInstants.contains(instant)).forEach(newInstants::add);
|
||||
return new TimelineDiffResult(newInstants, finishedCompactionInstants, true);
|
||||
@@ -125,11 +125,8 @@ public class TimelineDiffHelper {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TimelineDiffResult{"
|
||||
+ "newlySeenInstants=" + newlySeenInstants
|
||||
+ ", finishedCompactionInstants=" + finishedCompactionInstants
|
||||
+ ", canSyncIncrementally=" + canSyncIncrementally
|
||||
+ '}';
|
||||
return "TimelineDiffResult{" + "newlySeenInstants=" + newlySeenInstants + ", finishedCompactionInstants="
|
||||
+ finishedCompactionInstants + ", canSyncIncrementally=" + canSyncIncrementally + '}';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,9 +83,10 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
|
||||
|
||||
/**
|
||||
* RandomAccessFile is not thread-safe. This API opens a new file handle per thread and returns it.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private RandomAccessFile getRandomAccessFile() {
|
||||
private RandomAccessFile getRandomAccessFile() {
|
||||
try {
|
||||
RandomAccessFile readHandle = randomAccessFile.get();
|
||||
if (readHandle == null) {
|
||||
@@ -109,9 +110,9 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
|
||||
writeOnlyFile.getParentFile().mkdir();
|
||||
}
|
||||
writeOnlyFile.createNewFile();
|
||||
log.info(
|
||||
"Spilling to file location " + writeOnlyFile.getAbsolutePath() + " in host (" + InetAddress.getLocalHost()
|
||||
.getHostAddress() + ") with hostname (" + InetAddress.getLocalHost().getHostName() + ")");
|
||||
log.info("Spilling to file location " + writeOnlyFile.getAbsolutePath() + " in host ("
|
||||
+ InetAddress.getLocalHost().getHostAddress() + ") with hostname (" + InetAddress.getLocalHost().getHostName()
|
||||
+ ")");
|
||||
// Make sure file is deleted when JVM exits
|
||||
writeOnlyFile.deleteOnExit();
|
||||
addShutDownHook();
|
||||
@@ -200,8 +201,8 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
|
||||
|
||||
public static <R> R get(ValueMetadata entry, RandomAccessFile file) {
|
||||
try {
|
||||
return SerializationUtils.deserialize(SpillableMapUtils.readBytesFromDisk(file,
|
||||
entry.getOffsetOfValue(), entry.getSizeOfValue()));
|
||||
return SerializationUtils
|
||||
.deserialize(SpillableMapUtils.readBytesFromDisk(file, entry.getOffsetOfValue(), entry.getSizeOfValue()));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Unable to readFromDisk Hoodie Record from disk", e);
|
||||
}
|
||||
@@ -216,8 +217,8 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
|
||||
this.valueMetadataMap.put(key,
|
||||
new DiskBasedMap.ValueMetadata(this.filePath, valueSize, filePosition.get(), timestamp));
|
||||
byte[] serializedKey = SerializationUtils.serialize(key);
|
||||
filePosition.set(SpillableMapUtils.spillToDisk(writeOnlyFileHandle,
|
||||
new FileEntry(SpillableMapUtils.generateChecksum(val),
|
||||
filePosition
|
||||
.set(SpillableMapUtils.spillToDisk(writeOnlyFileHandle, new FileEntry(SpillableMapUtils.generateChecksum(val),
|
||||
serializedKey.length, valueSize, serializedKey, val, timestamp)));
|
||||
} catch (IOException io) {
|
||||
throw new HoodieIOException("Unable to store data in Disk Based map", io);
|
||||
@@ -258,8 +259,7 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
|
||||
|
||||
public Stream<R> valueStream() {
|
||||
final RandomAccessFile file = getRandomAccessFile();
|
||||
return valueMetadataMap.values().stream().sorted().sequential()
|
||||
.map(valueMetaData -> (R)get(valueMetaData, file));
|
||||
return valueMetadataMap.values().stream().sorted().sequential().map(valueMetaData -> (R) get(valueMetaData, file));
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -286,8 +286,7 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
|
||||
// Current timestamp when the value was written to disk
|
||||
private Long timestamp;
|
||||
|
||||
public FileEntry(long crc, int sizeOfKey, int sizeOfValue, byte[] key, byte[] value,
|
||||
long timestamp) {
|
||||
public FileEntry(long crc, int sizeOfKey, int sizeOfValue, byte[] key, byte[] value, long timestamp) {
|
||||
this.crc = crc;
|
||||
this.sizeOfKey = sizeOfKey;
|
||||
this.sizeOfValue = sizeOfValue;
|
||||
|
||||
@@ -36,13 +36,19 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* An external map that spills content to disk when there is insufficient space for it to grow. <p> This map holds 2
|
||||
* types of data structures : <p> (1) Key-Value pairs in a in-memory map (2) Key-ValueMetadata pairs in an in-memory map
|
||||
* which keeps a marker to the values spilled to disk <p> NOTE : Values are only appended to disk. If a remove() is
|
||||
* called, the entry is marked removed from the in-memory key-valueMetadata map but it's values will be lying around in
|
||||
* the temp file on disk until the file is cleaned. <p> The setting of the spill threshold faces the following
|
||||
* trade-off: If the spill threshold is too high, the in-memory map may occupy more memory than is available, resulting
|
||||
* in OOM. However, if the spill threshold is too low, we spill frequently and incur unnecessary disk writes.
|
||||
* An external map that spills content to disk when there is insufficient space for it to grow.
|
||||
* <p>
|
||||
* This map holds 2 types of data structures :
|
||||
* <p>
|
||||
* (1) Key-Value pairs in a in-memory map (2) Key-ValueMetadata pairs in an in-memory map which keeps a marker to the
|
||||
* values spilled to disk
|
||||
* <p>
|
||||
* NOTE : Values are only appended to disk. If a remove() is called, the entry is marked removed from the in-memory
|
||||
* key-valueMetadata map but it's values will be lying around in the temp file on disk until the file is cleaned.
|
||||
* <p>
|
||||
* The setting of the spill threshold faces the following trade-off: If the spill threshold is too high, the in-memory
|
||||
* map may occupy more memory than is available, resulting in OOM. However, if the spill threshold is too low, we spill
|
||||
* frequently and incur unnecessary disk writes.
|
||||
*/
|
||||
public class ExternalSpillableMap<T extends Serializable, R extends Serializable> implements Map<T, R> {
|
||||
|
||||
@@ -70,14 +76,13 @@ public class ExternalSpillableMap<T extends Serializable, R extends Serializable
|
||||
private boolean shouldEstimatePayloadSize = true;
|
||||
// Base File Path
|
||||
private final String baseFilePath;
|
||||
|
||||
public ExternalSpillableMap(Long maxInMemorySizeInBytes, String baseFilePath,
|
||||
SizeEstimator<T> keySizeEstimator, SizeEstimator<R> valueSizeEstimator) throws IOException {
|
||||
|
||||
public ExternalSpillableMap(Long maxInMemorySizeInBytes, String baseFilePath, SizeEstimator<T> keySizeEstimator,
|
||||
SizeEstimator<R> valueSizeEstimator) throws IOException {
|
||||
this.inMemoryMap = new HashMap<>();
|
||||
this.baseFilePath = baseFilePath;
|
||||
this.diskBasedMap = new DiskBasedMap<>(baseFilePath);
|
||||
this.maxInMemorySizeInBytes = (long) Math
|
||||
.floor(maxInMemorySizeInBytes * sizingFactorForInMemoryMap);
|
||||
this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes * sizingFactorForInMemoryMap);
|
||||
this.currentInMemoryMapSize = 0L;
|
||||
this.keySizeEstimator = keySizeEstimator;
|
||||
this.valueSizeEstimator = valueSizeEstimator;
|
||||
@@ -169,11 +174,9 @@ public class ExternalSpillableMap<T extends Serializable, R extends Serializable
|
||||
if (shouldEstimatePayloadSize && estimatedPayloadSize == 0) {
|
||||
// At first, use the sizeEstimate of a record being inserted into the spillable map.
|
||||
// Note, the converter may over estimate the size of a record in the JVM
|
||||
this.estimatedPayloadSize =
|
||||
keySizeEstimator.sizeEstimate(key) + valueSizeEstimator.sizeEstimate(value);
|
||||
this.estimatedPayloadSize = keySizeEstimator.sizeEstimate(key) + valueSizeEstimator.sizeEstimate(value);
|
||||
log.info("Estimated Payload size => " + estimatedPayloadSize);
|
||||
} else if (shouldEstimatePayloadSize
|
||||
&& inMemoryMap.size() % NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE == 0) {
|
||||
} else if (shouldEstimatePayloadSize && inMemoryMap.size() % NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE == 0) {
|
||||
// Re-estimate the size of a record by calculating the size of the entire map containing
|
||||
// N entries and then dividing by the number of entries present (N). This helps to get a
|
||||
// correct estimation of the size of each record in the JVM.
|
||||
|
||||
@@ -20,14 +20,19 @@ package org.apache.hudi.common.util.collection;
|
||||
|
||||
/**
|
||||
* (NOTE: Adapted from Apache commons-lang3)
|
||||
* <p>An immutable pair consisting of two {@code Object} elements.</p>
|
||||
* <p>
|
||||
* An immutable pair consisting of two {@code Object} elements.
|
||||
* </p>
|
||||
*
|
||||
* <p>Although the implementation is immutable, there is no restriction on the objects
|
||||
* that may be stored. If mutable objects are stored in the pair, then the pair
|
||||
* itself effectively becomes mutable. The class is also {@code final}, so a subclass
|
||||
* can not add undesirable behaviour.</p>
|
||||
* <p>
|
||||
* Although the implementation is immutable, there is no restriction on the objects that may be stored. If mutable
|
||||
* objects are stored in the pair, then the pair itself effectively becomes mutable. The class is also {@code final}, so
|
||||
* a subclass can not add undesirable behaviour.
|
||||
* </p>
|
||||
*
|
||||
* <p>#ThreadSafe# if both paired objects are thread-safe</p>
|
||||
* <p>
|
||||
* #ThreadSafe# if both paired objects are thread-safe
|
||||
* </p>
|
||||
*
|
||||
* @param <L> the left element type
|
||||
* @param <R> the right element type
|
||||
@@ -49,10 +54,13 @@ public final class ImmutablePair<L, R> extends Pair<L, R> {
|
||||
public final R right;
|
||||
|
||||
/**
|
||||
* <p>Obtains an immutable pair of from two objects inferring the generic types.</p>
|
||||
* <p>
|
||||
* Obtains an immutable pair of from two objects inferring the generic types.
|
||||
* </p>
|
||||
*
|
||||
* <p>This factory allows the pair to be created using inference to
|
||||
* obtain the generic types.</p>
|
||||
* <p>
|
||||
* This factory allows the pair to be created using inference to obtain the generic types.
|
||||
* </p>
|
||||
*
|
||||
* @param <L> the left element type
|
||||
* @param <R> the right element type
|
||||
@@ -76,7 +84,7 @@ public final class ImmutablePair<L, R> extends Pair<L, R> {
|
||||
this.right = right;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
@@ -95,9 +103,13 @@ public final class ImmutablePair<L, R> extends Pair<L, R> {
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Throws {@code UnsupportedOperationException}.</p>
|
||||
* <p>
|
||||
* Throws {@code UnsupportedOperationException}.
|
||||
* </p>
|
||||
*
|
||||
* <p>This pair is immutable, so this operation is not supported.</p>
|
||||
* <p>
|
||||
* This pair is immutable, so this operation is not supported.
|
||||
* </p>
|
||||
*
|
||||
* @param value the value to set
|
||||
* @return never
|
||||
|
||||
@@ -20,14 +20,19 @@ package org.apache.hudi.common.util.collection;
|
||||
|
||||
/**
|
||||
* (NOTE: Adapted from Apache commons-lang3)
|
||||
* <p>An immutable triple consisting of three {@code Object} elements.</p>
|
||||
* <p>
|
||||
* An immutable triple consisting of three {@code Object} elements.
|
||||
* </p>
|
||||
*
|
||||
* <p>Although the implementation is immutable, there is no restriction on the objects
|
||||
* that may be stored. If mutable objects are stored in the triple, then the triple
|
||||
* itself effectively becomes mutable. The class is also {@code final}, so a subclass
|
||||
* can not add undesirable behaviour.</p>
|
||||
* <p>
|
||||
* Although the implementation is immutable, there is no restriction on the objects that may be stored. If mutable
|
||||
* objects are stored in the triple, then the triple itself effectively becomes mutable. The class is also
|
||||
* {@code final}, so a subclass can not add undesirable behaviour.
|
||||
* </p>
|
||||
*
|
||||
* <p>#ThreadSafe# if all three objects are thread-safe</p>
|
||||
* <p>
|
||||
* #ThreadSafe# if all three objects are thread-safe
|
||||
* </p>
|
||||
*
|
||||
* @param <L> the left element type
|
||||
* @param <M> the middle element type
|
||||
@@ -54,10 +59,13 @@ public final class ImmutableTriple<L, M, R> extends Triple<L, M, R> {
|
||||
public final R right;
|
||||
|
||||
/**
|
||||
* <p>Obtains an immutable triple of from three objects inferring the generic types.</p>
|
||||
* <p>
|
||||
* Obtains an immutable triple of from three objects inferring the generic types.
|
||||
* </p>
|
||||
*
|
||||
* <p>This factory allows the triple to be created using inference to
|
||||
* obtain the generic types.</p>
|
||||
* <p>
|
||||
* This factory allows the triple to be created using inference to obtain the generic types.
|
||||
* </p>
|
||||
*
|
||||
* @param <L> the left element type
|
||||
* @param <M> the middle element type
|
||||
@@ -85,7 +93,7 @@ public final class ImmutableTriple<L, M, R> extends Triple<L, M, R> {
|
||||
this.right = right;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
|
||||
@@ -66,12 +66,9 @@ public class LazyFileIterable<T, R> implements Iterable<R> {
|
||||
readOnlyFileHandle.seek(0);
|
||||
|
||||
// sort the map in increasing order of offset of value so disk seek is only in one(forward) direction
|
||||
this.metadataIterator = map
|
||||
.entrySet()
|
||||
.stream()
|
||||
.sorted(
|
||||
(Map.Entry<T, DiskBasedMap.ValueMetadata> o1, Map.Entry<T, DiskBasedMap.ValueMetadata> o2) ->
|
||||
o1.getValue().getOffsetOfValue().compareTo(o2.getValue().getOffsetOfValue()))
|
||||
this.metadataIterator = map.entrySet().stream()
|
||||
.sorted((Map.Entry<T, DiskBasedMap.ValueMetadata> o1, Map.Entry<T, DiskBasedMap.ValueMetadata> o2) -> o1
|
||||
.getValue().getOffsetOfValue().compareTo(o2.getValue().getOffsetOfValue()))
|
||||
.collect(Collectors.toList()).iterator();
|
||||
this.addShutdownHook();
|
||||
}
|
||||
|
||||
@@ -23,15 +23,20 @@ import java.util.Map;
|
||||
|
||||
/**
|
||||
* (NOTE: Adapted from Apache commons-lang3)
|
||||
* <p>A pair consisting of two elements.</p>
|
||||
* <p>
|
||||
* A pair consisting of two elements.
|
||||
* </p>
|
||||
*
|
||||
* <p>This class is an abstract implementation defining the basic API.
|
||||
* It refers to the elements as 'left' and 'right'. It also implements the
|
||||
* {@code Map.Entry} interface where the key is 'left' and the value is 'right'.</p>
|
||||
* <p>
|
||||
* This class is an abstract implementation defining the basic API. It refers to the elements as 'left' and 'right'. It
|
||||
* also implements the {@code Map.Entry} interface where the key is 'left' and the value is 'right'.
|
||||
* </p>
|
||||
*
|
||||
* <p>Subclass implementations may be mutable or immutable.
|
||||
* However, there is no restriction on the type of the stored objects that may be stored.
|
||||
* If mutable objects are stored in the pair, then the pair itself effectively becomes mutable.</p>
|
||||
* <p>
|
||||
* Subclass implementations may be mutable or immutable. However, there is no restriction on the type of the stored
|
||||
* objects that may be stored. If mutable objects are stored in the pair, then the pair itself effectively becomes
|
||||
* mutable.
|
||||
* </p>
|
||||
*
|
||||
* @param <L> the left element type
|
||||
* @param <R> the right element type
|
||||
@@ -44,10 +49,13 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
|
||||
private static final long serialVersionUID = 4954918890077093841L;
|
||||
|
||||
/**
|
||||
* <p>Obtains an immutable pair of from two objects inferring the generic types.</p>
|
||||
* <p>
|
||||
* Obtains an immutable pair of from two objects inferring the generic types.
|
||||
* </p>
|
||||
*
|
||||
* <p>This factory allows the pair to be created using inference to
|
||||
* obtain the generic types.</p>
|
||||
* <p>
|
||||
* This factory allows the pair to be created using inference to obtain the generic types.
|
||||
* </p>
|
||||
*
|
||||
* @param <L> the left element type
|
||||
* @param <R> the right element type
|
||||
@@ -59,31 +67,42 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
|
||||
return new ImmutablePair<>(left, right);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Gets the left element from this pair.</p>
|
||||
* <p>
|
||||
* Gets the left element from this pair.
|
||||
* </p>
|
||||
*
|
||||
* <p>When treated as a key-value pair, this is the key.</p>
|
||||
* <p>
|
||||
* When treated as a key-value pair, this is the key.
|
||||
* </p>
|
||||
*
|
||||
* @return the left element, may be null
|
||||
*/
|
||||
public abstract L getLeft();
|
||||
|
||||
/**
|
||||
* <p>Gets the right element from this pair.</p>
|
||||
* <p>
|
||||
* Gets the right element from this pair.
|
||||
* </p>
|
||||
*
|
||||
* <p>When treated as a key-value pair, this is the value.</p>
|
||||
* <p>
|
||||
* When treated as a key-value pair, this is the value.
|
||||
* </p>
|
||||
*
|
||||
* @return the right element, may be null
|
||||
*/
|
||||
public abstract R getRight();
|
||||
|
||||
/**
|
||||
* <p>Gets the key from this pair.</p>
|
||||
* <p>
|
||||
* Gets the key from this pair.
|
||||
* </p>
|
||||
*
|
||||
* <p>This method implements the {@code Map.Entry} interface returning the
|
||||
* left element as the key.</p>
|
||||
* <p>
|
||||
* This method implements the {@code Map.Entry} interface returning the left element as the key.
|
||||
* </p>
|
||||
*
|
||||
* @return the left element as the key, may be null
|
||||
*/
|
||||
@@ -93,10 +112,13 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Gets the value from this pair.</p>
|
||||
* <p>
|
||||
* Gets the value from this pair.
|
||||
* </p>
|
||||
*
|
||||
* <p>This method implements the {@code Map.Entry} interface returning the
|
||||
* right element as the value.</p>
|
||||
* <p>
|
||||
* This method implements the {@code Map.Entry} interface returning the right element as the value.
|
||||
* </p>
|
||||
*
|
||||
* @return the right element as the value, may be null
|
||||
*/
|
||||
@@ -105,11 +127,12 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
|
||||
return getRight();
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Compares the pair based on the left element followed by the right element.
|
||||
* The types must be {@code Comparable}.</p>
|
||||
* <p>
|
||||
* Compares the pair based on the left element followed by the right element. The types must be {@code Comparable}.
|
||||
* </p>
|
||||
*
|
||||
* @param other the other pair, not null
|
||||
* @return negative if this is less, zero if equal, positive if greater
|
||||
@@ -133,7 +156,9 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Compares this pair to another based on the two elements.</p>
|
||||
* <p>
|
||||
* Compares this pair to another based on the two elements.
|
||||
* </p>
|
||||
*
|
||||
* @param obj the object to compare to, null returns false
|
||||
* @return true if the elements of the pair are equal
|
||||
@@ -145,27 +170,28 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
|
||||
}
|
||||
if (obj instanceof Map.Entry<?, ?>) {
|
||||
final Map.Entry<?, ?> other = (Map.Entry<?, ?>) obj;
|
||||
return getKey().equals(other.getKey())
|
||||
&& getValue().equals(other.getValue());
|
||||
return getKey().equals(other.getKey()) && getValue().equals(other.getValue());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Returns a suitable hash code.
|
||||
* The hash code follows the definition in {@code Map.Entry}.</p>
|
||||
* <p>
|
||||
* Returns a suitable hash code. The hash code follows the definition in {@code Map.Entry}.
|
||||
* </p>
|
||||
*
|
||||
* @return the hash code
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
// see Map.Entry API specification
|
||||
return (getKey() == null ? 0 : getKey().hashCode())
|
||||
^ (getValue() == null ? 0 : getValue().hashCode());
|
||||
return (getKey() == null ? 0 : getKey().hashCode()) ^ (getValue() == null ? 0 : getValue().hashCode());
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Returns a String representation of this pair using the format {@code ($left,$right)}.</p>
|
||||
* <p>
|
||||
* Returns a String representation of this pair using the format {@code ($left,$right)}.
|
||||
* </p>
|
||||
*
|
||||
* @return a string describing this object, not null
|
||||
*/
|
||||
@@ -175,12 +201,15 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Formats the receiver using the given format.</p>
|
||||
* <p>
|
||||
* Formats the receiver using the given format.
|
||||
* </p>
|
||||
*
|
||||
* <p>This uses {@link java.util.Formattable} to perform the formatting. Two variables may
|
||||
* be used to embed the left and right elements. Use {@code %1$s} for the left
|
||||
* element (key) and {@code %2$s} for the right element (value).
|
||||
* The default format used by {@code toString()} is {@code (%1$s,%2$s)}.</p>
|
||||
* <p>
|
||||
* This uses {@link java.util.Formattable} to perform the formatting. Two variables may be used to embed the left and
|
||||
* right elements. Use {@code %1$s} for the left element (key) and {@code %2$s} for the right element (value). The
|
||||
* default format used by {@code toString()} is {@code (%1$s,%2$s)}.
|
||||
* </p>
|
||||
*
|
||||
* @param format the format string, optionally containing {@code %1$s} and {@code %2$s}, not null
|
||||
* @return the formatted string, not null
|
||||
|
||||
@@ -41,7 +41,7 @@ public final class RocksDBBasedMap<K extends Serializable, R extends Serializabl
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return (int)getRocksDBDAO().prefixSearch(columnFamilyName, "").count();
|
||||
return (int) getRocksDBDAO().prefixSearch(columnFamilyName, "").count();
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -62,7 +62,7 @@ public final class RocksDBBasedMap<K extends Serializable, R extends Serializabl
|
||||
|
||||
@Override
|
||||
public R get(Object key) {
|
||||
return getRocksDBDAO().get(columnFamilyName, (Serializable)key);
|
||||
return getRocksDBDAO().get(columnFamilyName, (Serializable) key);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -119,7 +119,6 @@ public final class RocksDBBasedMap<K extends Serializable, R extends Serializabl
|
||||
}
|
||||
|
||||
public Iterator<R> iterator() {
|
||||
return getRocksDBDAO().prefixSearch(columnFamilyName, "")
|
||||
.map(p -> (R)(p.getValue())).iterator();
|
||||
return getRocksDBDAO().prefixSearch(columnFamilyName, "").map(p -> (R) (p.getValue())).iterator();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,14 +22,20 @@ import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* (NOTE: Adapted from Apache commons-lang3)
|
||||
* <p>A triple consisting of three elements.</p>
|
||||
* <p>
|
||||
* A triple consisting of three elements.
|
||||
* </p>
|
||||
*
|
||||
* <p>This class is an abstract implementation defining the basic API.
|
||||
* It refers to the elements as 'left', 'middle' and 'right'.</p>
|
||||
* <p>
|
||||
* This class is an abstract implementation defining the basic API. It refers to the elements as 'left', 'middle' and
|
||||
* 'right'.
|
||||
* </p>
|
||||
*
|
||||
* <p>Subclass implementations may be mutable or immutable.
|
||||
* However, there is no restriction on the type of the stored objects that may be stored.
|
||||
* If mutable objects are stored in the triple, then the triple itself effectively becomes mutable.</p>
|
||||
* <p>
|
||||
* Subclass implementations may be mutable or immutable. However, there is no restriction on the type of the stored
|
||||
* objects that may be stored. If mutable objects are stored in the triple, then the triple itself effectively becomes
|
||||
* mutable.
|
||||
* </p>
|
||||
*
|
||||
* @param <L> the left element type
|
||||
* @param <M> the middle element type
|
||||
@@ -43,10 +49,13 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/**
|
||||
* <p>Obtains an immutable triple of three objects inferring the generic types.</p>
|
||||
* <p>
|
||||
* Obtains an immutable triple of three objects inferring the generic types.
|
||||
* </p>
|
||||
*
|
||||
* <p>This factory allows the triple to be created using inference to
|
||||
* obtain the generic types.</p>
|
||||
* <p>
|
||||
* This factory allows the triple to be created using inference to obtain the generic types.
|
||||
* </p>
|
||||
*
|
||||
* @param <L> the left element type
|
||||
* @param <M> the middle element type
|
||||
@@ -60,35 +69,42 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
|
||||
return new ImmutableTriple<L, M, R>(left, middle, right);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Gets the left element from this triple.</p>
|
||||
* <p>
|
||||
* Gets the left element from this triple.
|
||||
* </p>
|
||||
*
|
||||
* @return the left element, may be null
|
||||
*/
|
||||
public abstract L getLeft();
|
||||
|
||||
/**
|
||||
* <p>Gets the middle element from this triple.</p>
|
||||
* <p>
|
||||
* Gets the middle element from this triple.
|
||||
* </p>
|
||||
*
|
||||
* @return the middle element, may be null
|
||||
*/
|
||||
public abstract M getMiddle();
|
||||
|
||||
/**
|
||||
* <p>Gets the right element from this triple.</p>
|
||||
* <p>
|
||||
* Gets the right element from this triple.
|
||||
* </p>
|
||||
*
|
||||
* @return the right element, may be null
|
||||
*/
|
||||
public abstract R getRight();
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Compares the triple based on the left element, followed by the middle element,
|
||||
* finally the right element.
|
||||
* The types must be {@code Comparable}.</p>
|
||||
* <p>
|
||||
* Compares the triple based on the left element, followed by the middle element, finally the right element. The types
|
||||
* must be {@code Comparable}.
|
||||
* </p>
|
||||
*
|
||||
* @param other the other triple, not null
|
||||
* @return negative if this is less, zero if equal, positive if greater
|
||||
@@ -109,7 +125,9 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Compares this triple to another based on the three elements.</p>
|
||||
* <p>
|
||||
* Compares this triple to another based on the three elements.
|
||||
* </p>
|
||||
*
|
||||
* @param obj the object to compare to, null returns false
|
||||
* @return true if the elements of the triple are equal
|
||||
@@ -122,27 +140,29 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
|
||||
}
|
||||
if (obj instanceof Triple<?, ?, ?>) {
|
||||
final Triple<?, ?, ?> other = (Triple<?, ?, ?>) obj;
|
||||
return getLeft().equals(other.getLeft())
|
||||
&& getMiddle().equals(other.getMiddle())
|
||||
return getLeft().equals(other.getLeft()) && getMiddle().equals(other.getMiddle())
|
||||
&& getRight().equals(other.getRight());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Returns a suitable hash code.</p>
|
||||
* <p>
|
||||
* Returns a suitable hash code.
|
||||
* </p>
|
||||
*
|
||||
* @return the hash code
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return (getLeft() == null ? 0 : getLeft().hashCode())
|
||||
^ (getMiddle() == null ? 0 : getMiddle().hashCode())
|
||||
return (getLeft() == null ? 0 : getLeft().hashCode()) ^ (getMiddle() == null ? 0 : getMiddle().hashCode())
|
||||
^ (getRight() == null ? 0 : getRight().hashCode());
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Returns a String representation of this triple using the format {@code ($left,$middle,$right)}.</p>
|
||||
* <p>
|
||||
* Returns a String representation of this triple using the format {@code ($left,$middle,$right)}.
|
||||
* </p>
|
||||
*
|
||||
* @return a string describing this object, not null
|
||||
*/
|
||||
@@ -153,12 +173,15 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Formats the receiver using the given format.</p>
|
||||
* <p>
|
||||
* Formats the receiver using the given format.
|
||||
* </p>
|
||||
*
|
||||
* <p>This uses {@link java.util.Formattable} to perform the formatting. Three variables may
|
||||
* be used to embed the left, middle and right elements. Use {@code %1$s} for the left
|
||||
* element, {@code %2$s} for the middle and {@code %3$s} for the right element.
|
||||
* The default format used by {@code toString()} is {@code (%1$s,%2$s,%3$s)}.</p>
|
||||
* <p>
|
||||
* This uses {@link java.util.Formattable} to perform the formatting. Three variables may be used to embed the left,
|
||||
* middle and right elements. Use {@code %1$s} for the left element, {@code %2$s} for the middle and {@code %3$s} for the
|
||||
* right element. The default format used by {@code toString()} is {@code (%1$s,%2$s,%3$s)}.
|
||||
* </p>
|
||||
*
|
||||
* @param format the format string, optionally containing {@code %1$s}, {@code %2$s} and {@code %3$s}, not null
|
||||
* @return the formatted string, not null
|
||||
|
||||
@@ -37,9 +37,9 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Executor which orchestrates concurrent producers and consumers communicating through a bounded in-memory queue.
|
||||
* This class takes as input the size limit, queue producer(s), consumer and transformer
|
||||
* and exposes API to orchestrate concurrent execution of these actors communicating through a central bounded queue
|
||||
* Executor which orchestrates concurrent producers and consumers communicating through a bounded in-memory queue. This
|
||||
* class takes as input the size limit, queue producer(s), consumer and transformer and exposes API to orchestrate
|
||||
* concurrent execution of these actors communicating through a central bounded queue.
|
||||
*/
|
||||
public class BoundedInMemoryExecutor<I, O, E> {
|
||||
|
||||
@@ -54,17 +54,13 @@ public class BoundedInMemoryExecutor<I, O, E> {
|
||||
// Consumer
|
||||
private final Option<BoundedInMemoryQueueConsumer<O, E>> consumer;
|
||||
|
||||
public BoundedInMemoryExecutor(final long bufferLimitInBytes,
|
||||
BoundedInMemoryQueueProducer<I> producer,
|
||||
Option<BoundedInMemoryQueueConsumer<O, E>> consumer,
|
||||
final Function<I, O> transformFunction) {
|
||||
public BoundedInMemoryExecutor(final long bufferLimitInBytes, BoundedInMemoryQueueProducer<I> producer,
|
||||
Option<BoundedInMemoryQueueConsumer<O, E>> consumer, final Function<I, O> transformFunction) {
|
||||
this(bufferLimitInBytes, Arrays.asList(producer), consumer, transformFunction, new DefaultSizeEstimator<>());
|
||||
}
|
||||
|
||||
public BoundedInMemoryExecutor(final long bufferLimitInBytes,
|
||||
List<BoundedInMemoryQueueProducer<I>> producers,
|
||||
Option<BoundedInMemoryQueueConsumer<O, E>> consumer,
|
||||
final Function<I, O> transformFunction,
|
||||
public BoundedInMemoryExecutor(final long bufferLimitInBytes, List<BoundedInMemoryQueueProducer<I>> producers,
|
||||
Option<BoundedInMemoryQueueConsumer<O, E>> consumer, final Function<I, O> transformFunction,
|
||||
final SizeEstimator<O> sizeEstimator) {
|
||||
this.producers = producers;
|
||||
this.consumer = consumer;
|
||||
@@ -74,8 +70,7 @@ public class BoundedInMemoryExecutor<I, O, E> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback to implement environment specific behavior before executors (producers/consumer)
|
||||
* run.
|
||||
* Callback to implement environment specific behavior before executors (producers/consumer) run.
|
||||
*/
|
||||
public void preExecute() {
|
||||
// Do Nothing in general context
|
||||
@@ -118,20 +113,19 @@ public class BoundedInMemoryExecutor<I, O, E> {
|
||||
*/
|
||||
private Future<E> startConsumer() {
|
||||
return consumer.map(consumer -> {
|
||||
return executorService.submit(
|
||||
() -> {
|
||||
logger.info("starting consumer thread");
|
||||
preExecute();
|
||||
try {
|
||||
E result = consumer.consume(queue);
|
||||
logger.info("Queue Consumption is done; notifying producer threads");
|
||||
return result;
|
||||
} catch (Exception e) {
|
||||
logger.error("error consuming records", e);
|
||||
queue.markAsFailed(e);
|
||||
throw e;
|
||||
}
|
||||
});
|
||||
return executorService.submit(() -> {
|
||||
logger.info("starting consumer thread");
|
||||
preExecute();
|
||||
try {
|
||||
E result = consumer.consume(queue);
|
||||
logger.info("Queue Consumption is done; notifying producer threads");
|
||||
return result;
|
||||
} catch (Exception e) {
|
||||
logger.error("error consuming records", e);
|
||||
queue.markAsFailed(e);
|
||||
throw e;
|
||||
}
|
||||
});
|
||||
}).orElse(CompletableFuture.completedFuture(null));
|
||||
}
|
||||
|
||||
|
||||
@@ -36,12 +36,12 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Used for enqueueing input records. Queue limit is controlled by {@link #memoryLimit}.
|
||||
* Unlike standard bounded queue implementations, this queue bounds the size by memory bytes occupied by its
|
||||
* tenants. The standard implementation bounds by the number of entries in the queue.
|
||||
* Used for enqueueing input records. Queue limit is controlled by {@link #memoryLimit}. Unlike standard bounded queue
|
||||
* implementations, this queue bounds the size by memory bytes occupied by its tenants. The standard implementation
|
||||
* bounds by the number of entries in the queue.
|
||||
*
|
||||
* It internally samples every {@link #RECORD_SAMPLING_RATE}th record and adjusts number of records in
|
||||
* queue accordingly. This is done to ensure that we don't OOM.
|
||||
* It internally samples every {@link #RECORD_SAMPLING_RATE}th record and adjusts number of records in queue
|
||||
* accordingly. This is done to ensure that we don't OOM.
|
||||
*
|
||||
* This queue supports multiple producer single consumer pattern.
|
||||
*
|
||||
@@ -65,8 +65,7 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
|
||||
// used for sampling records with "RECORD_SAMPLING_RATE" frequency.
|
||||
public final AtomicLong samplingRecordCounter = new AtomicLong(-1);
|
||||
// internal queue for records.
|
||||
private final LinkedBlockingQueue<Option<O>> queue = new
|
||||
LinkedBlockingQueue<>();
|
||||
private final LinkedBlockingQueue<Option<O>> queue = new LinkedBlockingQueue<>();
|
||||
// maximum amount of memory to be used for queueing records.
|
||||
private final long memoryLimit;
|
||||
// it holds the root cause of the exception in case either queueing records (consuming from
|
||||
@@ -96,24 +95,21 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
|
||||
/**
|
||||
* Construct BoundedInMemoryQueue with default SizeEstimator
|
||||
*
|
||||
* @param memoryLimit MemoryLimit in bytes
|
||||
* @param memoryLimit MemoryLimit in bytes
|
||||
* @param transformFunction Transformer Function to convert input payload type to stored payload type
|
||||
*/
|
||||
public BoundedInMemoryQueue(final long memoryLimit, final Function<I, O> transformFunction) {
|
||||
this(memoryLimit, transformFunction, new DefaultSizeEstimator() {
|
||||
});
|
||||
this(memoryLimit, transformFunction, new DefaultSizeEstimator() {});
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct BoundedInMemoryQueue with passed in size estimator
|
||||
*
|
||||
* @param memoryLimit MemoryLimit in bytes
|
||||
* @param transformFunction Transformer Function to convert input payload type to stored payload type
|
||||
* @param memoryLimit MemoryLimit in bytes
|
||||
* @param transformFunction Transformer Function to convert input payload type to stored payload type
|
||||
* @param payloadSizeEstimator Payload Size Estimator
|
||||
*/
|
||||
public BoundedInMemoryQueue(
|
||||
final long memoryLimit,
|
||||
final Function<I, O> transformFunction,
|
||||
public BoundedInMemoryQueue(final long memoryLimit, final Function<I, O> transformFunction,
|
||||
final SizeEstimator<O> payloadSizeEstimator) {
|
||||
this.memoryLimit = memoryLimit;
|
||||
this.transformFunction = transformFunction;
|
||||
@@ -127,9 +123,9 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Samples records with "RECORD_SAMPLING_RATE" frequency and computes average record size in bytes. It is used
|
||||
* for determining how many maximum records to queue. Based on change in avg size it may increase or decrease
|
||||
* available permits.
|
||||
* Samples records with "RECORD_SAMPLING_RATE" frequency and computes average record size in bytes. It is used for
|
||||
* determining how many maximum records to queue. Based on change in avg size it may increase or decrease available
|
||||
* permits.
|
||||
*
|
||||
* @param payload Payload to size
|
||||
*/
|
||||
@@ -139,10 +135,10 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
|
||||
}
|
||||
|
||||
final long recordSizeInBytes = payloadSizeEstimator.sizeEstimate(payload);
|
||||
final long newAvgRecordSizeInBytes = Math
|
||||
.max(1, (avgRecordSizeInBytes * numSamples + recordSizeInBytes) / (numSamples + 1));
|
||||
final int newRateLimit = (int) Math
|
||||
.min(RECORD_CACHING_LIMIT, Math.max(1, this.memoryLimit / newAvgRecordSizeInBytes));
|
||||
final long newAvgRecordSizeInBytes =
|
||||
Math.max(1, (avgRecordSizeInBytes * numSamples + recordSizeInBytes) / (numSamples + 1));
|
||||
final int newRateLimit =
|
||||
(int) Math.min(RECORD_CACHING_LIMIT, Math.max(1, this.memoryLimit / newAvgRecordSizeInBytes));
|
||||
|
||||
// If there is any change in number of records to cache then we will either release (if it increased) or acquire
|
||||
// (if it decreased) to adjust rate limiting to newly computed value.
|
||||
@@ -187,8 +183,8 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Reader interface but never exposed to outside world as this is a single consumer queue.
|
||||
* Reading is done through a singleton iterator for this queue.
|
||||
* Reader interface but never exposed to outside world as this is a single consumer queue. Reading is done through a
|
||||
* singleton iterator for this queue.
|
||||
*/
|
||||
private Option<O> readNextRecord() {
|
||||
if (this.isReadDone.get()) {
|
||||
|
||||
@@ -19,8 +19,7 @@
|
||||
package org.apache.hudi.common.util.queue;
|
||||
|
||||
/**
|
||||
* Producer for BoundedInMemoryQueue. Memory Bounded Buffer supports
|
||||
* multiple producers single consumer pattern.
|
||||
* Producer for BoundedInMemoryQueue. Memory Bounded Buffer supports multiple producers single consumer pattern.
|
||||
*
|
||||
* @param <I> Input type for buffer items produced
|
||||
*/
|
||||
|
||||
@@ -32,15 +32,13 @@ public class DefaultHoodieConfig implements Serializable {
|
||||
this.props = props;
|
||||
}
|
||||
|
||||
public static void setDefaultOnCondition(Properties props, boolean condition, String propName,
|
||||
String defaultValue) {
|
||||
public static void setDefaultOnCondition(Properties props, boolean condition, String propName, String defaultValue) {
|
||||
if (condition) {
|
||||
props.setProperty(propName, defaultValue);
|
||||
}
|
||||
}
|
||||
|
||||
public static void setDefaultOnCondition(Properties props, boolean condition,
|
||||
DefaultHoodieConfig config) {
|
||||
public static void setDefaultOnCondition(Properties props, boolean condition, DefaultHoodieConfig config) {
|
||||
if (condition) {
|
||||
props.putAll(config.getProps());
|
||||
}
|
||||
|
||||
@@ -23,7 +23,9 @@ import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**
|
||||
* <p> Exception thrown to indicate that a hoodie dataset was not found on the path provided </p>
|
||||
* <p>
|
||||
* Exception thrown to indicate that a hoodie dataset was not found on the path provided
|
||||
* </p>
|
||||
*/
|
||||
public class DatasetNotFoundException extends HoodieException {
|
||||
|
||||
@@ -50,8 +52,7 @@ public class DatasetNotFoundException extends HoodieException {
|
||||
// if the base path is file:///, then we have a IllegalArgumentException
|
||||
throw new DatasetNotFoundException(metaPathDir.toString());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not check if dataset " + basePathDir + " is valid dataset", e);
|
||||
throw new HoodieIOException("Could not check if dataset " + basePathDir + " is valid dataset", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user