[HUDI-296] Explore use of spotless to auto fix formatting errors (#945)

- Add spotless format fixing to project
- One-time reformatting for conformity
- Build fails on formatting violations; mvn spotless:apply auto-fixes them (see the configuration sketch below)
Authored by leesf on 2019-10-10 20:19:40 +08:00, committed by vinoth chandar
parent 834c591955
commit b19bed442d
381 changed files with 7350 additions and 9064 deletions
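
For orientation, here is a minimal sketch of the kind of spotless-maven-plugin setup this change introduces in the project pom.xml. The plugin coordinates and the check/apply goals are the standard spotless ones; the version, phase binding, and formatter file shown below are illustrative assumptions and may differ from what the commit actually adds:

<plugin>
  <groupId>com.diffplug.spotless</groupId>
  <artifactId>spotless-maven-plugin</artifactId>
  <!-- version is illustrative -->
  <version>1.24.3</version>
  <configuration>
    <java>
      <eclipse>
        <!-- hypothetical path; the real commit ships its own formatter config -->
        <file>${project.basedir}/style/eclipse-formatter.xml</file>
      </eclipse>
      <removeUnusedImports/>
    </java>
  </configuration>
  <executions>
    <execution>
      <!-- bind check to the build so formatting violations fail it; phase choice is illustrative -->
      <phase>verify</phase>
      <goals>
        <goal>check</goal>
      </goals>
    </execution>
  </executions>
</plugin>

With such a setup, mvn spotless:check fails the build on formatting violations, and mvn spotless:apply rewrites the offending files in place; that is what produced the one-time, repository-wide reformatting in the diffs below.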

@@ -35,8 +35,7 @@ public class HoodieAvroWriteSupport extends AvroWriteSupport {
private String maxRecordKey;
public static final String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY =
"org.apache.hudi.bloomfilter";
public static final String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY = "org.apache.hudi.bloomfilter";
public static final String HOODIE_MIN_RECORD_KEY_FOOTER = "hoodie_min_record_key";
public static final String HOODIE_MAX_RECORD_KEY_FOOTER = "hoodie_max_record_key";
@@ -50,8 +49,7 @@ public class HoodieAvroWriteSupport extends AvroWriteSupport {
public WriteSupport.FinalizedWriteContext finalizeWrite() {
HashMap<String, String> extraMetaData = new HashMap<>();
if (bloomFilter != null) {
extraMetaData
.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
extraMetaData.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
if (minRecordKey != null && maxRecordKey != null) {
extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);

@@ -30,8 +30,8 @@ import org.apache.avro.generic.GenericRecord;
/**
* Marjority of this is copied from
* https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/
* common/JsonConverter.java Adjusted for expected behavior of our use cases
* https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/ common/JsonConverter.java
* Adjusted for expected behavior of our use cases
*/
public class MercifulJsonConverter {
@@ -51,8 +51,7 @@ public class MercifulJsonConverter {
}
}
private GenericRecord convert(Map<String, Object> raw, Schema schema)
throws IOException {
private GenericRecord convert(Map<String, Object> raw, Schema schema) throws IOException {
GenericRecord result = new GenericData.Record(schema);
for (Schema.Field f : schema.getFields()) {
String name = f.name();
@@ -128,17 +127,15 @@ public class MercifulJsonConverter {
}
return mapRes;
default:
throw new IllegalArgumentException(
"JsonConverter cannot handle type: " + schema.getType());
throw new IllegalArgumentException("JsonConverter cannot handle type: " + schema.getType());
}
throw new JsonConversionException(value, name, schema);
}
private boolean isOptional(Schema schema) {
return schema.getType().equals(Schema.Type.UNION)
&& schema.getTypes().size() == 2
return schema.getType().equals(Schema.Type.UNION) && schema.getTypes().size() == 2
&& (schema.getTypes().get(0).getType().equals(Schema.Type.NULL)
|| schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
|| schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
}
private Schema getNonNull(Schema schema) {
@@ -160,8 +157,7 @@ public class MercifulJsonConverter {
@Override
public String toString() {
return String.format("Type conversion error for field %s, %s for %s",
fieldName, value, schema);
return String.format("Type conversion error for field %s, %s for %s", fieldName, value, schema);
}
}
}

@@ -41,9 +41,8 @@ public class HoodieCleanStat implements Serializable {
// Earliest commit that was retained in this clean
private final String earliestCommitToRetain;
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath,
List<String> deletePathPatterns, List<String> successDeleteFiles,
List<String> failedDeleteFiles, String earliestCommitToRetain) {
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath, List<String> deletePathPatterns,
List<String> successDeleteFiles, List<String> failedDeleteFiles, String earliestCommitToRetain) {
this.policy = policy;
this.partitionPath = partitionPath;
this.deletePathPatterns = deletePathPatterns;
@@ -115,14 +114,14 @@ public class HoodieCleanStat implements Serializable {
}
public Builder withEarliestCommitRetained(Option<HoodieInstant> earliestCommitToRetain) {
this.earliestCommitToRetain = (earliestCommitToRetain.isPresent())
? earliestCommitToRetain.get().getTimestamp() : "-1";
this.earliestCommitToRetain =
(earliestCommitToRetain.isPresent()) ? earliestCommitToRetain.get().getTimestamp() : "-1";
return this;
}
public HoodieCleanStat build() {
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns,
successDeleteFiles, failedDeleteFiles, earliestCommitToRetain);
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns, successDeleteFiles, failedDeleteFiles,
earliestCommitToRetain);
}
}
}

@@ -50,8 +50,7 @@ public class HoodieJsonPayload implements HoodieRecordPayload<HoodieJsonPayload>
}
@Override
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema)
throws IOException {
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema) throws IOException {
return getInsertValue(schema);
}
@@ -68,8 +67,7 @@ public class HoodieJsonPayload implements HoodieRecordPayload<HoodieJsonPayload>
private byte[] compressData(String jsonData) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION);
DeflaterOutputStream dos =
new DeflaterOutputStream(baos, deflater, true);
DeflaterOutputStream dos = new DeflaterOutputStream(baos, deflater, true);
try {
dos.write(jsonData.getBytes());
} finally {

@@ -37,8 +37,8 @@ public class HoodieRollbackStat implements Serializable {
// Count of HoodieLogFile to commandBlocks written for a particular rollback
private final Map<FileStatus, Long> commandBlocksCount;
public HoodieRollbackStat(String partitionPath, List<String> successDeleteFiles,
List<String> failedDeleteFiles, Map<FileStatus, Long> commandBlocksCount) {
public HoodieRollbackStat(String partitionPath, List<String> successDeleteFiles, List<String> failedDeleteFiles,
Map<FileStatus, Long> commandBlocksCount) {
this.partitionPath = partitionPath;
this.successDeleteFiles = successDeleteFiles;
this.failedDeleteFiles = failedDeleteFiles;
@@ -73,7 +73,7 @@ public class HoodieRollbackStat implements Serializable {
private String partitionPath;
public Builder withDeletedFileResults(Map<FileStatus, Boolean> deletedFiles) {
//noinspection Convert2MethodRef
// noinspection Convert2MethodRef
successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue())
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue())
@@ -92,8 +92,7 @@ public class HoodieRollbackStat implements Serializable {
}
public HoodieRollbackStat build() {
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles,
commandBlocksCount);
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, commandBlocksCount);
}
}
}

@@ -57,8 +57,7 @@ public class SerializableConfiguration implements Serializable {
@Override
public String toString() {
StringBuilder str = new StringBuilder();
configuration.iterator().forEachRemaining(e ->
str.append(String.format("%s => %s \n", e.getKey(), e.getValue())));
configuration.iterator().forEachRemaining(e -> str.append(String.format("%s => %s \n", e.getKey(), e.getValue())));
return configuration.toString();
}
}

@@ -59,21 +59,19 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
/**
* HoodieWrapperFileSystem wraps the default file system. It holds state about the open streams in
* the file system to support getting the written size to each of the open streams.
* HoodieWrapperFileSystem wraps the default file system. It holds state about the open streams in the file system to
* support getting the written size to each of the open streams.
*/
public class HoodieWrapperFileSystem extends FileSystem {
public static final String HOODIE_SCHEME_PREFIX = "hoodie-";
private ConcurrentMap<String, SizeAwareFSDataOutputStream> openStreams = new
ConcurrentHashMap<>();
private ConcurrentMap<String, SizeAwareFSDataOutputStream> openStreams = new ConcurrentHashMap<>();
private FileSystem fileSystem;
private URI uri;
private ConsistencyGuard consistencyGuard = new NoOpConsistencyGuard();
public HoodieWrapperFileSystem() {
}
public HoodieWrapperFileSystem() {}
public HoodieWrapperFileSystem(FileSystem fileSystem, ConsistencyGuard consistencyGuard) {
this.fileSystem = fileSystem;
@@ -94,8 +92,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
URI oldURI = oldPath.toUri();
URI newURI;
try {
newURI = new URI(newScheme, oldURI.getUserInfo(), oldURI.getHost(), oldURI.getPort(),
oldURI.getPath(), oldURI.getQuery(), oldURI.getFragment());
newURI = new URI(newScheme, oldURI.getUserInfo(), oldURI.getHost(), oldURI.getPort(), oldURI.getPath(),
oldURI.getQuery(), oldURI.getFragment());
return new Path(newURI);
} catch (URISyntaxException e) {
// TODO - Better Exception handling
@@ -108,8 +106,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
if (StorageSchemes.isSchemeSupported(scheme)) {
newScheme = HOODIE_SCHEME_PREFIX + scheme;
} else {
throw new IllegalArgumentException(
"BlockAlignedAvroParquetWriter does not support scheme " + scheme);
throw new IllegalArgumentException("BlockAlignedAvroParquetWriter does not support scheme " + scheme);
}
return newScheme;
}
@@ -143,22 +140,21 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite,
int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
short replication, long blockSize, Progressable progress) throws IOException {
final Path translatedPath = convertToDefaultPath(f);
return wrapOutputStream(f, fileSystem
.create(translatedPath, permission, overwrite, bufferSize, replication, blockSize,
progress));
return wrapOutputStream(f,
fileSystem.create(translatedPath, permission, overwrite, bufferSize, replication, blockSize, progress));
}
private FSDataOutputStream wrapOutputStream(final Path path,
FSDataOutputStream fsDataOutputStream) throws IOException {
private FSDataOutputStream wrapOutputStream(final Path path, FSDataOutputStream fsDataOutputStream)
throws IOException {
if (fsDataOutputStream instanceof SizeAwareFSDataOutputStream) {
return fsDataOutputStream;
}
SizeAwareFSDataOutputStream os = new SizeAwareFSDataOutputStream(path,
fsDataOutputStream, consistencyGuard, () -> openStreams.remove(path.getName()));
SizeAwareFSDataOutputStream os = new SizeAwareFSDataOutputStream(path, fsDataOutputStream, consistencyGuard,
() -> openStreams.remove(path.getName()));
openStreams.put(path.getName(), os);
return os;
}
@@ -184,8 +180,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public FSDataOutputStream create(Path f, short replication, Progressable progress)
throws IOException {
public FSDataOutputStream create(Path f, short replication, Progressable progress) throws IOException {
return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), replication, progress));
}
@@ -201,39 +196,35 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication,
long blockSize, Progressable progress) throws IOException {
return wrapOutputStream(f, fileSystem
.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize, progress));
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize,
Progressable progress) throws IOException {
return wrapOutputStream(f,
fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize, progress));
}
@Override
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
return wrapOutputStream(f, fileSystem
.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize,
progress));
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags, int bufferSize,
short replication, long blockSize, Progressable progress) throws IOException {
return wrapOutputStream(f,
fileSystem.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize, progress));
}
@Override
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
int bufferSize, short replication, long blockSize, Progressable progress,
Options.ChecksumOpt checksumOpt) throws IOException {
return wrapOutputStream(f, fileSystem
.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize,
progress, checksumOpt));
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags, int bufferSize,
short replication, long blockSize, Progressable progress, Options.ChecksumOpt checksumOpt) throws IOException {
return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), permission, flags, bufferSize, replication,
blockSize, progress, checksumOpt));
}
@Override
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication,
long blockSize) throws IOException {
return wrapOutputStream(f, fileSystem
.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize));
}
@Override
public FSDataOutputStream append(Path f, int bufferSize, Progressable progress)
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize)
throws IOException {
return wrapOutputStream(f,
fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize));
}
@Override
public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
return wrapOutputStream(f, fileSystem.append(convertToDefaultPath(f), bufferSize, progress));
}
@@ -341,8 +332,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public Token<?>[] addDelegationTokens(String renewer, Credentials credentials)
throws IOException {
public Token<?>[] addDelegationTokens(String renewer, Credentials credentials) throws IOException {
return fileSystem.addDelegationTokens(renewer, credentials);
}
@@ -352,8 +342,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len)
throws IOException {
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
return fileSystem.getFileBlockLocations(file, start, len);
}
@@ -383,28 +372,27 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize,
public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize, short replication,
long blockSize, Progressable progress) throws IOException {
Path p = convertToDefaultPath(f);
return wrapOutputStream(p,
fileSystem.createNonRecursive(p, overwrite, bufferSize, replication, blockSize, progress));
}
@Override
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, boolean overwrite, int bufferSize,
short replication, long blockSize, Progressable progress) throws IOException {
Path p = convertToDefaultPath(f);
return wrapOutputStream(p, fileSystem.createNonRecursive(p, overwrite, bufferSize, replication, blockSize,
progress));
return wrapOutputStream(p,
fileSystem.createNonRecursive(p, permission, overwrite, bufferSize, replication, blockSize, progress));
}
@Override
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, boolean overwrite,
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
Path p = convertToDefaultPath(f);
return wrapOutputStream(p, fileSystem.createNonRecursive(p, permission, overwrite, bufferSize, replication,
blockSize, progress));
}
@Override
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize,
Progressable progress) throws IOException {
Path p = convertToDefaultPath(f);
return wrapOutputStream(p, fileSystem.createNonRecursive(p, permission, flags, bufferSize, replication,
blockSize, progress));
return wrapOutputStream(p,
fileSystem.createNonRecursive(p, permission, flags, bufferSize, replication, blockSize, progress));
}
@Override
@@ -590,10 +578,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path[] srcs, Path dst)
throws IOException {
fileSystem
.copyFromLocalFile(delSrc, overwrite, convertLocalPaths(srcs), convertToDefaultPath(dst));
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path[] srcs, Path dst) throws IOException {
fileSystem.copyFromLocalFile(delSrc, overwrite, convertLocalPaths(srcs), convertToDefaultPath(dst));
try {
consistencyGuard.waitTillFileAppears(convertToDefaultPath(dst));
} catch (TimeoutException e) {
@@ -602,10 +588,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst)
throws IOException {
fileSystem
.copyFromLocalFile(delSrc, overwrite, convertToLocalPath(src), convertToDefaultPath(dst));
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) throws IOException {
fileSystem.copyFromLocalFile(delSrc, overwrite, convertToLocalPath(src), convertToDefaultPath(dst));
try {
consistencyGuard.waitTillFileAppears(convertToDefaultPath(dst));
} catch (TimeoutException e) {
@@ -629,22 +613,19 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public void copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem)
throws IOException {
fileSystem.copyToLocalFile(delSrc, convertToDefaultPath(src), convertToLocalPath(dst),
useRawLocalFileSystem);
public void copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem) throws IOException {
fileSystem.copyToLocalFile(delSrc, convertToDefaultPath(src), convertToLocalPath(dst), useRawLocalFileSystem);
}
@Override
public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException {
return convertToHoodiePath(fileSystem
.startLocalOutput(convertToDefaultPath(fsOutputFile), convertToDefaultPath(tmpLocalFile)));
return convertToHoodiePath(
fileSystem.startLocalOutput(convertToDefaultPath(fsOutputFile), convertToDefaultPath(tmpLocalFile)));
}
@Override
public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException {
fileSystem.completeLocalOutput(convertToDefaultPath(fsOutputFile),
convertToDefaultPath(tmpLocalFile));
fileSystem.completeLocalOutput(convertToDefaultPath(fsOutputFile), convertToDefaultPath(tmpLocalFile));
}
@Override
@@ -691,8 +672,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
@Override
public void createSymlink(Path target, Path link, boolean createParent) throws IOException {
fileSystem
.createSymlink(convertToDefaultPath(target), convertToDefaultPath(link), createParent);
fileSystem.createSymlink(convertToDefaultPath(target), convertToDefaultPath(link), createParent);
}
@Override
@@ -761,8 +741,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public void renameSnapshot(Path path, String snapshotOldName, String snapshotNewName)
throws IOException {
public void renameSnapshot(Path path, String snapshotOldName, String snapshotNewName) throws IOException {
fileSystem.renameSnapshot(convertToDefaultPath(path), snapshotOldName, snapshotNewName);
}
@@ -807,8 +786,7 @@ public class HoodieWrapperFileSystem extends FileSystem {
}
@Override
public void setXAttr(Path path, String name, byte[] value, EnumSet<XAttrSetFlag> flag)
throws IOException {
public void setXAttr(Path path, String name, byte[] value, EnumSet<XAttrSetFlag> flag) throws IOException {
fileSystem.setXAttr(convertToDefaultPath(path), name, value, flag);
}
@@ -899,8 +877,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
return openStreams.get(file.getName()).getBytesWritten();
}
// When the file is first written, we do not have a track of it
throw new IllegalArgumentException(file.toString()
+ " does not have a open stream. Cannot get the bytes written on the stream");
throw new IllegalArgumentException(
file.toString() + " does not have a open stream. Cannot get the bytes written on the stream");
}
public FileSystem getFileSystem() {

@@ -27,8 +27,8 @@ import org.apache.hudi.common.util.ConsistencyGuard;
import org.apache.hudi.exception.HoodieException;
/**
* Wrapper over <code>FSDataOutputStream</code> to keep track of the size of the written bytes. This
* gives a cheap way to check on the underlying file size.
* Wrapper over <code>FSDataOutputStream</code> to keep track of the size of the written bytes. This gives a cheap way
* to check on the underlying file size.
*/
public class SizeAwareFSDataOutputStream extends FSDataOutputStream {
@@ -41,8 +41,8 @@ public class SizeAwareFSDataOutputStream extends FSDataOutputStream {
// Consistency guard
private final ConsistencyGuard consistencyGuard;
public SizeAwareFSDataOutputStream(Path path, FSDataOutputStream out,
ConsistencyGuard consistencyGuard, Runnable closeCallback) throws IOException {
public SizeAwareFSDataOutputStream(Path path, FSDataOutputStream out, ConsistencyGuard consistencyGuard,
Runnable closeCallback) throws IOException {
super(out);
this.path = path;
this.closeCallback = closeCallback;

@@ -31,8 +31,8 @@ import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
/**
* Encapsulates all the needed information about a compaction and make a decision whether this
* compaction is effective or not
* Encapsulates all the needed information about a compaction and make a decision whether this compaction is effective
* or not
*
*/
public class CompactionOperation implements Serializable {
@@ -44,10 +44,9 @@ public class CompactionOperation implements Serializable {
private HoodieFileGroupId id;
private Map<String, Double> metrics;
//Only for serialization/de-serialization
// Only for serialization/de-serialization
@Deprecated
public CompactionOperation() {
}
public CompactionOperation() {}
public CompactionOperation(String fileId, String partitionPath, String baseInstantTime,
Option<String> dataFileCommitTime, List<String> deltaFilePaths, Option<String> dataFilePath,
@@ -60,8 +59,8 @@ public class CompactionOperation implements Serializable {
this.metrics = metrics;
}
public CompactionOperation(Option<HoodieDataFile> dataFile, String partitionPath,
List<HoodieLogFile> logFiles, Map<String, Double> metrics) {
public CompactionOperation(Option<HoodieDataFile> dataFile, String partitionPath, List<HoodieLogFile> logFiles,
Map<String, Double> metrics) {
if (dataFile.isPresent()) {
this.baseInstantTime = dataFile.get().getCommitTime();
this.dataFilePath = Option.of(dataFile.get().getPath());
@@ -75,8 +74,7 @@ public class CompactionOperation implements Serializable {
this.dataFileCommitTime = Option.empty();
}
this.deltaFilePaths = logFiles.stream().map(s -> s.getPath().toString())
.collect(Collectors.toList());
this.deltaFilePaths = logFiles.stream().map(s -> s.getPath().toString()).collect(Collectors.toList());
this.metrics = metrics;
}
@@ -113,12 +111,13 @@ public class CompactionOperation implements Serializable {
}
public Option<HoodieDataFile> getBaseFile() {
//TODO: HUDI-130 - Paths return in compaction plan needs to be relative to base-path
// TODO: HUDI-130 - Paths return in compaction plan needs to be relative to base-path
return dataFilePath.map(df -> new HoodieDataFile(df));
}
/**
* Convert Avro generated Compaction operation to POJO for Spark RDD operation
*
* @param operation Hoodie Compaction Operation
* @return
*/
@@ -126,8 +125,7 @@ public class CompactionOperation implements Serializable {
CompactionOperation op = new CompactionOperation();
op.baseInstantTime = operation.getBaseInstantTime();
op.dataFilePath = Option.ofNullable(operation.getDataFilePath());
op.dataFileCommitTime =
op.dataFilePath.map(p -> FSUtils.getCommitTime(new Path(p).getName()));
op.dataFileCommitTime = op.dataFilePath.map(p -> FSUtils.getCommitTime(new Path(p).getName()));
op.deltaFilePaths = new ArrayList<>(operation.getDeltaFilePaths());
op.id = new HoodieFileGroupId(operation.getPartitionPath(), operation.getFileId());
op.metrics = operation.getMetrics() == null ? new HashMap<>() : new HashMap<>(operation.getMetrics());
@@ -136,14 +134,9 @@ public class CompactionOperation implements Serializable {
@Override
public String toString() {
return "CompactionOperation{"
+ "baseInstantTime='" + baseInstantTime + '\''
+ ", dataFileCommitTime=" + dataFileCommitTime
+ ", deltaFilePaths=" + deltaFilePaths
+ ", dataFilePath=" + dataFilePath
+ ", id='" + id + '\''
+ ", metrics=" + metrics
+ '}';
return "CompactionOperation{" + "baseInstantTime='" + baseInstantTime + '\'' + ", dataFileCommitTime="
+ dataFileCommitTime + ", deltaFilePaths=" + deltaFilePaths + ", dataFilePath=" + dataFilePath + ", id='" + id
+ '\'' + ", metrics=" + metrics + '}';
}
@Override
@@ -158,8 +151,7 @@ public class CompactionOperation implements Serializable {
return Objects.equals(baseInstantTime, operation.baseInstantTime)
&& Objects.equals(dataFileCommitTime, operation.dataFileCommitTime)
&& Objects.equals(deltaFilePaths, operation.deltaFilePaths)
&& Objects.equals(dataFilePath, operation.dataFilePath)
&& Objects.equals(id, operation.id);
&& Objects.equals(dataFilePath, operation.dataFilePath) && Objects.equals(id, operation.id);
}
@Override

@@ -25,8 +25,8 @@ import java.util.stream.Stream;
import org.apache.hudi.common.util.Option;
/**
* Within a file group, a slice is a combination of data file written at a commit time and list of
* log files, containing changes to the data file from that commit time
* Within a file group, a slice is a combination of data file written at a commit time and list of log files, containing
* changes to the data file from that commit time
*/
public class FileSlice implements Serializable {
@@ -46,8 +46,8 @@ public class FileSlice implements Serializable {
private HoodieDataFile dataFile;
/**
* List of appendable log files with real time data - Sorted with greater log version first -
* Always empty for copy_on_write storage.
* List of appendable log files with real time data - Sorted with greater log version first - Always empty for
* copy_on_write storage.
*/
private final TreeSet<HoodieLogFile> logFiles;
@@ -100,6 +100,7 @@ public class FileSlice implements Serializable {
/**
* Returns true if there is no data file and no log files. Happens as part of pending compaction
*
* @return
*/
public boolean isEmpty() {
@@ -126,10 +127,8 @@ public class FileSlice implements Serializable {
return false;
}
FileSlice slice = (FileSlice) o;
return Objects.equals(fileGroupId, slice.fileGroupId)
&& Objects.equals(baseInstantTime, slice.baseInstantTime)
&& Objects.equals(dataFile, slice.dataFile)
&& Objects.equals(logFiles, slice.logFiles);
return Objects.equals(fileGroupId, slice.fileGroupId) && Objects.equals(baseInstantTime, slice.baseInstantTime)
&& Objects.equals(dataFile, slice.dataFile) && Objects.equals(logFiles, slice.logFiles);
}
@Override

@@ -27,14 +27,14 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieIOException;
/**
* This is a payload to wrap a existing Hoodie Avro Record. Useful to create a HoodieRecord over
* existing GenericRecords in a hoodie datasets (useful in compactions)
* This is a payload to wrap a existing Hoodie Avro Record. Useful to create a HoodieRecord over existing GenericRecords
* in a hoodie datasets (useful in compactions)
*/
public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload> {
// Store the GenericRecord converted to bytes - 1) Doesn't store schema hence memory efficient 2) Makes the payload
// java serializable
private final byte [] recordBytes;
private final byte[] recordBytes;
public HoodieAvroPayload(Option<GenericRecord> record) {
try {
@@ -54,8 +54,7 @@ public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload>
}
@Override
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
throws IOException {
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
return getInsertValue(schema);
}

@@ -19,6 +19,5 @@
package org.apache.hudi.common.model;
public enum HoodieCleaningPolicy {
KEEP_LATEST_FILE_VERSIONS,
KEEP_LATEST_COMMITS
KEEP_LATEST_FILE_VERSIONS, KEEP_LATEST_COMMITS
}

@@ -338,10 +338,7 @@ public class HoodieCommitMetadata implements Serializable {
@Override
public String toString() {
return "HoodieCommitMetadata{"
+ "partitionToWriteStats=" + partitionToWriteStats
+ ", compacted=" + compacted
+ ", extraMetadataMap=" + extraMetadataMap
+ '}';
return "HoodieCommitMetadata{" + "partitionToWriteStats=" + partitionToWriteStats + ", compacted=" + compacted
+ ", extraMetadataMap=" + extraMetadataMap + '}';
}
}

@@ -93,9 +93,6 @@ public class HoodieDataFile implements Serializable {
@Override
public String toString() {
return "HoodieDataFile{"
+ "fullPath=" + fullPath
+ ", fileLen=" + fileLen
+ '}';
return "HoodieDataFile{" + "fullPath=" + fullPath + ", fileLen=" + fileLen + '}';
}
}

@@ -69,8 +69,8 @@ public class HoodieFileGroup implements Serializable {
}
/**
* Potentially add a new file-slice by adding base-instant time
* A file-slice without any data-file and log-files can exist (if a compaction just got requested)
* Potentially add a new file-slice by adding base-instant time A file-slice without any data-file and log-files can
* exist (if a compaction just got requested)
*/
public void addNewFileSliceAtInstant(String baseInstantTime) {
if (!fileSlices.containsKey(baseInstantTime)) {
@@ -107,15 +107,13 @@ public class HoodieFileGroup implements Serializable {
}
/**
* A FileSlice is considered committed, if one of the following is true - There is a committed
* data file - There are some log files, that are based off a commit or delta commit
* A FileSlice is considered committed, if one of the following is true - There is a committed data file - There are
* some log files, that are based off a commit or delta commit
*/
private boolean isFileSliceCommitted(FileSlice slice) {
String maxCommitTime = lastInstant.get().getTimestamp();
return timeline.containsOrBeforeTimelineStarts(slice.getBaseInstantTime())
&& HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL);
&& HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(), maxCommitTime, HoodieTimeline.LESSER_OR_EQUAL);
}
@@ -138,9 +136,7 @@ public class HoodieFileGroup implements Serializable {
*/
public Stream<FileSlice> getAllFileSlices() {
if (!timeline.empty()) {
return fileSlices.entrySet().stream()
.map(Map.Entry::getValue)
.filter(this::isFileSliceCommitted);
return fileSlices.entrySet().stream().map(Map.Entry::getValue).filter(this::isFileSliceCommitted);
}
return Stream.empty();
}
@@ -166,41 +162,32 @@ public class HoodieFileGroup implements Serializable {
* Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime
*/
public Option<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
return Option.fromJavaOptional(getAllFileSlices()
.filter(slice ->
HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.findFirst());
return Option.fromJavaOptional(getAllFileSlices().filter(slice -> HoodieTimeline
.compareTimestamps(slice.getBaseInstantTime(), maxCommitTime, HoodieTimeline.LESSER_OR_EQUAL)).findFirst());
}
/**
* Obtain the latest file slice, upto a commitTime i.e < maxInstantTime
*
* @param maxInstantTime Max Instant Time
* @return
*/
public Option<FileSlice> getLatestFileSliceBefore(String maxInstantTime) {
return Option.fromJavaOptional(getAllFileSlices()
.filter(slice ->
HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(),
maxInstantTime,
HoodieTimeline.LESSER))
return Option.fromJavaOptional(getAllFileSlices().filter(
slice -> HoodieTimeline.compareTimestamps(slice.getBaseInstantTime(), maxInstantTime, HoodieTimeline.LESSER))
.findFirst());
}
public Option<FileSlice> getLatestFileSliceInRange(List<String> commitRange) {
return Option.fromJavaOptional(getAllFileSlices()
.filter(slice -> commitRange.contains(slice.getBaseInstantTime()))
.findFirst());
return Option.fromJavaOptional(
getAllFileSlices().filter(slice -> commitRange.contains(slice.getBaseInstantTime())).findFirst());
}
/**
* Stream of committed data files, sorted reverse commit time
*/
public Stream<HoodieDataFile> getAllDataFiles() {
return getAllFileSlices()
.filter(slice -> slice.getDataFile().isPresent())
.map(slice -> slice.getDataFile().get());
return getAllFileSlices().filter(slice -> slice.getDataFile().isPresent()).map(slice -> slice.getDataFile().get());
}
@Override

@@ -52,8 +52,7 @@ public class HoodieFileGroupId implements Serializable {
return false;
}
HoodieFileGroupId that = (HoodieFileGroupId) o;
return Objects.equals(partitionPath, that.partitionPath)
&& Objects.equals(fileId, that.fileId);
return Objects.equals(partitionPath, that.partitionPath) && Objects.equals(fileId, that.fileId);
}
@Override
@@ -63,9 +62,6 @@ public class HoodieFileGroupId implements Serializable {
@Override
public String toString() {
return "HoodieFileGroupId{"
+ "partitionPath='" + partitionPath + '\''
+ ", fileId='" + fileId + '\''
+ '}';
return "HoodieFileGroupId{" + "partitionPath='" + partitionPath + '\'' + ", fileId='" + fileId + '\'' + '}';
}
}

@@ -24,8 +24,8 @@ import java.io.Serializable;
/**
* HoodieKey consists of
* <p>
* - recordKey : a recordKey that acts as primary key for a record - partitionPath : path to the
* partition that contains the record
* - recordKey : a recordKey that acts as primary key for a record - partitionPath : path to the partition that contains
* the record
*/
public class HoodieKey implements Serializable {
@@ -56,8 +56,7 @@ public class HoodieKey implements Serializable {
return false;
}
HoodieKey otherKey = (HoodieKey) o;
return Objects.equal(recordKey, otherKey.recordKey)
&& Objects.equal(partitionPath, otherKey.partitionPath);
return Objects.equal(recordKey, otherKey.recordKey) && Objects.equal(partitionPath, otherKey.partitionPath);
}
@Override

@@ -109,9 +109,7 @@ public class HoodieLogFile implements Serializable {
String baseCommitTime = getBaseCommitTime();
Path path = getPath();
String extension = "." + FSUtils.getFileExtensionFromLog(path);
int newVersion = FSUtils
.computeNextLogVersion(fs, path.getParent(), fileId,
extension, baseCommitTime);
int newVersion = FSUtils.computeNextLogVersion(fs, path.getParent(), fileId, extension, baseCommitTime);
return new HoodieLogFile(new Path(path.getParent(),
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion, logWriteToken)));
}
@@ -179,9 +177,6 @@ public class HoodieLogFile implements Serializable {
@Override
public String toString() {
return "HoodieLogFile{"
+ "pathStr='" + pathStr + '\''
+ ", fileLen=" + fileLen
+ '}';
return "HoodieLogFile{" + "pathStr='" + pathStr + '\'' + ", fileLen=" + fileLen + '}';
}
}

@@ -64,12 +64,10 @@ public class HoodiePartitionMetadata {
/**
* Construct metadata object to be written out.
*/
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath,
Path partitionPath) {
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, Path partitionPath) {
this(fs, partitionPath);
props.setProperty(COMMIT_TIME_KEY, commitTime);
props
.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
}
public int getPartitionDepth() {
@@ -83,8 +81,8 @@ public class HoodiePartitionMetadata {
* Write the metadata safely into partition atomically.
*/
public void trySave(int taskPartitionId) {
Path tmpMetaPath = new Path(partitionPath,
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
Path tmpMetaPath =
new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
boolean metafileExists = false;
@@ -102,9 +100,8 @@ public class HoodiePartitionMetadata {
fs.rename(tmpMetaPath, metaPath);
}
} catch (IOException ioe) {
log.warn(
"Error trying to save partition metadata (this is okay, as long as "
+ "atleast 1 of these succced), " + partitionPath, ioe);
log.warn("Error trying to save partition metadata (this is okay, as long as " + "atleast 1 of these succced), "
+ partitionPath, ioe);
} finally {
if (!metafileExists) {
try {
@@ -129,8 +126,7 @@ public class HoodiePartitionMetadata {
is = fs.open(metaFile);
props.load(is);
} catch (IOException ioe) {
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath,
ioe);
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath, ioe);
} finally {
if (is != null) {
is.close();
@@ -143,8 +139,7 @@ public class HoodiePartitionMetadata {
try {
return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
} catch (IOException ioe) {
throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath,
ioe);
throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath, ioe);
}
}
}

@@ -36,12 +36,8 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
public static String FILENAME_METADATA_FIELD = "_hoodie_file_name";
public static final List<String> HOODIE_META_COLUMNS =
new ImmutableList.Builder<String>().add(COMMIT_TIME_METADATA_FIELD)
.add(COMMIT_SEQNO_METADATA_FIELD)
.add(RECORD_KEY_METADATA_FIELD)
.add(PARTITION_PATH_METADATA_FIELD)
.add(FILENAME_METADATA_FIELD)
.build();
new ImmutableList.Builder<String>().add(COMMIT_TIME_METADATA_FIELD).add(COMMIT_SEQNO_METADATA_FIELD)
.add(RECORD_KEY_METADATA_FIELD).add(PARTITION_PATH_METADATA_FIELD).add(FILENAME_METADATA_FIELD).build();
/**
* Identifies the record across the table
@@ -95,8 +91,8 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
}
/**
* Release the actual payload, to ease memory pressure. To be called after the record has been
* written to storage. Once deflated, cannot be inflated.
* Release the actual payload, to ease memory pressure. To be called after the record has been written to storage.
* Once deflated, cannot be inflated.
*/
public void deflate() {
this.data = null;
@@ -118,8 +114,7 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
}
/**
* Sets the new currentLocation of the record, after being written. This again should happen
* exactly-once.
* Sets the new currentLocation of the record, after being written. This again should happen exactly-once.
*/
public HoodieRecord setNewLocation(HoodieRecordLocation location) {
checkState();
@@ -145,10 +140,8 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
return false;
}
HoodieRecord that = (HoodieRecord) o;
return Objects.equal(key, that.key)
&& Objects.equal(data, that.data)
&& Objects.equal(currentLocation, that.currentLocation)
&& Objects.equal(newLocation, that.newLocation);
return Objects.equal(key, that.key) && Objects.equal(data, that.data)
&& Objects.equal(currentLocation, that.currentLocation) && Objects.equal(newLocation, that.newLocation);
}
@Override

@@ -22,8 +22,7 @@ import com.google.common.base.Objects;
import java.io.Serializable;
/**
* Location of a HoodieRecord within the partition it belongs to. Ultimately, this points to an
* actual file on disk
* Location of a HoodieRecord within the partition it belongs to. Ultimately, this points to an actual file on disk
*/
public class HoodieRecordLocation implements Serializable {
@@ -44,8 +43,7 @@ public class HoodieRecordLocation implements Serializable {
return false;
}
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
return Objects.equal(instantTime, otherLoc.instantTime)
&& Objects.equal(fileId, otherLoc.fileId);
return Objects.equal(instantTime, otherLoc.instantTime) && Objects.equal(fileId, otherLoc.fileId);
}
@Override

@@ -26,45 +26,41 @@ import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.util.Option;
/**
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code> This abstracts
* out callbacks which depend on record specific logic
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code> This abstracts out callbacks which
* depend on record specific logic
*/
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
/**
* When more than one HoodieRecord have the same HoodieKey, this function combines them before
* attempting to insert/upsert (if combining turned on in HoodieClientConfig)
* When more than one HoodieRecord have the same HoodieKey, this function combines them before attempting to
* insert/upsert (if combining turned on in HoodieClientConfig)
*/
T preCombine(T another);
/**
* This methods lets you write custom merging/combining logic to produce new values as a function
* of current value on storage and whats contained in this object.
* This methods lets you write custom merging/combining logic to produce new values as a function of current value on
* storage and whats contained in this object.
* <p>
* eg: 1) You are updating counters, you may want to add counts to currentValue and write back
* updated counts 2) You may be reading DB redo logs, and merge them with current image for a
* database row on storage
* eg: 1) You are updating counters, you may want to add counts to currentValue and write back updated counts 2) You
* may be reading DB redo logs, and merge them with current image for a database row on storage
*
* @param currentValue Current value in storage, to merge/combine this payload with
* @param schema Schema used for record
* @return new combined/merged value to be written back to storage. EMPTY to skip writing this
* record.
* @param schema Schema used for record
* @return new combined/merged value to be written back to storage. EMPTY to skip writing this record.
*/
Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
throws IOException;
Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException;
/**
* Generates an avro record out of the given HoodieRecordPayload, to be written out to storage.
* Called when writing a new value for the given HoodieKey, wherein there is no existing record in
* storage to be combined against. (i.e insert) Return EMPTY to skip writing this record.
* Generates an avro record out of the given HoodieRecordPayload, to be written out to storage. Called when writing a
* new value for the given HoodieKey, wherein there is no existing record in storage to be combined against. (i.e
* insert) Return EMPTY to skip writing this record.
*/
Option<IndexedRecord> getInsertValue(Schema schema) throws IOException;
/**
* This method can be used to extract some metadata from HoodieRecordPayload. The metadata is
* passed to {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to
* compute some aggregate metrics using the metadata in the context of a write success or
* failure.
* This method can be used to extract some metadata from HoodieRecordPayload. The metadata is passed to
* {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to compute some aggregate metrics
* using the metadata in the context of a write success or failure.
*/
default Option<Map<String, String>> getMetadata() {
return Option.empty();
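
The javadoc above spells out the payload contract. As a minimal illustration (a hypothetical sketch, not part of this commit; the class name and orderingVal field are invented), a last-write-wins payload could look like:

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;

public class LatestWinsPayload implements HoodieRecordPayload<LatestWinsPayload> {

  // Note: a production payload would store Avro bytes instead of the GenericRecord,
  // as HoodieAvroPayload does above, to remain java-serializable and memory efficient.
  private final GenericRecord record;
  private final long orderingVal;

  public LatestWinsPayload(GenericRecord record, long orderingVal) {
    this.record = record;
    this.orderingVal = orderingVal;
  }

  @Override
  public LatestWinsPayload preCombine(LatestWinsPayload another) {
    // Among multiple incoming records with the same HoodieKey, keep the one
    // with the higher ordering value before insert/upsert.
    return another.orderingVal > this.orderingVal ? another : this;
  }

  @Override
  public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
    // Last write wins: ignore the current value on storage and emit this record.
    return getInsertValue(schema);
  }

  @Override
  public Option<IndexedRecord> getInsertValue(Schema schema) throws IOException {
    return Option.of((IndexedRecord) record);
  }
}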

@@ -23,14 +23,13 @@ package org.apache.hudi.common.model;
* <p>
* Currently, 1 type is supported
* <p>
* COPY_ON_WRITE - Performs upserts by versioning entire files, with later versions containing newer
* value of a record.
* COPY_ON_WRITE - Performs upserts by versioning entire files, with later versions containing newer value of a record.
* <p>
* In the future, following might be added.
* <p>
* MERGE_ON_READ - Speeds up upserts, by delaying merge until enough work piles up.
* <p>
* SIMPLE_LSM - A simple 2 level LSM tree.
* SIMPLE_LSM - A simple 2 level LSM tree.
*/
public enum HoodieTableType {
COPY_ON_WRITE, MERGE_ON_READ

@@ -48,8 +48,8 @@ public class HoodieWriteStat implements Serializable {
private String prevCommit;
/**
* Total number of records written for this file. - for updates, its the entire number of records
* in the file - for inserts, its the actual number of records inserted.
* Total number of records written for this file. - for updates, its the entire number of records in the file - for
* inserts, its the actual number of records inserted.
*/
private long numWrites;
@@ -318,25 +318,13 @@ public class HoodieWriteStat implements Serializable {
@Override
public String toString() {
return "HoodieWriteStat{"
+ "fileId='" + fileId + '\''
+ ", path='" + path + '\''
+ ", prevCommit='" + prevCommit + '\''
+ ", numWrites=" + numWrites
+ ", numDeletes=" + numDeletes
+ ", numUpdateWrites=" + numUpdateWrites
+ ", totalWriteBytes=" + totalWriteBytes
+ ", totalWriteErrors=" + totalWriteErrors
+ ", tempPath='" + tempPath + '\''
+ ", partitionPath='" + partitionPath
+ '\'' + ", totalLogRecords=" + totalLogRecords
+ ", totalLogFilesCompacted=" + totalLogFilesCompacted
+ ", totalLogSizeCompacted=" + totalLogSizeCompacted
+ ", totalUpdatedRecordsCompacted=" + totalUpdatedRecordsCompacted
+ ", totalLogBlocks=" + totalLogBlocks
+ ", totalCorruptLogBlock=" + totalCorruptLogBlock
+ ", totalRollbackBlocks=" + totalRollbackBlocks
+ '}';
return "HoodieWriteStat{" + "fileId='" + fileId + '\'' + ", path='" + path + '\'' + ", prevCommit='" + prevCommit
+ '\'' + ", numWrites=" + numWrites + ", numDeletes=" + numDeletes + ", numUpdateWrites=" + numUpdateWrites
+ ", totalWriteBytes=" + totalWriteBytes + ", totalWriteErrors=" + totalWriteErrors + ", tempPath='" + tempPath
+ '\'' + ", partitionPath='" + partitionPath + '\'' + ", totalLogRecords=" + totalLogRecords
+ ", totalLogFilesCompacted=" + totalLogFilesCompacted + ", totalLogSizeCompacted=" + totalLogSizeCompacted
+ ", totalUpdatedRecordsCompacted=" + totalUpdatedRecordsCompacted + ", totalLogBlocks=" + totalLogBlocks
+ ", totalCorruptLogBlock=" + totalCorruptLogBlock + ", totalRollbackBlocks=" + totalRollbackBlocks + '}';
}
@Override

@@ -33,8 +33,7 @@ public enum StorageSchemes {
// Apache Ignite FS
IGNITE("igfs", true),
// AWS S3
S3A("s3a", false),
S3("s3", false),
S3A("s3a", false), S3("s3", false),
// Google Cloud Storage
GCS("gs", false),
// View FS for federated setups. If federating across cloud stores, then append support is false

@@ -36,10 +36,9 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc
* Configurations are loaded from hoodie.properties, these properties are usually set during
* initializing a path as hoodie base path and never changes during the lifetime of a hoodie
* dataset.
* Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc Configurations are
* loaded from hoodie.properties, these properties are usually set during initializing a path as hoodie base path and
* never changes during the lifetime of a hoodie dataset.
*
* @see HoodieTableMetaClient
* @since 0.3.0
@@ -51,10 +50,8 @@ public class HoodieTableConfig implements Serializable {
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME =
"hoodie.table.ro.file.format";
public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME =
"hoodie.table.rt.file.format";
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME = "hoodie.table.ro.file.format";
public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME = "hoodie.table.rt.file.format";
public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class";
public static final String HOODIE_ARCHIVELOG_FOLDER_PROP_NAME = "hoodie.archivelog.folder";
@@ -88,37 +85,32 @@ public class HoodieTableConfig implements Serializable {
*
* @deprecated
*/
public HoodieTableConfig() {
}
public HoodieTableConfig() {}
/**
* Initialize the hoodie meta directory and any necessary files inside the meta (including the
* hoodie.properties)
* Initialize the hoodie meta directory and any necessary files inside the meta (including the hoodie.properties)
*/
public static void createHoodieProperties(FileSystem fs, Path metadataFolder,
Properties properties) throws IOException {
public static void createHoodieProperties(FileSystem fs, Path metadataFolder, Properties properties)
throws IOException {
if (!fs.exists(metadataFolder)) {
fs.mkdirs(metadataFolder);
}
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
throw new IllegalArgumentException(
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
throw new IllegalArgumentException(HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
}
if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
}
if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ
.name()
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ.name()
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
}
if (!properties.containsKey(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME)) {
properties.setProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);
}
properties
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
properties.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
}
}
@@ -139,8 +131,8 @@ public class HoodieTableConfig implements Serializable {
public String getPayloadClass() {
// There could be datasets written with payload class from com.uber.hoodie. Need to transparently
// change to org.apache.hudi
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS)
.replace("com.uber.hoodie", "org.apache.hudi");
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS).replace("com.uber.hoodie",
"org.apache.hudi");
}
/**
@@ -182,7 +174,7 @@ public class HoodieTableConfig implements Serializable {
}
public Map<String, String> getProps() {
return props.entrySet().stream().collect(
Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())));
return props.entrySet().stream()
.collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())));
}
}

@@ -50,12 +50,12 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* <code>HoodieTableMetaClient</code> allows to access meta-data about a hoodie table It returns
* meta-data about commits, savepoints, compactions, cleanups as a <code>HoodieTimeline</code>
* Create an instance of the <code>HoodieTableMetaClient</code> with FileSystem and basePath to
* start getting the meta-data. <p> All the timelines are computed lazily, once computed the
* timeline is cached and never refreshed. Use the <code>HoodieTimeline.reload()</code> to refresh
* timelines.
* <code>HoodieTableMetaClient</code> allows to access meta-data about a hoodie table It returns meta-data about
* commits, savepoints, compactions, cleanups as a <code>HoodieTimeline</code> Create an instance of the
* <code>HoodieTableMetaClient</code> with FileSystem and basePath to start getting the meta-data.
* <p>
* All the timelines are computed lazily, once computed the timeline is cached and never refreshed. Use the
* <code>HoodieTimeline.reload()</code> to refresh timelines.
*
* @see HoodieTimeline
* @since 0.3.0
@@ -79,20 +79,17 @@ public class HoodieTableMetaClient implements Serializable {
private HoodieArchivedTimeline archivedTimeline;
private ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build();
public HoodieTableMetaClient(Configuration conf, String basePath)
throws DatasetNotFoundException {
public HoodieTableMetaClient(Configuration conf, String basePath) throws DatasetNotFoundException {
// Do not load any timeline by default
this(conf, basePath, false);
}
public HoodieTableMetaClient(Configuration conf, String basePath,
boolean loadActiveTimelineOnLoad) {
public HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad) {
this(conf, basePath, loadActiveTimelineOnLoad, ConsistencyGuardConfig.newBuilder().build());
}
public HoodieTableMetaClient(Configuration conf, String basePath,
boolean loadActiveTimelineOnLoad, ConsistencyGuardConfig consistencyGuardConfig)
throws DatasetNotFoundException {
public HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad,
ConsistencyGuardConfig consistencyGuardConfig) throws DatasetNotFoundException {
log.info("Loading HoodieTableMetaClient from " + basePath);
this.basePath = basePath;
this.consistencyGuardConfig = consistencyGuardConfig;
@@ -117,15 +114,11 @@ public class HoodieTableMetaClient implements Serializable {
*
* @deprecated
*/
public HoodieTableMetaClient() {
}
public HoodieTableMetaClient() {}
public static HoodieTableMetaClient reload(HoodieTableMetaClient oldMetaClient) {
return new HoodieTableMetaClient(
oldMetaClient.hadoopConf.get(),
oldMetaClient.basePath,
oldMetaClient.loadActiveTimelineOnLoad,
oldMetaClient.consistencyGuardConfig);
return new HoodieTableMetaClient(oldMetaClient.hadoopConf.get(), oldMetaClient.basePath,
oldMetaClient.loadActiveTimelineOnLoad, oldMetaClient.consistencyGuardConfig);
}
/**
@@ -133,14 +126,12 @@ public class HoodieTableMetaClient implements Serializable {
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
in.defaultReadObject();
fs = null; // will be lazily inited
}
private void writeObject(java.io.ObjectOutputStream out)
throws IOException {
private void writeObject(java.io.ObjectOutputStream out) throws IOException {
out.defaultWriteObject();
}
@@ -173,8 +164,9 @@ public class HoodieTableMetaClient implements Serializable {
}
/**
* Returns Marker folder path
* @param instantTs Instant Timestamp
* Returns Marker folder path
*
* @param instantTs Instant Timestamp
* @return
*/
public String getMarkerFolderPath(String instantTs) {
@@ -215,14 +207,17 @@ public class HoodieTableMetaClient implements Serializable {
FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy());
Preconditions.checkArgument(!(fileSystem instanceof HoodieWrapperFileSystem),
"File System not expected to be that of HoodieWrapperFileSystem");
fs = new HoodieWrapperFileSystem(fileSystem, consistencyGuardConfig.isConsistencyCheckEnabled()
? new FailSafeConsistencyGuard(fileSystem, consistencyGuardConfig) : new NoOpConsistencyGuard());
fs = new HoodieWrapperFileSystem(fileSystem,
consistencyGuardConfig.isConsistencyCheckEnabled()
? new FailSafeConsistencyGuard(fileSystem, consistencyGuardConfig)
: new NoOpConsistencyGuard());
}
return fs;
}
/**
* Return raw file-system
*
* @return
*/
public FileSystem getRawFs() {
@@ -260,8 +255,8 @@ public class HoodieTableMetaClient implements Serializable {
}
/**
* Get the archived commits as a timeline. This is costly operation, as all data from the archived
* files are read. This should not be used, unless for historical debugging purposes
* Get the archived commits as a timeline. This is a costly operation, as all data from the archived files are read.
* This should not be used unless for historical debugging purposes.
*
* @return Archived commit timeline
*/
@@ -276,8 +271,8 @@ public class HoodieTableMetaClient implements Serializable {
/**
* Helper method to initialize a dataset, with given basePath, tableType, name, archiveFolder
*/
public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath,
String tableType, String tableName, String archiveLogFolder) throws IOException {
public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath, String tableType,
String tableName, String archiveLogFolder) throws IOException {
HoodieTableType type = HoodieTableType.valueOf(tableType);
Properties properties = new Properties();
properties.put(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tableName);
@@ -301,13 +296,12 @@ public class HoodieTableMetaClient implements Serializable {
}
/**
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as
* Properties
* Helper method to initialize a given path as a hoodie dataset with configs passed in as Properties
*
* @return Instance of HoodieTableMetaClient
*/
public static HoodieTableMetaClient initDatasetAndGetMetaClient(Configuration hadoopConf,
String basePath, Properties props) throws IOException {
public static HoodieTableMetaClient initDatasetAndGetMetaClient(Configuration hadoopConf, String basePath,
Properties props) throws IOException {
log.info("Initializing " + basePath + " as hoodie dataset " + basePath);
Path basePathDir = new Path(basePath);
final FileSystem fs = FSUtils.getFs(basePath, hadoopConf);
@@ -320,9 +314,8 @@ public class HoodieTableMetaClient implements Serializable {
}
// if anything other than default archive log folder is specified, create that too
String archiveLogPropVal = props
.getProperty(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME,
HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER);
String archiveLogPropVal = props.getProperty(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME,
HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER);
if (!archiveLogPropVal.equals(HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER)) {
Path archiveLogDir = new Path(metaPathDir, archiveLogPropVal);
if (!fs.exists(archiveLogDir)) {
@@ -346,14 +339,12 @@ public class HoodieTableMetaClient implements Serializable {
// We should not use fs.getConf as this might be different from the original configuration
// used to create the fs in unit tests
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(hadoopConf, basePath);
log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType()
+ " from " + basePath);
log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType() + " from " + basePath);
return metaClient;
}
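As a hedged sketch of the initialization helpers above (the base path and names are illustrative, not from the source):

// Create the table metadata under a new base path and get a meta client back.
HoodieTableMetaClient created = HoodieTableMetaClient.initTableType(hadoopConf,
    "/tmp/hoodie/sample-table", "MERGE_ON_READ", "sample_table", "archived");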
// HELPER METHODS TO CREATE META FILE NAMES
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter)
throws IOException {
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter) throws IOException {
return fs.listStatus(metaPath, nameFilter);
}
@@ -375,10 +366,10 @@ public class HoodieTableMetaClient implements Serializable {
}
/**
* Get the commit + pending-compaction timeline visible for this table.
* A RT filesystem view is constructed with this timeline so that file-slice after pending compaction-requested
* instant-time is also considered valid. A RT file-system view for reading must then merge the file-slices before
* and after pending compaction instant so that all delta-commits are read.
* Get the commit + pending-compaction timeline visible for this table. A RT filesystem view is constructed with this
* timeline so that file-slice after pending compaction-requested instant-time is also considered valid. A RT
* file-system view for reading must then merge the file-slices before and after pending compaction instant so that
* all delta-commits are read.
*/
public HoodieTimeline getCommitsAndCompactionTimeline() {
switch (this.getTableType()) {
@@ -415,8 +406,7 @@ public class HoodieTableMetaClient implements Serializable {
case MERGE_ON_READ:
return HoodieActiveTimeline.DELTA_COMMIT_ACTION;
default:
throw new HoodieException(
"Could not commit on unknown storage type " + this.getTableType());
throw new HoodieException("Could not commit on unknown storage type " + this.getTableType());
}
}
@@ -424,23 +414,21 @@ public class HoodieTableMetaClient implements Serializable {
/**
* Helper method to scan all hoodie-instant metafiles and construct HoodieInstant objects
*
* @param fs FileSystem
* @param metaPath Meta Path where hoodie instants are present
* @param fs FileSystem
* @param metaPath Meta Path where hoodie instants are present
* @param includedExtensions Included hoodie extensions
* @return List of Hoodie Instants generated
* @throws IOException in case of failure
*/
public static List<HoodieInstant> scanHoodieInstantsFromFileSystem(
FileSystem fs, Path metaPath, Set<String> includedExtensions) throws IOException {
return Arrays.stream(
HoodieTableMetaClient
.scanFiles(fs, metaPath, path -> {
// Include only the meta files with extensions that needs to be included
String extension = FSUtils.getFileExtension(path.getName());
return includedExtensions.contains(extension);
})).sorted(Comparator.comparing(
// Sort the meta-data by the instant time (first part of the file name)
fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
public static List<HoodieInstant> scanHoodieInstantsFromFileSystem(FileSystem fs, Path metaPath,
Set<String> includedExtensions) throws IOException {
return Arrays.stream(HoodieTableMetaClient.scanFiles(fs, metaPath, path -> {
// Include only the meta files with extensions that need to be included
String extension = FSUtils.getFileExtension(path.getName());
return includedExtensions.contains(extension);
})).sorted(Comparator.comparing(
// Sort the meta-data by the instant time (first part of the file name)
fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
// create HoodieInstantMarkers from FileStatus, which extracts properties
.map(HoodieInstant::new).collect(Collectors.toList());
}
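A sketch of invoking the scanner above; the .hoodie meta path and the extension set are assumptions for illustration:

Set<String> extensions = new HashSet<>(Arrays.asList(HoodieTimeline.CLEAN_EXTENSION, HoodieTimeline.ROLLBACK_EXTENSION));
List<HoodieInstant> instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(
    fs, new Path(basePath, ".hoodie"), extensions); // sorted by instant time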

View File

@@ -29,10 +29,11 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
/**
* HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific
* points in time represented as HoodieInstant. <p> Timelines are immutable once created and
* operations create new instance of timelines which filter on the instants and this can be
* chained.
* HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific points in time
* represented as HoodieInstant.
* <p>
* Timelines are immutable once created; operations create new instances of timelines which filter on the instants,
* and these can be chained.
*
* @see HoodieTableMetaClient
* @see HoodieDefaultTimeline
@@ -58,22 +59,19 @@ public interface HoodieTimeline extends Serializable {
String CLEAN_EXTENSION = "." + CLEAN_ACTION;
String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION;
String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION;
//this is to preserve backwards compatibility on commit in-flight filenames
// this is to preserve backwards compatibility on commit in-flight filenames
String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION;
String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
String REQUESTED_COMPACTION_SUFFIX =
StringUtils.join(COMPACTION_ACTION, REQUESTED_EXTENSION);
String REQUESTED_COMPACTION_EXTENSION =
StringUtils.join(".", REQUESTED_COMPACTION_SUFFIX);
String INFLIGHT_COMPACTION_EXTENSION =
StringUtils.join(".", COMPACTION_ACTION, INFLIGHT_EXTENSION);
String REQUESTED_COMPACTION_SUFFIX = StringUtils.join(COMPACTION_ACTION, REQUESTED_EXTENSION);
String REQUESTED_COMPACTION_EXTENSION = StringUtils.join(".", REQUESTED_COMPACTION_SUFFIX);
String INFLIGHT_COMPACTION_EXTENSION = StringUtils.join(".", COMPACTION_ACTION, INFLIGHT_EXTENSION);
String INFLIGHT_RESTORE_EXTENSION = "." + RESTORE_ACTION + INFLIGHT_EXTENSION;
String RESTORE_EXTENSION = "." + RESTORE_ACTION;
String INVALID_INSTANT_TS = "0";
String INVALID_INSTANT_TS = "0";
/**
* Filter this timeline to just include the in-flights
@@ -97,22 +95,25 @@ public interface HoodieTimeline extends Serializable {
HoodieTimeline filterCompletedInstants();
/**
* Filter this timeline to just include the completed + compaction (inflight + requested) instants
* A RT filesystem view is constructed with this timeline so that file-slice after pending compaction-requested
* instant-time is also considered valid. A RT file-system view for reading must then merge the file-slices before
* and after pending compaction instant so that all delta-commits are read.
* Filter this timeline to just include the completed + compaction (inflight + requested) instants. A RT filesystem
* view is constructed with this timeline so that a file-slice after a pending compaction-requested instant-time is
* also considered valid. A RT file-system view for reading must then merge the file-slices before and after the
* pending compaction instant so that all delta-commits are read.
*
* @return New instance of HoodieTimeline with just completed instants
*/
HoodieTimeline filterCompletedAndCompactionInstants();
/**
* Timeline to just include commits (commit/deltacommit) and compaction actions
* Timeline to just include commits (commit/deltacommit) and compaction actions
*
* @return
*/
HoodieTimeline getCommitsAndCompactionTimeline();
/**
* Filter this timeline to just include requested and inflight compaction instants
*
* @return
*/
HoodieTimeline filterPendingCompactionTimeline();
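Because timelines are immutable, the filters above compose by returning new instances; a short sketch, with the timeline variable assumed in scope:

HoodieTimeline completed = timeline.filterCompletedInstants();
HoodieTimeline pendingCompaction = timeline.filterPendingCompactionTimeline();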
@@ -162,6 +163,7 @@ public interface HoodieTimeline extends Serializable {
/**
* Get hash of timeline
*
* @return
*/
String getTimelineHash();
@@ -177,8 +179,8 @@ public interface HoodieTimeline extends Serializable {
boolean containsInstant(HoodieInstant instant);
/**
* @return true if the passed instant is present as a completed instant on the timeline or if the
* instant is before the first completed instant in the timeline
* @return true if the passed instant is present as a completed instant on the timeline or if the instant is before
* the first completed instant in the timeline
*/
boolean containsOrBeforeTimelineStarts(String ts);
@@ -188,8 +190,8 @@ public interface HoodieTimeline extends Serializable {
Stream<HoodieInstant> getInstants();
/**
* @return Get the stream of completed instants in reverse order
* TODO Change code references to getInstants() that reverse the instants later on to use this method instead.
* @return Get the stream of completed instants in reverse order. TODO: Change code references to getInstants() that
* reverse the instants later on to use this method instead.
*/
Stream<HoodieInstant> getReverseOrderedInstants();
@@ -206,17 +208,13 @@ public interface HoodieTimeline extends Serializable {
/**
* Helper methods to compare instants
**/
BiPredicate<String, String> EQUAL =
(commit1, commit2) -> commit1.compareTo(commit2) == 0;
BiPredicate<String, String> GREATER_OR_EQUAL =
(commit1, commit2) -> commit1.compareTo(commit2) >= 0;
BiPredicate<String, String> EQUAL = (commit1, commit2) -> commit1.compareTo(commit2) == 0;
BiPredicate<String, String> GREATER_OR_EQUAL = (commit1, commit2) -> commit1.compareTo(commit2) >= 0;
BiPredicate<String, String> GREATER = (commit1, commit2) -> commit1.compareTo(commit2) > 0;
BiPredicate<String, String> LESSER_OR_EQUAL =
(commit1, commit2) -> commit1.compareTo(commit2) <= 0;
BiPredicate<String, String> LESSER_OR_EQUAL = (commit1, commit2) -> commit1.compareTo(commit2) <= 0;
BiPredicate<String, String> LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0;
static boolean compareTimestamps(String commit1, String commit2,
BiPredicate<String, String> predicateToApply) {
static boolean compareTimestamps(String commit1, String commit2, BiPredicate<String, String> predicateToApply) {
return predicateToApply.test(commit1, commit2);
}
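A small sketch of the comparison helpers above; instant times compare lexicographically and the timestamp strings are illustrative:

boolean isLater = HoodieTimeline.compareTimestamps("20190102000000", "20190101000000", HoodieTimeline.GREATER); // true
boolean isSame = HoodieTimeline.compareTimestamps("20190101000000", "20190101000000", HoodieTimeline.EQUAL); // true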

View File

@@ -22,8 +22,8 @@ package org.apache.hudi.common.table;
* A consolidated file-system view interface exposing both realtime and read-optimized views along with
* update operations.
*/
public interface SyncableFileSystemView extends TableFileSystemView, TableFileSystemView.ReadOptimizedView,
TableFileSystemView.RealtimeView {
public interface SyncableFileSystemView
extends TableFileSystemView, TableFileSystemView.ReadOptimizedView, TableFileSystemView.RealtimeView {
@@ -38,9 +38,9 @@ public interface SyncableFileSystemView extends TableFileSystemView, TableFileSy
void reset();
/**
* Read the latest timeline and refresh the file-system view to match the current state of the file-system.
* The refresh can either be done incrementally (from reading file-slices in metadata files) or from scratch by
* reseting view storage
* Read the latest timeline and refresh the file-system view to match the current state of the file-system. The
* refresh can either be done incrementally (from reading file-slices in metadata files) or from scratch by resetting
* view storage.
*/
void sync();
}

View File

@@ -59,8 +59,7 @@ public interface TableFileSystemView {
* Stream all the latest version data files in the given partition, with the precondition that commitTime(file) is
* before maxCommitTime
*/
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
String maxCommitTime);
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath, String maxCommitTime);
/**
* Stream all the latest data files pass
@@ -105,20 +104,20 @@ public interface TableFileSystemView {
Stream<FileSlice> getLatestUnCompactedFileSlices(String partitionPath);
/**
* Stream all latest file slices in given partition with precondition that commitTime(file) before maxCommitTime
* Stream all latest file slices in given partition with precondition that commitTime(file) before maxCommitTime
*
* @param partitionPath Partition path
* @param maxCommitTime Max Instant Time
* @param includeFileSlicesInPendingCompaction include file-slices that are in pending compaction
*/
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
String maxCommitTime, boolean includeFileSlicesInPendingCompaction);
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime,
boolean includeFileSlicesInPendingCompaction);
/**
* Stream all "merged" file-slices before on an instant time
* If a file-group has a pending compaction request, the file-slice before and after compaction request instant
* is merged and returned.
* @param partitionPath Partition Path
* Stream all "merged" file-slices before on an instant time If a file-group has a pending compaction request, the
* file-slice before and after compaction request instant is merged and returned.
*
* @param partitionPath Partition Path
* @param maxInstantTime Max Instant Time
* @return
*/

View File

@@ -51,15 +51,16 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Implements logic to scan log blocks and expose valid and deleted log records to subclass implementation.
* Subclass is free to either apply merging or expose raw data back to the caller.
* Implements logic to scan log blocks and expose valid and deleted log records to subclass implementation. Subclass is
* free to either apply merging or expose raw data back to the caller.
*
* NOTE: If readBlockLazily is
* turned on, does not merge, instead keeps reading log blocks and merges everything at once This is an optimization to
* avoid seek() back and forth to read new block (forward seek()) and lazily read content of seen block (reverse and
* forward seek()) during merge | | Read Block 1 Metadata | | Read Block 1 Data | | | Read Block 2
* Metadata | | Read Block 2 Data | | I/O Pass 1 | ..................... | I/O Pass 2 | ................. | |
* | Read Block N Metadata | | Read Block N Data | <p> This results in two I/O passes over the log file.
* NOTE: If readBlockLazily is turned on, does not merge, instead keeps reading log blocks and merges everything at once.
* This is an optimization to avoid seek() back and forth to read a new block (forward seek()) and lazily read content of
* a seen block (reverse and forward seek()) during merge. I/O Pass 1 reads each block's metadata (Read Block 1 Metadata
* ... Read Block N Metadata); I/O Pass 2 reads each block's data (Read Block 1 Data ... Read Block N Data).
* <p>
* This results in two I/O passes over the log file.
*/
public abstract class AbstractHoodieLogRecordScanner {
@@ -122,10 +123,9 @@ public abstract class AbstractHoodieLogRecordScanner {
HoodieLogFormatReader logFormatReaderWrapper = null;
try {
// iterate over the paths
logFormatReaderWrapper =
new HoodieLogFormatReader(fs,
logFilePaths.stream().map(logFile -> new HoodieLogFile(new Path(logFile)))
.collect(Collectors.toList()), readerSchema, readBlocksLazily, reverseReader, bufferSize);
logFormatReaderWrapper = new HoodieLogFormatReader(fs,
logFilePaths.stream().map(logFile -> new HoodieLogFile(new Path(logFile))).collect(Collectors.toList()),
readerSchema, readBlocksLazily, reverseReader, bufferSize);
Set<HoodieLogFile> scannedLogFiles = new HashSet<>();
while (logFormatReaderWrapper.hasNext()) {
HoodieLogFile logFile = logFormatReaderWrapper.getLogFile();
@@ -136,10 +136,9 @@ public abstract class AbstractHoodieLogRecordScanner {
HoodieLogBlock r = logFormatReaderWrapper.next();
totalLogBlocks.incrementAndGet();
if (r.getBlockType() != CORRUPT_BLOCK
&& !HoodieTimeline.compareTimestamps(r.getLogBlockHeader().get(INSTANT_TIME),
this.latestInstantTime,
HoodieTimeline.LESSER_OR_EQUAL)) {
//hit a block with instant time greater than should be processed, stop processing further
&& !HoodieTimeline.compareTimestamps(r.getLogBlockHeader().get(INSTANT_TIME), this.latestInstantTime,
HoodieTimeline.LESSER_OR_EQUAL)) {
// hit a block with instant time greater than should be processed, stop processing further
break;
}
switch (r.getBlockType()) {
@@ -167,7 +166,7 @@ public abstract class AbstractHoodieLogRecordScanner {
// Consider the following scenario
// (Time 0, C1, Task T1) -> Running
// (Time 1, C1, Task T1) -> Failed (Wrote either a corrupt block or a correct
// DataBlock (B1) with commitTime C1
// DataBlock (B1) with commitTime C1
// (Time 2, C1, Task T1.2) -> Running (Task T1 was retried and the attempt number is 2)
// (Time 3, C1, Task T1.2) -> Finished (Wrote a correct DataBlock B2)
// Now a logFile L1 can have 2 correct Datablocks (B1 and B2) which are the same.
@@ -179,8 +178,8 @@ public abstract class AbstractHoodieLogRecordScanner {
log.info("Reading a command block from file " + logFile.getPath());
// This is a command block - take appropriate action based on the command
HoodieCommandBlock commandBlock = (HoodieCommandBlock) r;
String targetInstantForCommandBlock = r.getLogBlockHeader()
.get(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME);
String targetInstantForCommandBlock =
r.getLogBlockHeader().get(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME);
switch (commandBlock.getType()) { // there can be different types of command blocks
case ROLLBACK_PREVIOUS_BLOCK:
// Rollback the last read log block
@@ -195,20 +194,17 @@ public abstract class AbstractHoodieLogRecordScanner {
HoodieLogBlock lastBlock = currentInstantLogBlocks.peek();
// handle corrupt blocks separately since they may not have metadata
if (lastBlock.getBlockType() == CORRUPT_BLOCK) {
log.info(
"Rolling back the last corrupted log block read in " + logFile.getPath());
log.info("Rolling back the last corrupted log block read in " + logFile.getPath());
currentInstantLogBlocks.pop();
numBlocksRolledBack++;
} else if (lastBlock.getBlockType() != CORRUPT_BLOCK
&& targetInstantForCommandBlock
.contentEquals(lastBlock.getLogBlockHeader().get(INSTANT_TIME))) {
&& targetInstantForCommandBlock.contentEquals(lastBlock.getLogBlockHeader().get(INSTANT_TIME))) {
// rollback last data block or delete block
log.info("Rolling back the last log block read in " + logFile.getPath());
currentInstantLogBlocks.pop();
numBlocksRolledBack++;
} else if (!targetInstantForCommandBlock
.contentEquals(
currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME))) {
.contentEquals(currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME))) {
// invalid or extra rollback block
log.warn("TargetInstantTime " + targetInstantForCommandBlock
+ " invalid or extra rollback command block in " + logFile.getPath());
@@ -260,15 +256,14 @@ public abstract class AbstractHoodieLogRecordScanner {
* Checks if the current logblock belongs to a later instant
*/
private boolean isNewInstantBlock(HoodieLogBlock logBlock) {
return currentInstantLogBlocks.size() > 0
&& currentInstantLogBlocks.peek().getBlockType() != CORRUPT_BLOCK
return currentInstantLogBlocks.size() > 0 && currentInstantLogBlocks.peek().getBlockType() != CORRUPT_BLOCK
&& !logBlock.getLogBlockHeader().get(INSTANT_TIME)
.contentEquals(currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME));
.contentEquals(currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME));
}
/**
* Iterate over the GenericRecord in the block, read the hoodie key and partition path and
* call subclass processors to handle it.
* Iterate over the GenericRecord in the block, read the hoodie key and partition path and call subclass processors to
* handle it.
*/
private void processAvroDataBlock(HoodieAvroDataBlock dataBlock) throws Exception {
// TODO (NA) - Implement getRecordItr() in HoodieAvroDataBlock and use that here
@@ -286,8 +281,7 @@ public abstract class AbstractHoodieLogRecordScanner {
*
* @param hoodieRecord Hoodie Record to process
*/
protected abstract void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord)
throws Exception;
protected abstract void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) throws Exception;
/**
* Process next deleted key
@@ -299,8 +293,7 @@ public abstract class AbstractHoodieLogRecordScanner {
/**
* Process the set of log blocks belonging to the last instant which is read fully.
*/
private void processQueuedBlocksForInstant(Deque<HoodieLogBlock> lastBlocks, int numLogFilesSeen)
throws Exception {
private void processQueuedBlocksForInstant(Deque<HoodieLogBlock> lastBlocks, int numLogFilesSeen) throws Exception {
while (!lastBlocks.isEmpty()) {
log.info("Number of remaining logblocks to merge " + lastBlocks.size());
// poll the element at the bottom of the stack since that's the order it was inserted

View File

@@ -46,9 +46,8 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Scans a log file and provides block level iterator on the log file Loads the entire block
* contents in memory Can emit either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one
* is found)
* Scans a log file and provides a block-level iterator on the log file. Loads the entire block contents in memory. Can
* emit either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one is found).
*/
class HoodieLogFileReader implements HoodieLogFormat.Reader {
@@ -71,8 +70,7 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
FSDataInputStream fsDataInputStream = fs.open(logFile.getPath(), bufferSize);
if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) {
this.inputStream = new FSDataInputStream(
new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(),
bufferSize));
new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize));
} else {
// fsDataInputStream.getWrappedStream() may be a BufferedFSInputStream
// need to wrap in another BufferedFSInputStream to make bufferSize work?
@@ -84,19 +82,17 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
this.readBlockLazily = readBlockLazily;
this.reverseReader = reverseReader;
if (this.reverseReader) {
this.reverseLogFilePosition = this.lastReverseLogFilePosition = fs
.getFileStatus(logFile.getPath()).getLen();
this.reverseLogFilePosition = this.lastReverseLogFilePosition = fs.getFileStatus(logFile.getPath()).getLen();
}
addShutDownHook();
}
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema,
boolean readBlockLazily, boolean reverseReader) throws IOException {
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean readBlockLazily,
boolean reverseReader) throws IOException {
this(fs, logFile, readerSchema, DEFAULT_BUFFER_SIZE, readBlockLazily, reverseReader);
}
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema)
throws IOException {
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema) throws IOException {
this(fs, logFile, readerSchema, DEFAULT_BUFFER_SIZE, false, false);
}
@@ -154,8 +150,7 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
if (nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION) {
type = inputStream.readInt();
Preconditions.checkArgument(type < HoodieLogBlockType.values().length,
"Invalid block byte type found " + type);
Preconditions.checkArgument(type < HoodieLogBlockType.values().length, "Invalid block byte type found " + type);
blockType = HoodieLogBlockType.values()[type];
}
@@ -198,18 +193,15 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
if (nextBlockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) {
return HoodieAvroDataBlock.getBlock(content, readerSchema);
} else {
return HoodieAvroDataBlock
.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, readerSchema, header, footer);
return HoodieAvroDataBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, readerSchema, header, footer);
}
case DELETE_BLOCK:
return HoodieDeleteBlock
.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, header, footer);
return HoodieDeleteBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, header, footer);
case COMMAND_BLOCK:
return HoodieCommandBlock
.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, header, footer);
return HoodieCommandBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, header, footer);
default:
throw new HoodieNotSupportedException("Unsupported Block " + blockType);
}
@@ -224,12 +216,9 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
log.info("Next available block in " + logFile + " starts at " + nextBlockOffset);
int corruptedBlockSize = (int) (nextBlockOffset - currentPos);
long contentPosition = inputStream.getPos();
byte[] corruptedBytes = HoodieLogBlock
.readOrSkipContent(inputStream, corruptedBlockSize, readBlockLazily);
return HoodieCorruptBlock
.getBlock(logFile, inputStream, Option.ofNullable(corruptedBytes), readBlockLazily,
contentPosition, corruptedBlockSize, corruptedBlockSize, new HashMap<>(),
new HashMap<>());
byte[] corruptedBytes = HoodieLogBlock.readOrSkipContent(inputStream, corruptedBlockSize, readBlockLazily);
return HoodieCorruptBlock.getBlock(logFile, inputStream, Option.ofNullable(corruptedBytes), readBlockLazily,
contentPosition, corruptedBlockSize, corruptedBlockSize, new HashMap<>(), new HashMap<>());
}
private boolean isBlockCorrupt(int blocksize) throws IOException {
@@ -311,8 +300,7 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
boolean hasMagic = hasNextMagic();
if (!hasMagic) {
throw new CorruptedLogFileException(
logFile
+ "could not be read. Did not find the magic bytes at the start of the block");
logFile + "could not be read. Did not find the magic bytes at the start of the block");
}
return hasMagic;
} catch (EOFException e) {
@@ -362,9 +350,9 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
}
/**
* This is a reverse iterator Note: At any point, an instance of HoodieLogFileReader should either
* iterate reverse (prev) or forward (next). Doing both in the same instance is not supported
* WARNING : Every call to prev() should be preceded with hasPrev()
* This is a reverse iterator. Note: At any point, an instance of HoodieLogFileReader should either iterate in reverse
* (prev) or forward (next). Doing both in the same instance is not supported. WARNING: Every call to prev() should be
* preceded with hasPrev()
*/
@Override
public HoodieLogBlock prev() throws IOException {
@@ -380,9 +368,8 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
} catch (Exception e) {
// this could be a corrupt block
inputStream.seek(blockEndPos);
throw new CorruptedLogFileException(
"Found possible corrupted block, cannot read log file in reverse, "
+ "fallback to forward reading of logfile");
throw new CorruptedLogFileException("Found possible corrupted block, cannot read log file in reverse, "
+ "fallback to forward reading of logfile");
}
boolean hasNext = hasNext();
reverseLogFilePosition -= blockSize;
@@ -391,10 +378,9 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
}
/**
* Reverse pointer, does not read the block. Return the current position of the log file (in
* reverse) If the pointer (inputstream) is moved in any way, it is the job of the client of this
* class to seek/reset it back to the file position returned from the method to expect correct
* results
* Reverse pointer, does not read the block. Returns the current position of the log file (in reverse). If the pointer
* (inputstream) is moved in any way, it is the job of the client of this class to seek/reset it back to the file
* position returned from the method to expect correct results.
*/
public long moveToPrev() throws IOException {

View File

@@ -33,11 +33,10 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* File Format for Hoodie Log Files. The File Format consists of blocks each separated with a
* MAGIC sync marker. A Block can either be a Data block, Command block or Delete Block. Data
* Block - Contains log records serialized as Avro Binary Format Command Block - Specific commands
* like ROLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block Delete Block - List of
* keys to delete - tombstone for keys
* File Format for Hoodie Log Files. The File Format consists of blocks, each separated with a MAGIC sync marker. A
* Block can either be a Data block, Command block or Delete block. Data Block - Contains log records serialized in
* Avro binary format. Command Block - Specific commands like ROLLBACK_PREVIOUS_BLOCK, a tombstone for the previously
* written block. Delete Block - List of keys to delete, a tombstone for keys.
*/
public interface HoodieLogFormat {
@@ -47,8 +46,8 @@ public interface HoodieLogFormat {
byte[] MAGIC = new byte[] {'#', 'H', 'U', 'D', 'I', '#'};
/**
* The current version of the log format. Anytime the log format changes this version needs to be
* bumped and corresponding changes need to be made to {@link HoodieLogFormatVersion}
* The current version of the log format. Anytime the log format changes this version needs to be bumped and
* corresponding changes need to be made to {@link HoodieLogFormatVersion}
*/
int currentVersion = 1;
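For intuition, a hedged sketch of detecting a block boundary with the MAGIC marker (the shipped readers do this internally; inputStream is an assumed DataInputStream positioned at a block start):

byte[] buf = new byte[HoodieLogFormat.MAGIC.length]; // 6 bytes: #HUDI#
inputStream.readFully(buf);
boolean atBlockBoundary = Arrays.equals(buf, HoodieLogFormat.MAGIC);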
@@ -84,12 +83,14 @@ public interface HoodieLogFormat {
/**
* Read log file in reverse order and check if prev block is present
*
* @return
*/
public boolean hasPrev();
/**
* Read log file in reverse order and return prev block if present
*
* @return
* @throws IOException
*/
@@ -220,9 +221,8 @@ public interface HoodieLogFormat {
// Use rollover write token as write token to create new log file with tokens
logWriteToken = rolloverLogWriteToken;
}
log.info(
"Computed the next log version for " + logFileId + " in " + parentPath + " as "
+ logVersion + " with write-token " + logWriteToken);
log.info("Computed the next log version for " + logFileId + " in " + parentPath + " as " + logVersion
+ " with write-token " + logWriteToken);
}
if (logWriteToken == null) {
@@ -259,16 +259,15 @@ public interface HoodieLogFormat {
return new HoodieLogFileReader(fs, logFile, readerSchema, HoodieLogFileReader.DEFAULT_BUFFER_SIZE, false, false);
}
static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean
readBlockLazily, boolean reverseReader)
throws IOException {
return new HoodieLogFileReader(fs, logFile, readerSchema, HoodieLogFileReader.DEFAULT_BUFFER_SIZE,
readBlockLazily, reverseReader);
static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema,
boolean readBlockLazily, boolean reverseReader) throws IOException {
return new HoodieLogFileReader(fs, logFile, readerSchema, HoodieLogFileReader.DEFAULT_BUFFER_SIZE, readBlockLazily,
reverseReader);
}
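A sketch of reading blocks through the factory methods above; logPath and readerSchema are placeholders, and Reader is assumed Closeable since its implementations expose close():

try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(logPath), readerSchema)) {
  while (reader.hasNext()) {
    HoodieLogBlock block = reader.next(); // inspect block.getBlockType(), headers, content
  }
}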
/**
* A set of feature flags associated with a log format. Versions are changed when the log format
* changes. TODO(na) - Implement policies around major/minor versions
* A set of feature flags associated with a log format. Versions are changed when the log format changes. TODO(na) -
* Implement policies around major/minor versions
*/
abstract class LogFormatVersion {

View File

@@ -43,8 +43,8 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
private static final Logger log = LogManager.getLogger(HoodieLogFormatReader.class);
HoodieLogFormatReader(FileSystem fs, List<HoodieLogFile> logFiles,
Schema readerSchema, boolean readBlocksLazily, boolean reverseLogReader, int bufferSize) throws IOException {
HoodieLogFormatReader(FileSystem fs, List<HoodieLogFile> logFiles, Schema readerSchema, boolean readBlocksLazily,
boolean reverseLogReader, int bufferSize) throws IOException {
this.logFiles = logFiles;
this.fs = fs;
this.readerSchema = readerSchema;
@@ -60,10 +60,9 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
@Override
/**
* Note : In lazy mode, clients must ensure close() should be called only after processing
* all log-blocks as the underlying inputstream will be closed.
* TODO: We can introduce invalidate() API at HoodieLogBlock and this object can call invalidate on
* all returned log-blocks so that we check this scenario specifically in HoodieLogBlock
* Note: In lazy mode, clients must ensure close() is called only after processing all log-blocks, as the
* underlying inputstream will be closed. TODO: We can introduce an invalidate() API at HoodieLogBlock and this object
* can call invalidate on all returned log-blocks so that we check this scenario specifically in HoodieLogBlock
*/
public void close() throws IOException {
@@ -94,8 +93,8 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
} else {
this.prevReadersInOpenState.add(currentReader);
}
this.currentReader = new HoodieLogFileReader(fs, nextLogFile, readerSchema, bufferSize, readBlocksLazily,
false);
this.currentReader =
new HoodieLogFileReader(fs, nextLogFile, readerSchema, bufferSize, readBlocksLazily, false);
} catch (IOException io) {
throw new HoodieIOException("unable to initialize read with log file ", io);
}
@@ -116,8 +115,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
}
@Override
public void remove() {
}
public void remove() {}
@Override
public boolean hasPrev() {

View File

@@ -19,8 +19,7 @@
package org.apache.hudi.common.table.log;
/**
* Implements logic to determine behavior for feature flags for
* {@link HoodieLogFormat.LogFormatVersion}.
* Implements logic to determine behavior for feature flags for {@link HoodieLogFormat.LogFormatVersion}.
*/
final class HoodieLogFormatVersion extends HoodieLogFormat.LogFormatVersion {

View File

@@ -38,8 +38,7 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* HoodieLogFormatWriter can be used to append blocks to a log file Use
* HoodieLogFormat.WriterBuilder to construct
* HoodieLogFormatWriter can be used to append blocks to a log file. Use HoodieLogFormat.WriterBuilder to construct.
*/
public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
@@ -62,9 +61,8 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
* @param replication
* @param sizeThreshold
*/
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize,
Short replication, Long sizeThreshold, String logWriteToken, String rolloverLogWriteToken)
throws IOException, InterruptedException {
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize, Short replication, Long sizeThreshold,
String logWriteToken, String rolloverLogWriteToken) throws IOException, InterruptedException {
this.fs = fs;
this.logFile = logFile;
this.sizeThreshold = sizeThreshold;
@@ -116,12 +114,11 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
}
@Override
public Writer appendBlock(HoodieLogBlock block)
throws IOException, InterruptedException {
public Writer appendBlock(HoodieLogBlock block) throws IOException, InterruptedException {
// Find current version
HoodieLogFormat.LogFormatVersion currentLogFormatVersion = new HoodieLogFormatVersion(
HoodieLogFormat.currentVersion);
HoodieLogFormat.LogFormatVersion currentLogFormatVersion =
new HoodieLogFormatVersion(HoodieLogFormat.currentVersion);
long currentSize = this.output.size();
// 1. Write the magic header for the start of the block
@@ -135,8 +132,7 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
byte[] footerBytes = HoodieLogBlock.getLogMetadataBytes(block.getLogBlockFooter());
// 2. Write the total size of the block (excluding Magic)
this.output
.writeLong(getLogBlockLength(content.length, headerBytes.length, footerBytes.length));
this.output.writeLong(getLogBlockLength(content.length, headerBytes.length, footerBytes.length));
// 3. Write the version of this log block
this.output.writeInt(currentLogFormatVersion.getVersion());
@@ -162,26 +158,24 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
}
/**
* This method returns the total LogBlock Length which is the sum of 1. Number of bytes to write
* version 2. Number of bytes to write ordinal 3. Length of the headers 4. Number of bytes used to
* write content length 5. Length of the content 6. Length of the footers 7. Number of bytes to
* write totalLogBlockLength
* This method returns the total LogBlock length, which is the sum of: 1. Number of bytes to write the version, 2.
* Number of bytes to write the ordinal, 3. Length of the headers, 4. Number of bytes used to write the content length,
* 5. Length of the content, 6. Length of the footers, 7. Number of bytes to write totalLogBlockLength.
*/
private int getLogBlockLength(int contentLength, int headerLength, int footerLength) {
return
Integer.BYTES + // Number of bytes to write version
Integer.BYTES + // Number of bytes to write ordinal
headerLength + // Length of the headers
Long.BYTES + // Number of bytes used to write content length
contentLength + // Length of the content
footerLength + // Length of the footers
Long.BYTES; // bytes to write totalLogBlockLength at end of block (for reverse ptr)
return Integer.BYTES + // Number of bytes to write version
Integer.BYTES + // Number of bytes to write ordinal
headerLength + // Length of the headers
Long.BYTES + // Number of bytes used to write content length
contentLength + // Length of the content
footerLength + // Length of the footers
Long.BYTES; // bytes to write totalLogBlockLength at end of block (for reverse ptr)
}
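A worked example of the sum above, assuming illustrative sizes of 1000-byte content, 40-byte headers and 20-byte footers:

// version (4) + ordinal (4) + headers (40) + content-length field (8)
// + content (1000) + footers (20) + reverse pointer (8) = 1084
// i.e. getLogBlockLength(1000, 40, 20) == 1084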
private Writer rolloverIfNeeded() throws IOException, InterruptedException {
// Roll over if the size is past the threshold
if (getCurrentSize() > sizeThreshold) {
//TODO - make an end marker which seals the old log file (no more appends possible to that
// TODO - make an end marker which seals the old log file (no more appends possible to that
// file).
log.info("CurrentSize " + getCurrentSize() + " has reached threshold " + sizeThreshold
+ ". Rolling over to the next version");
@@ -195,8 +189,8 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
}
private void createNewFile() throws IOException {
this.output = fs.create(this.logFile.getPath(), false, bufferSize, replication,
WriterBuilder.DEFAULT_SIZE_THRESHOLD, null);
this.output =
fs.create(this.logFile.getPath(), false, bufferSize, replication, WriterBuilder.DEFAULT_SIZE_THRESHOLD, null);
}
@Override
@@ -218,14 +212,13 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
public long getCurrentSize() throws IOException {
if (output == null) {
throw new IllegalStateException(
"Cannot get current size as the underlying stream has been closed already");
throw new IllegalStateException("Cannot get current size as the underlying stream has been closed already");
}
return output.getPos();
}
private void handleAppendExceptionOrRecoverLease(Path path, RemoteException e) throws IOException,
InterruptedException {
private void handleAppendExceptionOrRecoverLease(Path path, RemoteException e)
throws IOException, InterruptedException {
if (e.getMessage().contains(APPEND_UNAVAILABLE_EXCEPTION_MESSAGE)) {
// This issue happens when all replicas for a file are down and/or being decommissioned.
// The fs.append() API could append to the last block for a file. If the last block is full, a new block is

View File

@@ -40,12 +40,13 @@ import org.apache.log4j.Logger;
* Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged list of records which will
* be used as a lookup table when merging the base columnar file with the redo log file.
*
* NOTE: If readBlockLazily is
* turned on, does not merge, instead keeps reading log blocks and merges everything at once This is an optimization to
* avoid seek() back and forth to read new block (forward seek()) and lazily read content of seen block (reverse and
* forward seek()) during merge | | Read Block 1 Metadata | | Read Block 1 Data | | | Read Block 2
* Metadata | | Read Block 2 Data | | I/O Pass 1 | ..................... | I/O Pass 2 | ................. | |
* | Read Block N Metadata | | Read Block N Data | <p> This results in two I/O passes over the log file.
* NOTE: If readBlockLazily is turned on, does not merge, instead keeps reading log blocks and merges everything at once.
* This is an optimization to avoid seek() back and forth to read a new block (forward seek()) and lazily read content of
* a seen block (reverse and forward seek()) during merge. I/O Pass 1 reads each block's metadata (Read Block 1 Metadata
* ... Read Block N Metadata); I/O Pass 2 reads each block's data (Read Block 1 Data ... Read Block N Data).
* <p>
* This results in two I/O passes over the log file.
*/
public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordScanner
@@ -65,26 +66,24 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordScanner
public final HoodieTimer timer = new HoodieTimer();
@SuppressWarnings("unchecked")
public HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
Schema readerSchema, String latestInstantTime, Long maxMemorySizeInBytes,
boolean readBlocksLazily, boolean reverseReader, int bufferSize, String spillableMapBasePath) {
public HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths, Schema readerSchema,
String latestInstantTime, Long maxMemorySizeInBytes, boolean readBlocksLazily, boolean reverseReader,
int bufferSize, String spillableMapBasePath) {
super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize);
try {
// Store merged records for all versions for this log file, set the in-memory footprint to maxInMemoryMapSize
this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, spillableMapBasePath,
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(readerSchema));
this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, spillableMapBasePath, new DefaultSizeEstimator(),
new HoodieRecordSizeEstimator(readerSchema));
// Do the scan and merge
timer.startTimer();
scan();
this.totalTimeTakenToReadAndMergeBlocks = timer.endTimer();
this.numMergedRecordsInLog = records.size();
log.info("MaxMemoryInBytes allowed for compaction => " + maxMemorySizeInBytes);
log.info("Number of entries in MemoryBasedMap in ExternalSpillableMap => " + records
.getInMemoryMapNumEntries());
log.info("Total size in bytes of MemoryBasedMap in ExternalSpillableMap => " + records
.getCurrentInMemoryMapSize());
log.info("Number of entries in DiskBasedMap in ExternalSpillableMap => " + records
.getDiskBasedMapNumEntries());
log.info("Number of entries in MemoryBasedMap in ExternalSpillableMap => " + records.getInMemoryMapNumEntries());
log.info(
"Total size in bytes of MemoryBasedMap in ExternalSpillableMap => " + records.getCurrentInMemoryMapSize());
log.info("Number of entries in DiskBasedMap in ExternalSpillableMap => " + records.getDiskBasedMapNumEntries());
log.info("Size of file spilled to disk => " + records.getSizeOfFileOnDiskInBytes());
} catch (IOException e) {
throw new HoodieIOException("IOException when reading log file ");

View File

@@ -29,9 +29,8 @@ public class HoodieUnMergedLogRecordScanner extends AbstractHoodieLogRecordScann
private final LogRecordScannerCallback callback;
public HoodieUnMergedLogRecordScanner(FileSystem fs, String basePath,
List<String> logFilePaths, Schema readerSchema, String latestInstantTime,
boolean readBlocksLazily, boolean reverseReader, int bufferSize,
public HoodieUnMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths, Schema readerSchema,
String latestInstantTime, boolean readBlocksLazily, boolean reverseReader, int bufferSize,
LogRecordScannerCallback callback) {
super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize);
this.callback = callback;

View File

@@ -48,9 +48,8 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieIOException;
/**
* DataBlock contains a list of records serialized using Avro. The Datablock contains 1. Data Block
* version 2. Total number of records in the block 3. Size of a record 4. Actual avro serialized
* content of the record
* DataBlock contains a list of records serialized using Avro. The DataBlock contains: 1. Data Block version, 2. Total
* number of records in the block, 3. Size of a record, 4. Actual Avro-serialized content of the record.
*/
public class HoodieAvroDataBlock extends HoodieLogBlock {
@@ -59,41 +58,31 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
private ThreadLocal<BinaryEncoder> encoderCache = new ThreadLocal<>();
private ThreadLocal<BinaryDecoder> decoderCache = new ThreadLocal<>();
public HoodieAvroDataBlock(@Nonnull List<IndexedRecord> records,
@Nonnull Map<HeaderMetadataType, String> header,
public HoodieAvroDataBlock(@Nonnull List<IndexedRecord> records, @Nonnull Map<HeaderMetadataType, String> header,
@Nonnull Map<HeaderMetadataType, String> footer) {
super(header, footer, Option.empty(), Option.empty(), null, false);
this.records = records;
this.schema = Schema.parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
}
public HoodieAvroDataBlock(@Nonnull List<IndexedRecord> records,
@Nonnull Map<HeaderMetadataType, String> header) {
public HoodieAvroDataBlock(@Nonnull List<IndexedRecord> records, @Nonnull Map<HeaderMetadataType, String> header) {
this(records, header, new HashMap<>());
}
private HoodieAvroDataBlock(Option<byte[]> content, @Nonnull FSDataInputStream inputStream,
boolean readBlockLazily, Option<HoodieLogBlockContentLocation> blockContentLocation,
Schema readerSchema, @Nonnull Map<HeaderMetadataType, String> headers,
@Nonnull Map<HeaderMetadataType, String> footer) {
private HoodieAvroDataBlock(Option<byte[]> content, @Nonnull FSDataInputStream inputStream, boolean readBlockLazily,
Option<HoodieLogBlockContentLocation> blockContentLocation, Schema readerSchema,
@Nonnull Map<HeaderMetadataType, String> headers, @Nonnull Map<HeaderMetadataType, String> footer) {
super(headers, footer, blockContentLocation, content, inputStream, readBlockLazily);
this.schema = readerSchema;
}
public static HoodieLogBlock getBlock(HoodieLogFile logFile,
FSDataInputStream inputStream,
Option<byte[]> content,
boolean readBlockLazily,
long position,
long blockSize,
long blockEndpos,
Schema readerSchema,
Map<HeaderMetadataType, String> header,
Map<HeaderMetadataType, String> footer) {
public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option<byte[]> content,
boolean readBlockLazily, long position, long blockSize, long blockEndpos, Schema readerSchema,
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
return new HoodieAvroDataBlock(content, inputStream, readBlockLazily,
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)),
readerSchema, header, footer);
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)), readerSchema, header,
footer);
}
@@ -171,8 +160,8 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
return schema;
}
//TODO (na) - Break down content into smaller chunks of byte [] to be GC as they are used
//TODO (na) - Implement a recordItr instead of recordList
// TODO (na) - Break down content into smaller chunks of byte [] to be GC as they are used
// TODO (na) - Implement a recordItr instead of recordList
private void createRecordsFromContentBytes() throws IOException {
if (readBlockLazily && !getContent().isPresent()) {
@@ -181,16 +170,14 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
}
SizeAwareDataInputStream dis =
new SizeAwareDataInputStream(
new DataInputStream(new ByteArrayInputStream(getContent().get())));
new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get())));
// 1. Read version for this data block
int version = dis.readInt();
HoodieAvroDataBlockVersion logBlockVersion = new HoodieAvroDataBlockVersion(version);
// Get schema from the header
Schema writerSchema = new Schema.Parser()
.parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
// If readerSchema was not present, use writerSchema
if (schema == null) {
@@ -208,8 +195,8 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
// 3. Read the content
for (int i = 0; i < totalRecords; i++) {
int recordLength = dis.readInt();
BinaryDecoder decoder = DecoderFactory.get()
.binaryDecoder(getContent().get(), dis.getNumberOfBytesRead(), recordLength, decoderCache.get());
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(getContent().get(), dis.getNumberOfBytesRead(),
recordLength, decoderCache.get());
decoderCache.set(decoder);
IndexedRecord record = reader.read(null, decoder);
records.add(record);
@@ -221,13 +208,13 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
deflate();
}
/*********************************DEPRECATED METHODS***********************************/
/********************************* DEPRECATED METHODS ***********************************/
@Deprecated
@VisibleForTesting
/**
* This constructor is retained to provide backwards compatibility to HoodieArchivedLogs
* which were written using HoodieLogFormat V1
* This constructor is retained to provide backwards compatibility to HoodieArchivedLogs which were written using
* HoodieLogFormat V1
*/
public HoodieAvroDataBlock(List<IndexedRecord> records, Schema schema) {
super(new HashMap<>(), new HashMap<>(), Option.empty(), Option.empty(), null, false);
@@ -237,13 +224,12 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
@Deprecated
/**
* This method is retained to provide backwards compatibility to HoodieArchivedLogs which
* were written using HoodieLogFormat V1
* This method is retained to provide backwards compatibility to HoodieArchivedLogs which were written using
* HoodieLogFormat V1
*/
public static HoodieLogBlock getBlock(byte[] content, Schema readerSchema) throws IOException {
SizeAwareDataInputStream dis = new SizeAwareDataInputStream(
new DataInputStream(new ByteArrayInputStream(content)));
SizeAwareDataInputStream dis = new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(content)));
// 1. Read the schema written out
int schemaLength = dis.readInt();
@@ -263,8 +249,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
// 3. Read the content
for (int i = 0; i < totalRecords; i++) {
int recordLength = dis.readInt();
Decoder decoder = DecoderFactory.get()
.binaryDecoder(content, dis.getNumberOfBytesRead(), recordLength, null);
Decoder decoder = DecoderFactory.get().binaryDecoder(content, dis.getNumberOfBytesRead(), recordLength, null);
IndexedRecord record = reader.read(null, decoder);
records.add(record);
dis.skipBytes(recordLength);

View File

@@ -19,8 +19,8 @@
package org.apache.hudi.common.table.log.block;
/**
* A set of feature flags associated with a data log block format. Versions are changed when the log
* block format changes. TODO(na) - Implement policies around major/minor versions
* A set of feature flags associated with a data log block format. Versions are changed when the log block format
* changes. TODO(na) - Implement policies around major/minor versions
*/
final class HoodieAvroDataBlockVersion extends HoodieLogBlockVersion {

View File

@@ -39,12 +39,12 @@ public class HoodieCommandBlock extends HoodieLogBlock {
this(Option.empty(), null, false, Option.empty(), header, new HashMap<>());
}
private HoodieCommandBlock(Option<byte[]> content, FSDataInputStream inputStream,
boolean readBlockLazily, Option<HoodieLogBlockContentLocation> blockContentLocation,
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
private HoodieCommandBlock(Option<byte[]> content, FSDataInputStream inputStream, boolean readBlockLazily,
Option<HoodieLogBlockContentLocation> blockContentLocation, Map<HeaderMetadataType, String> header,
Map<HeaderMetadataType, String> footer) {
super(header, footer, blockContentLocation, content, inputStream, readBlockLazily);
this.type = HoodieCommandBlockTypeEnum.values()[Integer
.parseInt(header.get(HeaderMetadataType.COMMAND_BLOCK_TYPE))];
this.type =
HoodieCommandBlockTypeEnum.values()[Integer.parseInt(header.get(HeaderMetadataType.COMMAND_BLOCK_TYPE))];
}
public HoodieCommandBlockTypeEnum getType() {
@@ -61,18 +61,11 @@ public class HoodieCommandBlock extends HoodieLogBlock {
return new byte[0];
}
public static HoodieLogBlock getBlock(HoodieLogFile logFile,
FSDataInputStream inputStream,
Option<byte[]> content,
boolean readBlockLazily,
long position,
long blockSize,
long blockEndpos,
Map<HeaderMetadataType, String> header,
public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option<byte[]> content,
boolean readBlockLazily, long position, long blockSize, long blockEndpos, Map<HeaderMetadataType, String> header,
Map<HeaderMetadataType, String> footer) {
return new HoodieCommandBlock(content, inputStream, readBlockLazily,
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)),
header, footer);
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)), header, footer);
}
}

View File

@@ -19,8 +19,8 @@
package org.apache.hudi.common.table.log.block;
/**
* A set of feature flags associated with a command log block format. Versions are changed when the
* log block format changes. TODO(na) - Implement policies around major/minor versions
* A set of feature flags associated with a command log block format. Versions are changed when the log block format
* changes. TODO(na) - Implement policies around major/minor versions
*/
final class HoodieCommandBlockVersion extends HoodieLogBlockVersion {

View File

@@ -25,14 +25,14 @@ import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.util.Option;
/**
* Corrupt block is emitted whenever the scanner finds the length of the block written at the
* beginning does not match (did not find a EOF or a sync marker after the length)
* Corrupt block is emitted whenever the scanner finds that the length of the block written at the beginning does not
* match (did not find an EOF or a sync marker after the length)
*/
public class HoodieCorruptBlock extends HoodieLogBlock {
private HoodieCorruptBlock(Option<byte[]> corruptedBytes, FSDataInputStream inputStream,
boolean readBlockLazily, Option<HoodieLogBlockContentLocation> blockContentLocation,
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
private HoodieCorruptBlock(Option<byte[]> corruptedBytes, FSDataInputStream inputStream, boolean readBlockLazily,
Option<HoodieLogBlockContentLocation> blockContentLocation, Map<HeaderMetadataType, String> header,
Map<HeaderMetadataType, String> footer) {
super(header, footer, blockContentLocation, corruptedBytes, inputStream, readBlockLazily);
}
@@ -51,18 +51,11 @@ public class HoodieCorruptBlock extends HoodieLogBlock {
return HoodieLogBlockType.CORRUPT_BLOCK;
}
public static HoodieLogBlock getBlock(HoodieLogFile logFile,
FSDataInputStream inputStream,
Option<byte[]> corruptedBytes,
boolean readBlockLazily,
long position,
long blockSize,
long blockEndPos,
Map<HeaderMetadataType, String> header,
Map<HeaderMetadataType, String> footer) {
public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream,
Option<byte[]> corruptedBytes, boolean readBlockLazily, long position, long blockSize, long blockEndPos,
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
return new HoodieCorruptBlock(corruptedBytes, inputStream, readBlockLazily,
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)),
header, footer);
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)), header, footer);
}
}

View File

@@ -40,16 +40,15 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
private HoodieKey[] keysToDelete;
public HoodieDeleteBlock(HoodieKey[] keysToDelete,
Map<HeaderMetadataType, String> header) {
public HoodieDeleteBlock(HoodieKey[] keysToDelete, Map<HeaderMetadataType, String> header) {
this(Option.empty(), null, false, Option.empty(), header, new HashMap<>());
this.keysToDelete = keysToDelete;
}
private HoodieDeleteBlock(Option<byte[]> content, FSDataInputStream inputStream,
boolean readBlockLazily, Option<HoodieLogBlockContentLocation> blockContentLocation,
Map<HeaderMetadataType, String> header, Map<HeaderMetadataType, String> footer) {
private HoodieDeleteBlock(Option<byte[]> content, FSDataInputStream inputStream, boolean readBlockLazily,
Option<HoodieLogBlockContentLocation> blockContentLocation, Map<HeaderMetadataType, String> header,
Map<HeaderMetadataType, String> footer) {
super(header, footer, blockContentLocation, content, inputStream, readBlockLazily);
}
@@ -81,8 +80,7 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
inflate();
}
SizeAwareDataInputStream dis =
new SizeAwareDataInputStream(
new DataInputStream(new ByteArrayInputStream(getContent().get())));
new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get())));
int version = dis.readInt();
int dataLength = dis.readInt();
byte[] data = new byte[dataLength];
@@ -101,18 +99,11 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
return HoodieLogBlockType.DELETE_BLOCK;
}
public static HoodieLogBlock getBlock(HoodieLogFile logFile,
FSDataInputStream inputStream,
Option<byte[]> content,
boolean readBlockLazily,
long position,
long blockSize,
long blockEndPos,
Map<HeaderMetadataType, String> header,
public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option<byte[]> content,
boolean readBlockLazily, long position, long blockSize, long blockEndPos, Map<HeaderMetadataType, String> header,
Map<HeaderMetadataType, String> footer) throws IOException {
return new HoodieDeleteBlock(content, inputStream, readBlockLazily,
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)),
header, footer);
Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)), header, footer);
}
}

View File

@@ -19,8 +19,8 @@
package org.apache.hudi.common.table.log.block;
/**
* A set of feature flags associated with a delete log block format. Versions are changed when the
* log block format changes. TODO(na) - Implement policies around major/minor versions
* A set of feature flags associated with a delete log block format. Versions are changed when the log block format
* changes. TODO(na) - Implement policies around major/minor versions
*/
final class HoodieDeleteBlockVersion extends HoodieLogBlockVersion {

View File

@@ -40,10 +40,9 @@ import org.apache.hudi.exception.HoodieIOException;
public abstract class HoodieLogBlock {
/**
* The current version of the log block. Anytime the logBlock format changes this version needs to
* be bumped and corresponding changes need to be made to {@link HoodieLogBlockVersion} TODO :
* Change this to a class, something like HoodieLogBlockVersionV1/V2 and implement/override
* operations there
* The current version of the log block. Anytime the logBlock format changes, this version needs to be bumped and
* corresponding changes need to be made to {@link HoodieLogBlockVersion}. TODO: Change this to a class, something
* like HoodieLogBlockVersionV1/V2 and implement/override operations there
*/
public static int version = 1;
// Header for each log block
@@ -63,10 +62,8 @@ public abstract class HoodieLogBlock {
public HoodieLogBlock(@Nonnull Map<HeaderMetadataType, String> logBlockHeader,
@Nonnull Map<HeaderMetadataType, String> logBlockFooter,
@Nonnull Option<HoodieLogBlockContentLocation> blockContentLocation,
@Nonnull Option<byte[]> content,
FSDataInputStream inputStream,
boolean readBlockLazily) {
@Nonnull Option<HoodieLogBlockContentLocation> blockContentLocation, @Nonnull Option<byte[]> content,
FSDataInputStream inputStream, boolean readBlockLazily) {
this.logBlockHeader = logBlockHeader;
this.logBlockFooter = logBlockFooter;
this.blockContentLocation = blockContentLocation;
@@ -109,38 +106,30 @@ public abstract class HoodieLogBlock {
}
/**
* Type of the log block WARNING: This enum is serialized as the ordinal. Only add new enums at
* the end.
* Type of the log block. WARNING: This enum is serialized as the ordinal. Only add new enums at the end.
*/
public enum HoodieLogBlockType {
COMMAND_BLOCK,
DELETE_BLOCK,
CORRUPT_BLOCK,
AVRO_DATA_BLOCK
COMMAND_BLOCK, DELETE_BLOCK, CORRUPT_BLOCK, AVRO_DATA_BLOCK
}
/**
* Log Metadata headers abstraction for a HoodieLogBlock WARNING : This enum is serialized as the
* ordinal. Only add new enums at the end.
* Log Metadata headers abstraction for a HoodieLogBlock. WARNING: This enum is serialized as the ordinal. Only add
* new enums at the end.
*/
public enum HeaderMetadataType {
INSTANT_TIME,
TARGET_INSTANT_TIME,
SCHEMA,
COMMAND_BLOCK_TYPE
INSTANT_TIME, TARGET_INSTANT_TIME, SCHEMA, COMMAND_BLOCK_TYPE
}
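Because these types travel as ordinals (the command-block type above is recovered via values()[Integer.parseInt(...)]), append-only evolution is the contract. A small self-contained illustration of why inserting a constant would silently remap old data:

public class OrdinalEncodingDemo {
  // condensed copy of the enum above
  enum HeaderMetadataType { INSTANT_TIME, TARGET_INSTANT_TIME, SCHEMA, COMMAND_BLOCK_TYPE }

  public static void main(String[] args) {
    // write side: the ordinal is stored as a string in the block header
    String wire = String.valueOf(HeaderMetadataType.SCHEMA.ordinal());            // "2"
    // read side: index back into values()
    HeaderMetadataType decoded = HeaderMetadataType.values()[Integer.parseInt(wire)];
    System.out.println(decoded);                                                  // SCHEMA
    // If a new constant were inserted before SCHEMA in a later release, data already
    // written with "2" would decode to the wrong constant; hence "only add new enums
    // at the end".
  }
}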
/**
* Log Metadata footers abstraction for a HoodieLogBlock WARNING : This enum is serialized as the
* ordinal. Only add new enums at the end.
* Log Metadata footers abstraction for a HoodieLogBlock. WARNING: This enum is serialized as the ordinal. Only add
* new enums at the end.
*/
public enum FooterMetadataType {
}
/**
* This class is used to store the Location of the Content of a Log Block. It's used when a client
* chooses for a IO intensive CompactedScanner, the location helps to lazily read contents from
* the log file
* This class is used to store the Location of the Content of a Log Block. It's used when a client chooses an IO
* intensive CompactedScanner; the location helps to lazily read contents from the log file
*/
public static final class HoodieLogBlockContentLocation {
@@ -153,8 +142,8 @@ public abstract class HoodieLogBlock {
// The final position where the complete block ends
private final long blockEndPos;
HoodieLogBlockContentLocation(HoodieLogFile logFile, long contentPositionInLogFile,
long blockSize, long blockEndPos) {
HoodieLogBlockContentLocation(HoodieLogFile logFile, long contentPositionInLogFile, long blockSize,
long blockEndPos) {
this.logFile = logFile;
this.contentPositionInLogFile = contentPositionInLogFile;
this.blockSize = blockSize;
@@ -179,11 +168,9 @@ public abstract class HoodieLogBlock {
}
/**
* Convert log metadata to bytes 1. Write size of metadata 2. Write enum ordinal 3. Write actual
* bytes
* Convert log metadata to bytes: 1. Write size of metadata 2. Write enum ordinal 3. Write actual bytes
*/
public static byte[] getLogMetadataBytes(Map<HeaderMetadataType, String> metadata)
throws IOException {
public static byte[] getLogMetadataBytes(Map<HeaderMetadataType, String> metadata) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream output = new DataOutputStream(baos);
output.writeInt(metadata.size());
@@ -197,11 +184,9 @@ public abstract class HoodieLogBlock {
}
/**
* Convert bytes to LogMetadata, follow the same order as
* {@link HoodieLogBlock#getLogMetadataBytes}
* Convert bytes to LogMetadata, following the same order as {@link HoodieLogBlock#getLogMetadataBytes}
*/
public static Map<HeaderMetadataType, String> getLogMetadata(DataInputStream dis)
throws IOException {
public static Map<HeaderMetadataType, String> getLogMetadata(DataInputStream dis) throws IOException {
Map<HeaderMetadataType, String> metadata = Maps.newHashMap();
// 1. Read the metadata written out
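A round-trip sketch of the framing that getLogMetadataBytes writes and getLogMetadata reads back: entry count, then per entry the enum ordinal and the value bytes. The length-prefixed UTF-8 value encoding here is an assumption inferred from the numbered steps, not copied from Hudi:

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

public class LogMetadataCodecSketch {
  enum HeaderMetadataType { INSTANT_TIME, TARGET_INSTANT_TIME, SCHEMA, COMMAND_BLOCK_TYPE }

  static byte[] toBytes(Map<HeaderMetadataType, String> metadata) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    out.writeInt(metadata.size());                          // 1. size of metadata
    for (Map.Entry<HeaderMetadataType, String> e : metadata.entrySet()) {
      out.writeInt(e.getKey().ordinal());                   // 2. enum ordinal
      byte[] bytes = e.getValue().getBytes(StandardCharsets.UTF_8);
      out.writeInt(bytes.length);                           // 3. actual bytes, length-prefixed
      out.write(bytes);
    }
    return baos.toByteArray();
  }

  static Map<HeaderMetadataType, String> fromBytes(DataInputStream dis) throws IOException {
    Map<HeaderMetadataType, String> metadata = new HashMap<>();
    int count = dis.readInt();                              // 1. read the metadata count written out
    for (int i = 0; i < count; i++) {
      HeaderMetadataType key = HeaderMetadataType.values()[dis.readInt()];
      byte[] bytes = new byte[dis.readInt()];
      dis.readFully(bytes);
      metadata.put(key, new String(bytes, StandardCharsets.UTF_8));
    }
    return metadata;
  }
}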
@@ -225,8 +210,8 @@ public abstract class HoodieLogBlock {
* Read or skip the block content of a log block in the log file, depending on whether lazy reading is enabled in
* {@link HoodieMergedLogRecordScanner}
*/
public static byte[] readOrSkipContent(FSDataInputStream inputStream,
Integer contentLength, boolean readBlockLazily) throws IOException {
public static byte[] readOrSkipContent(FSDataInputStream inputStream, Integer contentLength, boolean readBlockLazily)
throws IOException {
byte[] content = null;
if (!readBlockLazily) {
// Read the contents in memory
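The read-versus-skip split above, sketched standalone with standard FSDataInputStream operations. Returning null for the skipped case is a simplification; the real code instead remembers a HoodieLogBlockContentLocation so the bytes can be fetched later:

import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;

public class LazyContentSketch {
  static byte[] readOrSkipContent(FSDataInputStream inputStream, int contentLength, boolean readBlockLazily)
      throws IOException {
    if (!readBlockLazily) {
      // eager: materialize the block content in memory
      byte[] content = new byte[contentLength];
      inputStream.readFully(content);
      return content;
    }
    // lazy: jump past the content; it is re-read later from its recorded location
    inputStream.seek(inputStream.getPos() + contentLength);
    return null;
  }
}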
@@ -261,9 +246,8 @@ public abstract class HoodieLogBlock {
}
/**
* After the content bytes is converted into the required DataStructure by a logBlock, deflate the
* content to release byte [] and relieve memory pressure when GC kicks in. NOTE: This still
* leaves the heap fragmented
* After the content bytes are converted into the required DataStructure by a logBlock, deflate the content to release
* the byte[] and relieve memory pressure when GC kicks in. NOTE: This still leaves the heap fragmented
*/
protected void deflate() {
content = Option.empty();
@@ -271,8 +255,9 @@ public abstract class HoodieLogBlock {
/**
* Handles the difference in seek behavior between GCS and non-GCS input streams
*
* @param inputStream Input Stream
* @param pos Position to seek
* @param pos Position to seek
* @throws IOException
*/
private static void safeSeek(FSDataInputStream inputStream, long pos) throws IOException {

View File

@@ -43,22 +43,25 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours
* (configurable) is in the ActiveTimeline and the rest are Archived. ActiveTimeline is a special
* timeline that allows for creation of instants on the timeline. <p></p> The timeline is not
* automatically reloaded on any mutation operation, clients have to manually call reload() so that
* they can chain multiple mutations to the timeline and then call reload() once. <p></p> This class
* can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours (configurable) are in the
* ActiveTimeline and the rest are Archived. ActiveTimeline is a special timeline that allows for creation of instants
* on the timeline.
* <p>
* </p>
* The timeline is not automatically reloaded on any mutation operation, clients have to manually call reload() so that
* they can chain multiple mutations to the timeline and then call reload() once.
* <p>
* </p>
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
*/
public class HoodieActiveTimeline extends HoodieDefaultTimeline {
public static final SimpleDateFormat COMMIT_FORMATTER = new SimpleDateFormat("yyyyMMddHHmmss");
public static final Set<String> VALID_EXTENSIONS_IN_ACTIVE_TIMELINE = new HashSet<>(Arrays.asList(
new String[]{COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION,
INFLIGHT_DELTA_COMMIT_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION, INFLIGHT_COMPACTION_EXTENSION, REQUESTED_COMPACTION_EXTENSION,
INFLIGHT_RESTORE_EXTENSION, RESTORE_EXTENSION}));
public static final Set<String> VALID_EXTENSIONS_IN_ACTIVE_TIMELINE = new HashSet<>(Arrays.asList(new String[] {
COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION, INFLIGHT_DELTA_COMMIT_EXTENSION,
SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION, CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION,
INFLIGHT_COMPACTION_EXTENSION, REQUESTED_COMPACTION_EXTENSION, INFLIGHT_RESTORE_EXTENSION, RESTORE_EXTENSION}));
private static final transient Logger log = LogManager.getLogger(HoodieActiveTimeline.class);
protected HoodieTableMetaClient metaClient;
@@ -83,14 +86,11 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
this.metaClient = metaClient;
// multiple casts will make this lambda serializable -
// http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
this.details =
(Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
this.details = (Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
}
public HoodieActiveTimeline(HoodieTableMetaClient metaClient) {
this(metaClient,
new ImmutableSet.Builder<String>()
.addAll(VALID_EXTENSIONS_IN_ACTIVE_TIMELINE).build());
this(metaClient, new ImmutableSet.Builder<String>().addAll(VALID_EXTENSIONS_IN_ACTIVE_TIMELINE).build());
}
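The double cast in the constructor above is the standard intersection-type trick from JLS 15.16 for obtaining a serializable lambda; a standalone illustration:

import java.io.Serializable;
import java.util.function.Function;

public class SerializableLambdaDemo {
  public static void main(String[] args) {
    // a plain method reference is not Serializable...
    Function<String, Integer> plain = String::length;
    System.out.println(plain instanceof Serializable);   // false
    // ...but an intersection-type cast makes the compiler emit a serializable lambda class
    Function<String, Integer> ser =
        (Function<String, Integer> & Serializable) String::length;
    System.out.println(ser instanceof Serializable);     // true
    System.out.println(ser.apply("hoodie"));             // 6
  }
}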
/**
@@ -98,16 +98,14 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
*
* @deprecated
*/
public HoodieActiveTimeline() {
}
public HoodieActiveTimeline() {}
/**
* This method is only used when this object is deserialized in a spark executor.
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
in.defaultReadObject();
}
@@ -116,29 +114,25 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
*
*/
public HoodieTimeline getCommitsTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION));
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION));
}
/**
* Get all instants (commits, delta commits, in-flight/request compaction) that produce new data, in the active
* timeline *
* With Async compaction a requested/inflight compaction-instant is a valid baseInstant for a file-slice as there
* could be delta-commits with that baseInstant.
* timeline. With async compaction, a requested/inflight compaction-instant is a valid baseInstant for a file-slice, as
* there could be delta-commits with that baseInstant.
*/
public HoodieTimeline getCommitsAndCompactionTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, COMPACTION_ACTION));
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, COMPACTION_ACTION));
}
/**
* Get all instants (commits, delta commits, clean, savepoint, rollback) that result in actions,
* in the active timeline *
* Get all instants (commits, delta commits, clean, savepoint, rollback) that result in actions, in the active
* timeline
*/
public HoodieTimeline getAllCommitsTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, COMPACTION_ACTION,
SAVEPOINT_ACTION, ROLLBACK_ACTION));
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, COMPACTION_ACTION,
SAVEPOINT_ACTION, ROLLBACK_ACTION));
}
/**
@@ -157,8 +151,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
}
/**
* Get a timeline of a specific set of actions. useful to create a merged timeline of multiple
* actions
* Get a timeline of a specific set of actions. Useful to create a merged timeline of multiple actions
*
* @param actions actions allowed in the timeline
*/
@@ -246,8 +239,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
throw new HoodieIOException("Could not delete in-flight instant " + instant);
}
} catch (IOException e) {
throw new HoodieIOException(
"Could not remove inflight commit " + inFlightCommitFilePath, e);
throw new HoodieIOException("Could not remove inflight commit " + inFlightCommitFilePath, e);
}
}
@@ -299,7 +291,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
* Transition Compaction State from inflight to Committed
*
* @param inflightInstant Inflight instant
* @param data Extra Metadata
* @param data Extra Metadata
* @return commit instant
*/
public HoodieInstant transitionCompactionInflightToComplete(HoodieInstant inflightInstant, Option<byte[]> data) {
@@ -319,8 +311,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
* END - COMPACTION RELATED META-DATA MANAGEMENT
**/
private void transitionState(HoodieInstant fromInstant, HoodieInstant toInstant,
Option<byte[]> data) {
private void transitionState(HoodieInstant fromInstant, HoodieInstant toInstant, Option<byte[]> data) {
Preconditions.checkArgument(fromInstant.getTimestamp().equals(toInstant.getTimestamp()));
Path commitFilePath = new Path(metaClient.getMetaPath(), toInstant.getFileName());
try {
@@ -329,8 +320,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
createFileInMetaPath(fromInstant.getFileName(), data);
boolean success = metaClient.getFs().rename(inflightCommitFile, commitFilePath);
if (!success) {
throw new HoodieIOException(
"Could not rename " + inflightCommitFile + " to " + commitFilePath);
throw new HoodieIOException("Could not rename " + inflightCommitFile + " to " + commitFilePath);
}
} catch (IOException e) {
throw new HoodieIOException("Could not complete " + fromInstant, e);
@@ -345,8 +335,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
Path commitFilePath = new Path(metaClient.getMetaPath(), completed.getFileName());
boolean success = metaClient.getFs().rename(commitFilePath, inFlightCommitFilePath);
if (!success) {
throw new HoodieIOException(
"Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
throw new HoodieIOException("Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
}
}
} catch (IOException e) {

View File

@@ -36,11 +36,15 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours
* (configurable) is in the ActiveTimeline and the rest are in ArchivedTimeline. <p></p> Instants
* are read from the archive file during initialization and never refreshed. To refresh, clients
* need to call reload() <p></p> This class can be serialized and de-serialized and on
* de-serialization the FileSystem is re-initialized.
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours (configurable) are in the
* ActiveTimeline and the rest are in ArchivedTimeline.
* <p>
* </p>
* Instants are read from the archive file during initialization and never refreshed. To refresh, clients need to call
* reload()
* <p>
* </p>
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
*/
public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
@@ -54,8 +58,7 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
// Read back the commits to make sure
Path archiveLogPath = HoodieArchivedTimeline.getArchiveLogPath(metaClient.getArchivePath());
try (SequenceFile.Reader reader =
new SequenceFile.Reader(metaClient.getHadoopConf(),
SequenceFile.Reader.file(archiveLogPath))) {
new SequenceFile.Reader(metaClient.getHadoopConf(), SequenceFile.Reader.file(archiveLogPath))) {
Text key = new Text();
Text val = new Text();
while (reader.next(key, val)) {
@@ -63,17 +66,14 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
// This is okay because only tooling will load the archived commit timeline today
readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
}
this.setInstants(readCommits.keySet().stream().map(
s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)).collect(
Collectors.toList()));
this.setInstants(readCommits.keySet().stream().map(s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s))
.collect(Collectors.toList()));
} catch (IOException e) {
throw new HoodieIOException(
"Could not load archived commit timeline from path " + archiveLogPath, e);
throw new HoodieIOException("Could not load archived commit timeline from path " + archiveLogPath, e);
}
// multiple casts will make this lambda serializable -
// http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
this.details =
(Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
this.details = (Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
this.metaClient = metaClient;
}
@@ -82,16 +82,14 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
*
* @deprecated
*/
public HoodieArchivedTimeline() {
}
public HoodieArchivedTimeline() {}
/**
* This method is only used when this object is deserialized in a spark executor.
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
in.defaultReadObject();
}

View File

@@ -37,9 +37,8 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* HoodieDefaultTimeline is a default implementation of the HoodieTimeline. It provides methods to
* inspect a List[HoodieInstant]. Function to get the details of the instant is passed in as a
* lamdba.
* HoodieDefaultTimeline is a default implementation of the HoodieTimeline. It provides methods to inspect a
* List[HoodieInstant]. The function to get the details of the instant is passed in as a lambda.
*
* @see HoodieTimeline
*/
@@ -53,8 +52,7 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
private List<HoodieInstant> instants;
private String timelineHash;
public HoodieDefaultTimeline(Stream<HoodieInstant> instants,
Function<HoodieInstant, Option<byte[]>> details) {
public HoodieDefaultTimeline(Stream<HoodieInstant> instants, Function<HoodieInstant, Option<byte[]>> details) {
this.details = details;
setInstants(instants.collect(Collectors.toList()));
}
@@ -64,8 +62,8 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
final MessageDigest md;
try {
md = MessageDigest.getInstance(HASHING_ALGORITHM);
this.instants.stream().forEach(i -> md.update(
StringUtils.joinUsingDelim("_", i.getTimestamp(), i.getAction(), i.getState().name()).getBytes()));
this.instants.stream().forEach(i -> md
.update(StringUtils.joinUsingDelim("_", i.getTimestamp(), i.getAction(), i.getState().name()).getBytes()));
} catch (NoSuchAlgorithmException nse) {
throw new HoodieException(nse);
}
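The digest loop above condenses each instant to "&lt;timestamp&gt;_&lt;action&gt;_&lt;state&gt;". A self-contained sketch, assuming SHA-256 for HASHING_ALGORITHM and String.join in place of StringUtils.joinUsingDelim; the hex finalization is an assumption, since the diff only shows the update loop:

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.List;

public class TimelineHashDemo {
  public static void main(String[] args) throws NoSuchAlgorithmException {
    // each instant contributes "<timestamp>_<action>_<state>" to the digest
    List<String[]> instants = Arrays.asList(
        new String[] {"20191010201940", "commit", "COMPLETED"},
        new String[] {"20191010202500", "deltacommit", "INFLIGHT"});
    MessageDigest md = MessageDigest.getInstance("SHA-256");
    for (String[] i : instants) {
      md.update(String.join("_", i[0], i[1], i[2]).getBytes());
    }
    StringBuilder hash = new StringBuilder();
    for (byte b : md.digest()) {
      hash.append(String.format("%02x", b));
    }
    // stable as long as instant order and fields are stable
    System.out.println(hash);
  }
}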
@@ -78,13 +76,11 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
*
* @deprecated
*/
public HoodieDefaultTimeline() {
}
public HoodieDefaultTimeline() {}
@Override
public HoodieTimeline filterInflights() {
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight),
details);
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight), details);
}
@Override
@@ -115,24 +111,22 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
@Override
public HoodieTimeline filterPendingCompactionTimeline() {
return new HoodieDefaultTimeline(
instants.stream().filter(s -> s.getAction().equals(HoodieTimeline.COMPACTION_ACTION)),
details);
instants.stream().filter(s -> s.getAction().equals(HoodieTimeline.COMPACTION_ACTION)), details);
}
@Override
public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) {
return new HoodieDefaultTimeline(instants.stream().filter(
s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER)
&& HoodieTimeline.compareTimestamps(
s.getTimestamp(), endTs, LESSER_OR_EQUAL)), details);
return new HoodieDefaultTimeline(
instants.stream().filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER)
&& HoodieTimeline.compareTimestamps(s.getTimestamp(), endTs, LESSER_OR_EQUAL)),
details);
}
@Override
public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) {
return new HoodieDefaultTimeline(
instants.stream()
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER))
.limit(numCommits), details);
return new HoodieDefaultTimeline(instants.stream()
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER)).limit(numCommits),
details);
}
@Override
@@ -183,8 +177,7 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
@Override
public boolean containsOrBeforeTimelineStarts(String instant) {
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant))
|| isBeforeTimelineStarts(instant);
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant)) || isBeforeTimelineStarts(instant);
}
@Override
@@ -218,8 +211,7 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
@Override
public String toString() {
return this.getClass().getName() + ": " + instants.stream().map(Object::toString)
.collect(Collectors.joining(","));
return this.getClass().getName() + ": " + instants.stream().map(Object::toString).collect(Collectors.joining(","));
}
}

View File

@@ -25,8 +25,8 @@ import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.util.FSUtils;
/**
* A Hoodie Instant represents a action done on a hoodie dataset. All actions start with a inflight
* instant and then create a completed instant after done.
* A Hoodie Instant represents an action done on a hoodie dataset. All actions start with an inflight instant and then
* create a completed instant when done.
*
* @see HoodieTimeline
*/
@@ -76,7 +76,7 @@ public class HoodieInstant implements Serializable {
}
public HoodieInstant(boolean isInflight, String action, String timestamp) {
//TODO: vb - Preserving for avoiding cascading changes. This constructor will be updated in subsequent PR
// TODO: vb - Preserving for avoiding cascading changes. This constructor will be updated in subsequent PR
this.state = isInflight ? State.INFLIGHT : State.COMPLETED;
this.action = action;
this.timestamp = timestamp;
@@ -151,9 +151,7 @@ public class HoodieInstant implements Serializable {
return false;
}
HoodieInstant that = (HoodieInstant) o;
return state == that.state
&& Objects.equals(action, that.action)
&& Objects.equals(timestamp, that.timestamp);
return state == that.state && Objects.equals(action, that.action) && Objects.equals(timestamp, that.timestamp);
}
public State getState() {

View File

@@ -55,8 +55,7 @@ public class CompactionOpDTO {
@JsonProperty("metrics")
private Map<String, Double> metrics;
public static CompactionOpDTO fromCompactionOperation(String compactionInstantTime,
CompactionOperation op) {
public static CompactionOpDTO fromCompactionOperation(String compactionInstantTime, CompactionOperation op) {
CompactionOpDTO dto = new CompactionOpDTO();
dto.fileId = op.getFileId();
dto.compactionInstantTime = compactionInstantTime;
@@ -70,8 +69,9 @@ public class CompactionOpDTO {
}
public static Pair<String, CompactionOperation> toCompactionOperation(CompactionOpDTO dto) {
return Pair.of(dto.compactionInstantTime, new CompactionOperation(dto.fileId, dto.partitionPath,
dto.baseInstantTime, Option.ofNullable(dto.dataFileCommitTime), dto.deltaFilePaths,
Option.ofNullable(dto.dataFilePath), dto.metrics));
return Pair.of(dto.compactionInstantTime,
new CompactionOperation(dto.fileId, dto.partitionPath, dto.baseInstantTime,
Option.ofNullable(dto.dataFileCommitTime), dto.deltaFilePaths, Option.ofNullable(dto.dataFilePath),
dto.metrics));
}
}

View File

@@ -50,8 +50,8 @@ public class FileGroupDTO {
}
public static HoodieFileGroup toFileGroup(FileGroupDTO dto, HoodieTableMetaClient metaClient) {
HoodieFileGroup fileGroup = new HoodieFileGroup(dto.partition, dto.id,
TimelineDTO.toTimeline(dto.timeline, metaClient));
HoodieFileGroup fileGroup =
new HoodieFileGroup(dto.partition, dto.id, TimelineDTO.toTimeline(dto.timeline, metaClient));
dto.slices.stream().map(FileSliceDTO::toFileSlice).forEach(fileSlice -> fileGroup.addFileSlice(fileSlice));
return fileGroup;
}

View File

@@ -39,7 +39,7 @@ public class TimelineDTO {
}
public static HoodieTimeline toTimeline(TimelineDTO dto, HoodieTableMetaClient metaClient) {
//TODO: For Now, we will assume, only active-timeline will be transferred.
// TODO: For now, we will assume only active-timeline will be transferred.
return new HoodieDefaultTimeline(dto.instants.stream().map(InstantDTO::toInstant),
metaClient.getActiveTimeline()::getInstantDetails);
}

View File

@@ -56,13 +56,11 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Common thread-safe implementation for multiple TableFileSystemView Implementations.
* Provides uniform handling of
* (a) Loading file-system views from underlying file-system
* (b) Pending compaction operations and changing file-system views based on that
* (c) Thread-safety in loading and managing file system views for this dataset.
* (d) resetting file-system views
* The actual mechanism of fetching file slices from different view storages is delegated to sub-classes.
* Common thread-safe implementation for multiple TableFileSystemView Implementations. Provides uniform handling of (a)
* loading file-system views from the underlying file-system, (b) pending compaction operations and changing file-system
* views based on that, (c) thread-safety in loading and managing file system views for this dataset, and (d) resetting
* file-system views. The actual mechanism of fetching file slices from different view storages is delegated to
* sub-classes.
*/
public abstract class AbstractTableFileSystemView implements SyncableFileSystemView, Serializable {
@@ -94,15 +92,14 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
refreshTimeline(visibleActiveTimeline);
// Load Pending Compaction Operations
resetPendingCompactionOperations(
CompactionUtils.getAllPendingCompactionOperations(metaClient).values()
.stream().map(e -> Pair.of(e.getKey(),
CompactionOperation.convertFromAvroRecordInstance(e.getValue()))));
resetPendingCompactionOperations(CompactionUtils.getAllPendingCompactionOperations(metaClient).values().stream()
.map(e -> Pair.of(e.getKey(), CompactionOperation.convertFromAvroRecordInstance(e.getValue()))));
}
/**
* Refresh commits timeline
* @param visibleActiveTimeline Visible Active Timeline
*
* @param visibleActiveTimeline Visible Active Timeline
*/
protected void refreshTimeline(HoodieTimeline visibleActiveTimeline) {
this.visibleCommitsAndCompactionTimeline = visibleActiveTimeline.getCommitsAndCompactionTimeline();
@@ -117,13 +114,12 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
long fgBuildTimeTakenMs = timer.endTimer();
timer.startTimer();
// Group by partition for efficient updates for both InMemory and DiskBased structures.
fileGroups.stream().collect(Collectors.groupingBy(HoodieFileGroup::getPartitionPath)).entrySet()
.forEach(entry -> {
String partition = entry.getKey();
if (!isPartitionAvailableInStore(partition)) {
storePartitionView(partition, entry.getValue());
}
});
fileGroups.stream().collect(Collectors.groupingBy(HoodieFileGroup::getPartitionPath)).entrySet().forEach(entry -> {
String partition = entry.getKey();
if (!isPartitionAvailableInStore(partition)) {
storePartitionView(partition, entry.getValue());
}
});
long storePartitionsTs = timer.endTimer();
log.info("addFilesToView: NumFiles=" + statuses.length + ", FileGroupsCreationTime=" + fgBuildTimeTakenMs
+ ", StoreTimeTaken=" + storePartitionsTs);
@@ -141,19 +137,17 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
protected List<HoodieFileGroup> buildFileGroups(Stream<HoodieDataFile> dataFileStream,
Stream<HoodieLogFile> logFileStream, HoodieTimeline timeline, boolean addPendingCompactionFileSlice) {
Map<Pair<String, String>, List<HoodieDataFile>> dataFiles = dataFileStream
.collect(Collectors.groupingBy((dataFile) -> {
Map<Pair<String, String>, List<HoodieDataFile>> dataFiles =
dataFileStream.collect(Collectors.groupingBy((dataFile) -> {
String partitionPathStr = getPartitionPathFromFilePath(dataFile.getPath());
return Pair.of(partitionPathStr, dataFile.getFileId());
}));
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = logFileStream
.collect(Collectors.groupingBy((logFile) -> {
String partitionPathStr = FSUtils.getRelativePartitionPath(
new Path(metaClient.getBasePath()),
logFile.getPath().getParent());
return Pair.of(partitionPathStr, logFile.getFileId());
}));
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = logFileStream.collect(Collectors.groupingBy((logFile) -> {
String partitionPathStr =
FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), logFile.getPath().getParent());
return Pair.of(partitionPathStr, logFile.getFileId());
}));
Set<Pair<String, String>> fileIdSet = new HashSet<>(dataFiles.keySet());
fileIdSet.addAll(logFiles.keySet());
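The two collectors above bucket data files and log files by a (partitionPath, fileId) composite key. A condensed JDK-only sketch, with AbstractMap.SimpleEntry standing in for Hudi's Pair and deliberately naive path parsing:

import java.util.AbstractMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class FileGroupingDemo {
  public static void main(String[] args) {
    Map<Map.Entry<String, String>, List<String>> byPartitionAndFileId =
        Stream.of("2019/10/01/f1.parquet", "2019/10/01/f2.parquet", "2019/10/02/f1.parquet")
            .collect(Collectors.groupingBy(path -> {
              int slash = path.lastIndexOf('/');
              String partition = path.substring(0, slash);                        // relative partition path
              String fileId = path.substring(slash + 1, path.indexOf('.', slash)); // file id
              // SimpleEntry has value-based equals/hashCode, so it works as a grouping key
              return new AbstractMap.SimpleEntry<>(partition, fileId);
            }));
    byPartitionAndFileId.forEach((k, v) -> System.out.println(k + " -> " + v));
  }
}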
@@ -228,8 +222,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
long beginLsTs = System.currentTimeMillis();
FileStatus[] statuses = metaClient.getFs().listStatus(partitionPath);
long endLsTs = System.currentTimeMillis();
log.info("#files found in partition (" + partitionPathStr + ") =" + statuses.length
+ ", Time taken =" + (endLsTs - beginLsTs));
log.info("#files found in partition (" + partitionPathStr + ") =" + statuses.length + ", Time taken ="
+ (endLsTs - beginLsTs));
List<HoodieFileGroup> groups = addFilesToView(statuses);
if (groups.isEmpty()) {
@@ -253,9 +247,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
* @param statuses List of File-Status
*/
private Stream<HoodieDataFile> convertFileStatusesToDataFiles(FileStatus[] statuses) {
Predicate<FileStatus> roFilePredicate = fileStatus ->
fileStatus.getPath().getName()
.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
Predicate<FileStatus> roFilePredicate = fileStatus -> fileStatus.getPath().getName()
.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
return Arrays.stream(statuses).filter(roFilePredicate).map(HoodieDataFile::new);
}
@@ -265,9 +258,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
* @param statuses List of File-Status
*/
private Stream<HoodieLogFile> convertFileStatusesToLogFiles(FileStatus[] statuses) {
Predicate<FileStatus> rtFilePredicate = fileStatus ->
fileStatus.getPath().getName()
.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
Predicate<FileStatus> rtFilePredicate = fileStatus -> fileStatus.getPath().getName()
.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
return Arrays.stream(statuses).filter(rtFilePredicate).map(HoodieLogFile::new);
}
@@ -311,8 +303,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
log.info("File Slice (" + fileSlice + ") is in pending compaction");
// Data file is filtered out of the file-slice as the corresponding compaction
// instant is not completed yet.
FileSlice transformed = new FileSlice(fileSlice.getPartitionPath(),
fileSlice.getBaseInstantTime(), fileSlice.getFileId());
FileSlice transformed =
new FileSlice(fileSlice.getPartitionPath(), fileSlice.getBaseInstantTime(), fileSlice.getFileId());
fileSlice.getLogFiles().forEach(transformed::addLogFile);
return transformed;
}
@@ -359,14 +351,10 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
ensurePartitionLoadedCorrectly(partitionPath);
return fetchAllStoredFileGroups(partitionPath)
.map(fileGroup -> Option.fromJavaOptional(fileGroup.getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.filter(df -> !isDataFileDueToPendingCompaction(df))
.findFirst()))
.filter(Option::isPresent)
.map(Option::get);
.filter(dataFile -> HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst()))
.filter(Option::isPresent).map(Option::get);
} finally {
readLock.unlock();
}
@@ -378,13 +366,10 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
return fetchHoodieFileGroup(partitionPath, fileId)
.map(fileGroup -> fileGroup.getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
instantTime, HoodieTimeline.EQUAL))
.filter(df -> !isDataFileDueToPendingCompaction(df))
.findFirst().orElse(null));
return fetchHoodieFileGroup(partitionPath, fileId).map(fileGroup -> fileGroup.getAllDataFiles()
.filter(
dataFile -> HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), instantTime, HoodieTimeline.EQUAL))
.filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst().orElse(null));
} finally {
readLock.unlock();
}
@@ -409,10 +394,9 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
try {
readLock.lock();
return fetchAllStoredFileGroups().map(fileGroup -> {
return Option.fromJavaOptional(fileGroup.getAllDataFiles()
.filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
&& !isDataFileDueToPendingCompaction(dataFile))
.findFirst());
return Option.fromJavaOptional(
fileGroup.getAllDataFiles().filter(dataFile -> commitsToReturn.contains(dataFile.getCommitTime())
&& !isDataFileDueToPendingCompaction(dataFile)).findFirst());
}).filter(Option::isPresent).map(Option::get);
} finally {
readLock.unlock();
@@ -466,19 +450,17 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
return fetchAllStoredFileGroups(partitionPath)
.map(fileGroup -> {
FileSlice fileSlice = fileGroup.getLatestFileSlice().get();
// if the file-group is under compaction, pick the latest before compaction instant time.
Option<Pair<String, CompactionOperation>> compactionWithInstantPair =
getPendingCompactionOperationWithInstant(fileSlice.getFileGroupId());
if (compactionWithInstantPair.isPresent()) {
String compactionInstantTime = compactionWithInstantPair.get().getLeft();
return fileGroup.getLatestFileSliceBefore(compactionInstantTime);
}
return Option.of(fileSlice);
})
.map(Option::get);
return fetchAllStoredFileGroups(partitionPath).map(fileGroup -> {
FileSlice fileSlice = fileGroup.getLatestFileSlice().get();
// if the file-group is under compaction, pick the latest before compaction instant time.
Option<Pair<String, CompactionOperation>> compactionWithInstantPair =
getPendingCompactionOperationWithInstant(fileSlice.getFileGroupId());
if (compactionWithInstantPair.isPresent()) {
String compactionInstantTime = compactionWithInstantPair.get().getLeft();
return fileGroup.getLatestFileSliceBefore(compactionInstantTime);
}
return Option.of(fileSlice);
}).map(Option::get);
} finally {
readLock.unlock();
}
@@ -491,8 +473,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
Stream<FileSlice> fileSliceStream =
fetchLatestFileSlicesBeforeOrOn(partitionPath, maxCommitTime);
Stream<FileSlice> fileSliceStream = fetchLatestFileSlicesBeforeOrOn(partitionPath, maxCommitTime);
if (includeFileSlicesInPendingCompaction) {
return fileSliceStream.map(fs -> filterDataFileAfterPendingCompaction(fs));
} else {
@@ -509,17 +490,14 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
readLock.lock();
String partition = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partition);
return fetchAllStoredFileGroups(partition)
.map(fileGroup -> {
Option<FileSlice> fileSlice = fileGroup.getLatestFileSliceBeforeOrOn(maxInstantTime);
// if the file-group is under construction, pick the latest before compaction instant time.
if (fileSlice.isPresent()) {
fileSlice = Option.of(fetchMergedFileSlice(fileGroup, fileSlice.get()));
}
return fileSlice;
})
.filter(Option::isPresent)
.map(Option::get);
return fetchAllStoredFileGroups(partition).map(fileGroup -> {
Option<FileSlice> fileSlice = fileGroup.getLatestFileSliceBeforeOrOn(maxInstantTime);
// if the file-group is under construction, pick the latest before compaction instant time.
if (fileSlice.isPresent()) {
fileSlice = Option.of(fetchMergedFileSlice(fileGroup, fileSlice.get()));
}
return fileSlice;
}).filter(Option::isPresent).map(Option::get);
} finally {
readLock.unlock();
}
@@ -665,8 +643,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
* @return file-slice stream
*/
Stream<FileSlice> fetchAllFileSlices(String partitionPath) {
return fetchAllStoredFileGroups(partitionPath)
.map(HoodieFileGroup::getAllFileSlices)
return fetchAllStoredFileGroups(partitionPath).map(HoodieFileGroup::getAllFileSlices)
.flatMap(sliceList -> sliceList);
}
@@ -674,26 +651,21 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
* Default implementation for fetching latest data-files for the partition-path
*/
Stream<HoodieDataFile> fetchLatestDataFiles(final String partitionPath) {
return fetchAllStoredFileGroups(partitionPath)
.map(this::getLatestDataFile)
.filter(Option::isPresent)
return fetchAllStoredFileGroups(partitionPath).map(this::getLatestDataFile).filter(Option::isPresent)
.map(Option::get);
}
protected Option<HoodieDataFile> getLatestDataFile(HoodieFileGroup fileGroup) {
return Option.fromJavaOptional(
fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst());
return Option
.fromJavaOptional(fileGroup.getAllDataFiles().filter(df -> !isDataFileDueToPendingCompaction(df)).findFirst());
}
/**
* Default implementation for fetching latest data-files across all partitions
*/
Stream<HoodieDataFile> fetchLatestDataFiles() {
return fetchAllStoredFileGroups()
.map(this::getLatestDataFile)
.filter(Option::isPresent)
.map(Option::get);
return fetchAllStoredFileGroups().map(this::getLatestDataFile).filter(Option::isPresent).map(Option::get);
}
/**
@@ -702,8 +674,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
* @param partitionPath partition-path
*/
Stream<HoodieDataFile> fetchAllDataFiles(String partitionPath) {
return fetchAllStoredFileGroups(partitionPath)
.map(HoodieFileGroup::getAllDataFiles)
return fetchAllStoredFileGroups(partitionPath).map(HoodieFileGroup::getAllDataFiles)
.flatMap(dataFileList -> dataFileList);
}
@@ -719,9 +690,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
* Default implementation for fetching latest file-slices for a partition path
*/
Stream<FileSlice> fetchLatestFileSlices(String partitionPath) {
return fetchAllStoredFileGroups(partitionPath)
.map(HoodieFileGroup::getLatestFileSlice)
.filter(Option::isPresent)
return fetchAllStoredFileGroups(partitionPath).map(HoodieFileGroup::getLatestFileSlice).filter(Option::isPresent)
.map(Option::get);
}
@@ -731,11 +700,9 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
* @param partitionPath Partition Path
* @param maxCommitTime Instant Time
*/
Stream<FileSlice> fetchLatestFileSlicesBeforeOrOn(String partitionPath,
String maxCommitTime) {
Stream<FileSlice> fetchLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime) {
return fetchAllStoredFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime))
.filter(Option::isPresent)
.map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime)).filter(Option::isPresent)
.map(Option::get);
}
@@ -746,8 +713,8 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
* @param penultimateSlice Penultimate file slice for a file-group in commit timeline order
*/
private static FileSlice mergeCompactionPendingFileSlices(FileSlice lastSlice, FileSlice penultimateSlice) {
FileSlice merged = new FileSlice(penultimateSlice.getPartitionPath(),
penultimateSlice.getBaseInstantTime(), penultimateSlice.getFileId());
FileSlice merged = new FileSlice(penultimateSlice.getPartitionPath(), penultimateSlice.getBaseInstantTime(),
penultimateSlice.getFileId());
if (penultimateSlice.getDataFile().isPresent()) {
merged.setDataFile(penultimateSlice.getDataFile().get());
}
@@ -782,24 +749,26 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
/**
* Default implementation for fetching latest data-file
*
* @param partitionPath Partition path
* @param fileId File Id
* @return Data File if present
*/
protected Option<HoodieDataFile> fetchLatestDataFile(String partitionPath, String fileId) {
return Option.fromJavaOptional(fetchLatestDataFiles(partitionPath)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
return Option
.fromJavaOptional(fetchLatestDataFiles(partitionPath).filter(fs -> fs.getFileId().equals(fileId)).findFirst());
}
/**
* Default implementation for fetching file-slice
*
* @param partitionPath Partition path
* @param fileId File Id
* @return File Slice if present
*/
protected Option<FileSlice> fetchLatestFileSlice(String partitionPath, String fileId) {
return Option.fromJavaOptional(fetchLatestFileSlices(partitionPath)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
return Option
.fromJavaOptional(fetchLatestFileSlices(partitionPath).filter(fs -> fs.getFileId().equals(fileId)).findFirst());
}
@Override
@@ -841,6 +810,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
/**
* Return Only Commits and Compaction timeline for building file-groups
*
* @return
*/
public HoodieTimeline getVisibleCommitsAndCompactionTimeline() {

View File

@@ -28,25 +28,21 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* A container that can potentially hold one or more dataset's
* file-system views. There is one view for each dataset. This is a view built against a timeline containing completed
* actions. In an embedded timeline-server mode, this typically holds only one dataset's view.
* In a stand-alone server mode, this can hold more than one dataset's views.
* A container that can potentially hold one or more datasets' file-system views. There is one view for each dataset.
* This is a view built against a timeline containing completed actions. In an embedded timeline-server mode, this
* typically holds only one dataset's view. In a stand-alone server mode, this can hold more than one dataset's views.
*
* FileSystemView can be stored "locally" using the following storage mechanisms:
* a. In Memory
* b. Spillable Map
* c. RocksDB
* FileSystemView can be stored "locally" using the following storage mechanisms: a. In Memory b. Spillable Map c.
* RocksDB
*
* But there can be cases where the file-system view is managed remotely (for example, Embedded Timeline Server). In
* this case, the clients will configure a remote filesystem view client (RemoteHoodieTableFileSystemView) for the
* dataset which can connect to the remote file system view and fetch views. There are 2 modes here: REMOTE_FIRST and
* REMOTE_ONLY
* REMOTE_FIRST : The file-system view implementation on client side will act as a remote proxy. In case, if there
* is problem (or exceptions) querying remote file-system view, a backup local file-system view(using
* either one of in-memory, spillable, rocksDB) is used to server file-system view queries
* REMOTE_ONLY : In this case, there is no backup local file-system view. If there is problem (or exceptions)
* querying remote file-system view, then the exceptions are percolated back to client.
* REMOTE_ONLY. REMOTE_FIRST: the file-system view implementation on the client side will act as a remote proxy. If
* there is a problem (or exceptions) querying the remote file-system view, a backup local file-system view (using
* either one of in-memory, spillable, RocksDB) is used to serve file-system view queries. REMOTE_ONLY: in this case,
* there is no backup local file-system view. If there is a problem (or exceptions) querying the remote file-system
* view, the exceptions are percolated back to the client.
*
* FileSystemViewManager is designed to encapsulate the file-system view storage from clients using the file-system
* view. FileSystemViewManager uses a factory to construct a specific implementation of the file-system view and passes it to
@@ -73,6 +69,7 @@ public class FileSystemViewManager {
/**
* Drops the reference to File-System Views. Future calls to the view result in creating a new view
*
* @param basePath
*/
public void clearFileSystemView(String basePath) {
@@ -84,12 +81,12 @@ public class FileSystemViewManager {
/**
* Main API to get the file-system view for the base-path
*
* @param basePath
* @return
*/
public SyncableFileSystemView getFileSystemView(String basePath) {
return globalViewMap.computeIfAbsent(basePath,
(path) -> viewCreator.apply(path, viewStorageConfig));
return globalViewMap.computeIfAbsent(basePath, (path) -> viewCreator.apply(path, viewStorageConfig));
}
/**
@@ -104,9 +101,10 @@ public class FileSystemViewManager {
/**
* Create RocksDB based file System view for a dataset
*
* @param conf Hadoop Configuration
* @param viewConf View Storage Configuration
* @param basePath Base Path of dataset
* @param viewConf View Storage Configuration
* @param basePath Base Path of dataset
* @return
*/
private static RocksDbBasedFileSystemView createRocksDBBasedFileSystemView(SerializableConfiguration conf,
@@ -118,9 +116,10 @@ public class FileSystemViewManager {
/**
* Create a spillable Map based file System view for a dataset
*
* @param conf Hadoop Configuration
* @param viewConf View Storage Configuration
* @param basePath Base Path of dataset
* @param viewConf View Storage Configuration
* @param basePath Base Path of dataset
* @return
*/
private static SpillableMapBasedFileSystemView createSpillableMapBasedFileSystemView(SerializableConfiguration conf,
@@ -134,9 +133,10 @@ public class FileSystemViewManager {
/**
* Create an in-memory file System view for a dataset
*
* @param conf Hadoop Configuration
* @param viewConf View Storage Configuration
* @param basePath Base Path of dataset
* @param viewConf View Storage Configuration
* @param basePath Base Path of dataset
* @return
*/
private static HoodieTableFileSystemView createInMemoryFileSystemView(SerializableConfiguration conf,
@@ -149,27 +149,29 @@ public class FileSystemViewManager {
/**
* Create a remote file System view for a dataset
*
* @param conf Hadoop Configuration
* @param viewConf View Storage Configuration
* @param metaClient Hoodie Table MetaClient for the dataset.
* @param viewConf View Storage Configuration
* @param metaClient Hoodie Table MetaClient for the dataset.
* @return
*/
private static RemoteHoodieTableFileSystemView createRemoteFileSystemView(SerializableConfiguration conf,
FileSystemViewStorageConfig viewConf, HoodieTableMetaClient metaClient) {
logger.info("Creating remote view for basePath " + metaClient.getBasePath() + ". Server="
+ viewConf.getRemoteViewServerHost() + ":" + viewConf.getRemoteViewServerPort());
return new RemoteHoodieTableFileSystemView(viewConf.getRemoteViewServerHost(),
viewConf.getRemoteViewServerPort(), metaClient);
return new RemoteHoodieTableFileSystemView(viewConf.getRemoteViewServerHost(), viewConf.getRemoteViewServerPort(),
metaClient);
}
/**
* Main Factory method for building file-system views
* @param conf Hadoop Configuration
*
* @param conf Hadoop Configuration
* @param config View Storage Configuration
* @return
*/
public static FileSystemViewManager createViewManager(
final SerializableConfiguration conf, final FileSystemViewStorageConfig config) {
public static FileSystemViewManager createViewManager(final SerializableConfiguration conf,
final FileSystemViewStorageConfig config) {
logger.info("Creating View Manager with storage type :" + config.getStorageType());
switch (config.getStorageType()) {
case EMBEDDED_KV_STORE:
@@ -186,9 +188,8 @@ public class FileSystemViewManager {
(basePath, viewConfig) -> createInMemoryFileSystemView(conf, viewConfig, basePath));
case REMOTE_ONLY:
logger.info("Creating remote only table view");
return new FileSystemViewManager(conf, config,
(basePath, viewConfig) -> createRemoteFileSystemView(conf, viewConfig,
new HoodieTableMetaClient(conf.newCopy(), basePath)));
return new FileSystemViewManager(conf, config, (basePath, viewConfig) -> createRemoteFileSystemView(conf,
viewConfig, new HoodieTableMetaClient(conf.newCopy(), basePath)));
case REMOTE_FIRST:
logger.info("Creating remote first table view");
return new FileSystemViewManager(conf, config, (basePath, viewConfig) -> {

View File

@@ -30,7 +30,7 @@ import org.apache.hudi.config.DefaultHoodieConfig;
*/
public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
//Property Names
// Property Names
public static final String FILESYSTEM_VIEW_STORAGE_TYPE = "hoodie.filesystem.view.type";
public static final String FILESYSTEM_VIEW_INCREMENTAL_SYNC_MODE = "hoodie.filesystem.view.incr.timeline.sync.enable";
public static final String FILESYSTEM_SECONDARY_VIEW_STORAGE_TYPE = "hoodie.filesystem.view.secondary.type";
@@ -85,8 +85,9 @@ public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
public long getMaxMemoryForPendingCompaction() {
long totalMemory = Long.parseLong(props.getProperty(FILESYSTEM_VIEW_SPILLABLE_MEM));
long reservedForPendingComaction = new Double(totalMemory * Double.parseDouble(
props.getProperty(FILESYSTEM_VIEW_PENDING_COMPACTION_MEM_FRACTION))).longValue();
long reservedForPendingComaction =
new Double(totalMemory * Double.parseDouble(props.getProperty(FILESYSTEM_VIEW_PENDING_COMPACTION_MEM_FRACTION)))
.longValue();
return reservedForPendingComaction;
}
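The arithmetic above reserves a configured fraction of the spillable-map memory for pending compaction. A worked sketch with illustrative values; a plain (long) cast is equivalent to the boxed new Double(...).longValue() detour:

public class PendingCompactionMemDemo {
  public static void main(String[] args) {
    long totalMemory = 100 * 1024 * 1024;   // FILESYSTEM_VIEW_SPILLABLE_MEM (example value)
    double fraction = 0.01;                 // FILESYSTEM_VIEW_PENDING_COMPACTION_MEM_FRACTION (example value)
    // same arithmetic as above, without the boxed detour
    long reservedForPendingCompaction = (long) (totalMemory * fraction);
    System.out.println(reservedForPendingCompaction + " bytes reserved"); // 1048576
  }
}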
@@ -167,26 +168,26 @@ public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
}
public FileSystemViewStorageConfig build() {
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_STORAGE_TYPE),
FILESYSTEM_VIEW_STORAGE_TYPE, DEFAULT_VIEW_STORAGE_TYPE.name());
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_STORAGE_TYPE), FILESYSTEM_VIEW_STORAGE_TYPE,
DEFAULT_VIEW_STORAGE_TYPE.name());
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_INCREMENTAL_SYNC_MODE),
FILESYSTEM_VIEW_INCREMENTAL_SYNC_MODE, DEFAULT_FILESYSTEM_VIEW_INCREMENTAL_SYNC_MODE);
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_SECONDARY_VIEW_STORAGE_TYPE),
FILESYSTEM_SECONDARY_VIEW_STORAGE_TYPE, DEFAULT_SECONDARY_VIEW_STORAGE_TYPE.name());
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REMOTE_HOST),
FILESYSTEM_VIEW_REMOTE_HOST, DEFUALT_REMOTE_VIEW_SERVER_HOST);
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REMOTE_PORT),
FILESYSTEM_VIEW_REMOTE_PORT, DEFAULT_REMOTE_VIEW_SERVER_PORT.toString());
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REMOTE_HOST), FILESYSTEM_VIEW_REMOTE_HOST,
DEFUALT_REMOTE_VIEW_SERVER_HOST);
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REMOTE_PORT), FILESYSTEM_VIEW_REMOTE_PORT,
DEFAULT_REMOTE_VIEW_SERVER_PORT.toString());
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_SPILLABLE_DIR),
FILESYSTEM_VIEW_SPILLABLE_DIR, DEFAULT_VIEW_SPILLABLE_DIR);
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_SPILLABLE_MEM),
FILESYSTEM_VIEW_SPILLABLE_MEM, DEFAULT_MAX_MEMORY_FOR_VIEW.toString());
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_SPILLABLE_DIR), FILESYSTEM_VIEW_SPILLABLE_DIR,
DEFAULT_VIEW_SPILLABLE_DIR);
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_SPILLABLE_MEM), FILESYSTEM_VIEW_SPILLABLE_MEM,
DEFAULT_MAX_MEMORY_FOR_VIEW.toString());
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_PENDING_COMPACTION_MEM_FRACTION),
FILESYSTEM_VIEW_PENDING_COMPACTION_MEM_FRACTION, DEFAULT_MEM_FRACTION_FOR_PENDING_COMPACTION.toString());
setDefaultOnCondition(props, !props.containsKey(ROCKSDB_BASE_PATH_PROP),
ROCKSDB_BASE_PATH_PROP, DEFAULT_ROCKSDB_BASE_PATH);
setDefaultOnCondition(props, !props.containsKey(ROCKSDB_BASE_PATH_PROP), ROCKSDB_BASE_PATH_PROP,
DEFAULT_ROCKSDB_BASE_PATH);
// Validations
FileSystemViewStorageType.valueOf(props.getProperty(FILESYSTEM_VIEW_STORAGE_TYPE));
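The builder above repeats a single guarded-default idiom for every key. A minimal, self-contained sketch of that idiom (the helper's semantics are inferred from the call sites; this is not Hudi's DefaultHoodieConfig, and the MEMORY value is assumed):

import java.util.Properties;

public class DefaultsSketch {

  // Assumed semantics of setDefaultOnCondition: write the default only when the guard condition holds
  static void setDefaultOnCondition(Properties props, boolean condition, String key, String value) {
    if (condition) {
      props.setProperty(key, value);
    }
  }

  public static void main(String[] args) {
    Properties props = new Properties();
    setDefaultOnCondition(props, !props.containsKey("hoodie.filesystem.view.type"),
        "hoodie.filesystem.view.type", "MEMORY");
    System.out.println(props.getProperty("hoodie.filesystem.view.type")); // prints MEMORY
  }
}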

View File

@@ -40,6 +40,7 @@ import org.apache.log4j.Logger;
/**
* TableFileSystemView implementation based on in-memory storage.
*
* @see TableFileSystemView
* @since 0.3.0
*/
@@ -115,13 +116,11 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
in.defaultReadObject();
}
private void writeObject(java.io.ObjectOutputStream out)
throws IOException {
private void writeObject(java.io.ObjectOutputStream out) throws IOException {
out.defaultWriteObject();
}
@@ -133,10 +132,9 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
@Override
protected void resetPendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations) {
// Build fileId to Pending Compaction Instants
this.fgIdToPendingCompaction = createFileIdToPendingCompactionMap(
operations.map(entry -> {
return Pair.of(entry.getValue().getFileGroupId(), Pair.of(entry.getKey(),entry.getValue()));
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
this.fgIdToPendingCompaction = createFileIdToPendingCompactionMap(operations.map(entry -> {
return Pair.of(entry.getValue().getFileGroupId(), Pair.of(entry.getKey(), entry.getValue()));
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
}
@Override
@@ -161,8 +159,8 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
}
/**
* Given a partition path, obtain all file groups within it. All methods that work at the
* partition level go through this.
* Given a partition path, obtain all file groups within it. All methods that work at the partition level go through
* this.
*/
@Override
Stream<HoodieFileGroup> fetchAllStoredFileGroups(String partition) {

View File

@@ -157,18 +157,19 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
log.info("Syncing pending compaction instant (" + instant + ")");
HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(metaClient, instant.getTimestamp());
List<Pair<String, CompactionOperation>> pendingOps =
CompactionUtils.getPendingCompactionOperations(instant, compactionPlan).map(p -> Pair.of(p.getValue().getKey(),
CompactionOperation.convertFromAvroRecordInstance(p.getValue().getValue()))).collect(Collectors.toList());
CompactionUtils.getPendingCompactionOperations(instant, compactionPlan)
.map(p -> Pair.of(p.getValue().getKey(),
CompactionOperation.convertFromAvroRecordInstance(p.getValue().getValue())))
.collect(Collectors.toList());
// First, update Pending compaction instants
addPendingCompactionOperations(pendingOps.stream());
Map<String, List<Pair<String, HoodieFileGroup>>> partitionToFileGroups =
pendingOps.stream().map(opPair -> {
String compactionInstantTime = opPair.getKey();
HoodieFileGroup fileGroup = new HoodieFileGroup(opPair.getValue().getFileGroupId(), timeline);
fileGroup.addNewFileSliceAtInstant(compactionInstantTime);
return Pair.of(compactionInstantTime, fileGroup);
}).collect(Collectors.groupingBy(x -> x.getValue().getPartitionPath()));
Map<String, List<Pair<String, HoodieFileGroup>>> partitionToFileGroups = pendingOps.stream().map(opPair -> {
String compactionInstantTime = opPair.getKey();
HoodieFileGroup fileGroup = new HoodieFileGroup(opPair.getValue().getFileGroupId(), timeline);
fileGroup.addNewFileSliceAtInstant(compactionInstantTime);
return Pair.of(compactionInstantTime, fileGroup);
}).collect(Collectors.groupingBy(x -> x.getValue().getPartitionPath()));
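// Example of the grouping above (instants and partitions invented): pairs ("001", fg1@2019/10/01) and
// ("002", fg2@2019/10/01) both land under the key "2019/10/01", yielding one list of pending
// compaction file-groups per partition.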
partitionToFileGroups.entrySet().forEach(entry -> {
if (isPartitionAvailableInStore(entry.getKey())) {
applyDeltaFileSlicesToPartitionView(entry.getKey(),
@@ -185,8 +186,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
*/
private void addCommitInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
log.info("Syncing committed instant (" + instant + ")");
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(instant).get(),
HoodieCommitMetadata.class);
HoodieCommitMetadata commitMetadata =
HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(instant).get(), HoodieCommitMetadata.class);
commitMetadata.getPartitionToWriteStats().entrySet().stream().forEach(entry -> {
String partition = entry.getKey();
if (isPartitionAvailableInStore(partition)) {
@@ -196,8 +197,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
new Path(String.format("%s/%s", metaClient.getBasePath(), p.getPath())));
return status;
}).toArray(FileStatus[]::new);
List<HoodieFileGroup> fileGroups = buildFileGroups(statuses, timeline.filterCompletedAndCompactionInstants(),
false);
List<HoodieFileGroup> fileGroups =
buildFileGroups(statuses, timeline.filterCompletedAndCompactionInstants(), false);
applyDeltaFileSlicesToPartitionView(partition, fileGroups, DeltaApplyMode.ADD);
} else {
log.warn("Skipping partition (" + partition + ") when syncing instant (" + instant + ") as it is not loaded");
@@ -214,8 +215,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
*/
private void addRestoreInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
log.info("Syncing restore instant (" + instant + ")");
HoodieRestoreMetadata metadata = AvroUtils.deserializeAvroMetadata(
timeline.getInstantDetails(instant).get(), HoodieRestoreMetadata.class);
HoodieRestoreMetadata metadata =
AvroUtils.deserializeAvroMetadata(timeline.getInstantDetails(instant).get(), HoodieRestoreMetadata.class);
Map<String, List<Pair<String, String>>> partitionFiles =
metadata.getHoodieRestoreMetadata().entrySet().stream().flatMap(entry -> {
@@ -238,8 +239,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
*/
private void addRollbackInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
log.info("Syncing rollback instant (" + instant + ")");
HoodieRollbackMetadata metadata = AvroUtils.deserializeAvroMetadata(
timeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class);
HoodieRollbackMetadata metadata =
AvroUtils.deserializeAvroMetadata(timeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class);
metadata.getPartitionMetadata().entrySet().stream().forEach(e -> {
removeFileSlicesForPartition(timeline, instant, e.getKey(), e.getValue().getSuccessDeleteFiles());
@@ -255,16 +256,16 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
*/
private void addCleanInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
log.info("Syncing cleaner instant (" + instant + ")");
HoodieCleanMetadata cleanMetadata = AvroUtils
.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
HoodieCleanMetadata cleanMetadata =
AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
cleanMetadata.getPartitionMetadata().entrySet().stream().forEach(entry -> {
removeFileSlicesForPartition(timeline, instant, entry.getKey(), entry.getValue().getSuccessDeleteFiles());
});
log.info("Done Syncing cleaner instant (" + instant + ")");
}
private void removeFileSlicesForPartition(HoodieTimeline timeline, HoodieInstant instant,
String partition, List<String> paths) {
private void removeFileSlicesForPartition(HoodieTimeline timeline, HoodieInstant instant, String partition,
List<String> paths) {
if (isPartitionAvailableInStore(partition)) {
log.info("Removing file slices for partition (" + partition + ") for instant (" + instant + ")");
FileStatus[] statuses = paths.stream().map(p -> {
@@ -272,8 +273,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
status.setPath(new Path(p));
return status;
}).toArray(FileStatus[]::new);
List<HoodieFileGroup> fileGroups = buildFileGroups(statuses,
timeline.filterCompletedAndCompactionInstants(), false);
List<HoodieFileGroup> fileGroups =
buildFileGroups(statuses, timeline.filterCompletedAndCompactionInstants(), false);
applyDeltaFileSlicesToPartitionView(partition, fileGroups, DeltaApplyMode.REMOVE);
} else {
log.warn("Skipping partition (" + partition + ") when syncing instant (" + instant + ") as it is not loaded");
@@ -284,8 +285,7 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
* Apply mode whether to add or remove the delta view
*/
enum DeltaApplyMode {
ADD,
REMOVE
ADD, REMOVE
}
/**
@@ -306,27 +306,27 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
List<HoodieFileGroup> fileGroups = fetchAllStoredFileGroups(partition).collect(Collectors.toList());
/**
* Note that while finding the new data/log files added/removed, the path stored in metadata will be missing
* the base-path, scheme and authority. Ensure the matching process takes care of this discrepancy.
* Note that while finding the new data/log files added/removed, the path stored in metadata will be missing the
* base-path, scheme and authority. Ensure the matching process takes care of this discrepancy.
*/
Map<String, HoodieDataFile> viewDataFiles = fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
.map(FileSlice::getDataFile).filter(Option::isPresent).map(Option::get)
.map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
//Note: Delta Log Files and Data Files can be empty when adding/removing pending compactions
// Note: Delta Log Files and Data Files can be empty when adding/removing pending compactions
Map<String, HoodieDataFile> deltaDataFiles = deltaFileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
.map(FileSlice::getDataFile).filter(Option::isPresent).map(Option::get)
.map(df -> Pair.of(Path.getPathWithoutSchemeAndAuthority(new Path(df.getPath())).toString(), df))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Map<String, HoodieLogFile> viewLogFiles = fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
.flatMap(FileSlice::getLogFiles)
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Map<String, HoodieLogFile> deltaLogFiles = deltaFileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices)
.flatMap(FileSlice::getLogFiles)
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Map<String, HoodieLogFile> viewLogFiles =
fileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices).flatMap(FileSlice::getLogFiles)
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Map<String, HoodieLogFile> deltaLogFiles =
deltaFileGroups.stream().flatMap(HoodieFileGroup::getAllRawFileSlices).flatMap(FileSlice::getLogFiles)
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
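// Illustration of the key normalization above (path invented): for a listed file
// hdfs://nn:8020/tmp/hoodie/2019/10/01/f1_1-0-1_20191001.parquet,
// Path.getPathWithoutSchemeAndAuthority(...) yields /tmp/hoodie/2019/10/01/f1_1-0-1_20191001.parquet,
// the scheme/authority-free form that matches the paths stored in commit metadata.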
switch (mode) {
case ADD:

View File

@@ -38,8 +38,8 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* A file system view which proxies request to a preferred File System View implementation. In case of error,
* flip all subsequent calls to a backup file-system view implementation.
* A file system view which proxies request to a preferred File System View implementation. In case of error, flip all
* subsequent calls to a backup file-system view implementation.
*/
public class PriorityBasedFileSystemView implements SyncableFileSystemView, Serializable {
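The flip-to-backup behavior described in the class comment is easy to see in isolation. A minimal sketch of the pattern, with all names invented (this is not Hudi's implementation):

import java.util.function.Function;

public class FallbackProxy<T> {

  private final T preferred;
  private final T secondary;
  private boolean errorOnPreferred = false;

  public FallbackProxy(T preferred, T secondary) {
    this.preferred = preferred;
    this.secondary = secondary;
  }

  public <R> R execute(Function<T, R> call) {
    if (errorOnPreferred) {
      return call.apply(secondary); // once flipped, every subsequent call goes to the backup view
    }
    try {
      return call.apply(preferred);
    } catch (RuntimeException e) {
      errorOnPreferred = true; // flip permanently on the first error
      return call.apply(secondary);
    }
  }
}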
@@ -140,8 +140,7 @@ public class PriorityBasedFileSystemView implements SyncableFileSystemView, Seri
@Override
public Option<HoodieDataFile> getDataFileOn(String partitionPath, String instantTime, String fileId) {
return execute(partitionPath, instantTime, fileId, preferredView::getDataFileOn,
secondaryView::getDataFileOn);
return execute(partitionPath, instantTime, fileId, preferredView::getDataFileOn, secondaryView::getDataFileOn);
}
@Override

View File

@@ -57,31 +57,26 @@ import org.apache.log4j.Logger;
public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView, Serializable {
private static final String BASE_URL = "/v1/hoodie/view";
public static final String LATEST_PARTITION_SLICES_URL = String.format("%s/%s", BASE_URL,
"slices/partition/latest/");
public static final String LATEST_PARTITION_SLICE_URL = String.format("%s/%s", BASE_URL,
"slices/file/latest/");
public static final String LATEST_PARTITION_UNCOMPACTED_SLICES_URL = String.format("%s/%s", BASE_URL,
"slices/uncompacted/partition/latest/");
public static final String LATEST_PARTITION_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/partition/latest/");
public static final String LATEST_PARTITION_SLICE_URL = String.format("%s/%s", BASE_URL, "slices/file/latest/");
public static final String LATEST_PARTITION_UNCOMPACTED_SLICES_URL =
String.format("%s/%s", BASE_URL, "slices/uncompacted/partition/latest/");
public static final String ALL_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/all");
public static final String LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/merged/beforeoron/latest/");
public static final String LATEST_SLICES_RANGE_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/range/latest/");
public static final String LATEST_SLICES_RANGE_INSTANT_URL = String.format("%s/%s", BASE_URL, "slices/range/latest/");
public static final String LATEST_SLICES_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/beforeoron/latest/");
public static final String PENDING_COMPACTION_OPS =
String.format("%s/%s", BASE_URL, "compactions/pending/");
public static final String PENDING_COMPACTION_OPS = String.format("%s/%s", BASE_URL, "compactions/pending/");
public static final String LATEST_PARTITION_DATA_FILES_URL = String.format("%s/%s", BASE_URL,
"datafiles/latest/partition");
public static final String LATEST_PARTITION_DATA_FILE_URL = String.format("%s/%s", BASE_URL,
"datafile/latest/partition");
public static final String LATEST_PARTITION_DATA_FILES_URL =
String.format("%s/%s", BASE_URL, "datafiles/latest/partition");
public static final String LATEST_PARTITION_DATA_FILE_URL =
String.format("%s/%s", BASE_URL, "datafile/latest/partition");
public static final String ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all");
public static final String LATEST_ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all/latest/");
public static final String LATEST_DATA_FILE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "datafile/on/latest/");
public static final String LATEST_DATA_FILE_ON_INSTANT_URL = String.format("%s/%s", BASE_URL, "datafile/on/latest/");
public static final String LATEST_DATA_FILES_RANGE_INSTANT_URL =
String.format("%s/%s", BASE_URL, "datafiles/range/latest/");
@@ -123,8 +118,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
private boolean closed = false;
private enum RequestMethod {
GET,
POST
GET, POST
}
public RemoteHoodieTableFileSystemView(String server, int port, HoodieTableMetaClient metaClient) {
@@ -140,8 +134,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
RequestMethod method) throws IOException {
Preconditions.checkArgument(!closed, "View already closed");
URIBuilder builder = new URIBuilder().setHost(serverHost).setPort(serverPort).setPath(requestPath)
.setScheme("http");
URIBuilder builder =
new URIBuilder().setHost(serverHost).setPort(serverPort).setPath(requestPath).setScheme("http");
queryParameters.entrySet().stream().forEach(entry -> {
builder.addParameter(entry.getKey(), entry.getValue());
@@ -213,8 +207,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List<DataFileDTO> dataFiles = executeRequest(LATEST_PARTITION_DATA_FILES_URL, paramsMap,
new TypeReference<List<DataFileDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -226,8 +219,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParams();
try {
List<DataFileDTO> dataFiles = executeRequest(LATEST_ALL_DATA_FILES, paramsMap,
new TypeReference<List<DataFileDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -239,8 +231,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
try {
List<DataFileDTO> dataFiles = executeRequest(LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL, paramsMap,
new TypeReference<List<DataFileDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -250,12 +241,10 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
@Override
public Option<HoodieDataFile> getDataFileOn(String partitionPath, String instantTime, String fileId) {
Map<String, String> paramsMap = getParamsWithAdditionalParams(partitionPath,
new String[]{INSTANT_PARAM, FILEID_PARAM},
new String[]{instantTime, fileId});
new String[] {INSTANT_PARAM, FILEID_PARAM}, new String[] {instantTime, fileId});
try {
List<DataFileDTO> dataFiles = executeRequest(LATEST_DATA_FILE_ON_INSTANT_URL, paramsMap,
new TypeReference<List<DataFileDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -264,12 +253,11 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
@Override
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
Map<String, String> paramsMap = getParams(INSTANTS_PARAM,
StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
Map<String, String> paramsMap =
getParams(INSTANTS_PARAM, StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
try {
List<DataFileDTO> dataFiles = executeRequest(LATEST_DATA_FILES_RANGE_INSTANT_URL, paramsMap,
new TypeReference<List<DataFileDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -280,9 +268,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List<DataFileDTO> dataFiles = executeRequest(ALL_DATA_FILES, paramsMap,
new TypeReference<List<DataFileDTO>>() {
}, RequestMethod.GET);
List<DataFileDTO> dataFiles =
executeRequest(ALL_DATA_FILES, paramsMap, new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -294,8 +281,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List<FileSliceDTO> dataFiles = executeRequest(LATEST_PARTITION_SLICES_URL, paramsMap,
new TypeReference<List<FileSliceDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -307,8 +293,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
try {
List<FileSliceDTO> dataFiles = executeRequest(LATEST_PARTITION_SLICE_URL, paramsMap,
new TypeReference<List<FileSliceDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
return Option.fromJavaOptional(dataFiles.stream().map(FileSliceDTO::toFileSlice).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -320,8 +305,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List<FileSliceDTO> dataFiles = executeRequest(LATEST_PARTITION_UNCOMPACTED_SLICES_URL, paramsMap,
new TypeReference<List<FileSliceDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -332,12 +316,11 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime,
boolean includeFileSlicesInPendingCompaction) {
Map<String, String> paramsMap = getParamsWithAdditionalParams(partitionPath,
new String[]{MAX_INSTANT_PARAM, INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM},
new String[]{maxCommitTime, String.valueOf(includeFileSlicesInPendingCompaction)});
new String[] {MAX_INSTANT_PARAM, INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM},
new String[] {maxCommitTime, String.valueOf(includeFileSlicesInPendingCompaction)});
try {
List<FileSliceDTO> dataFiles = executeRequest(LATEST_SLICES_BEFORE_ON_INSTANT_URL, paramsMap,
new TypeReference<List<FileSliceDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -349,8 +332,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxInstantTime);
try {
List<FileSliceDTO> dataFiles = executeRequest(LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL, paramsMap,
new TypeReference<List<FileSliceDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -359,12 +341,11 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
@Override
public Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn) {
Map<String, String> paramsMap = getParams(INSTANTS_PARAM,
StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
Map<String, String> paramsMap =
getParams(INSTANTS_PARAM, StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
try {
List<FileSliceDTO> dataFiles = executeRequest(LATEST_SLICES_RANGE_INSTANT_URL, paramsMap,
new TypeReference<List<FileSliceDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -375,9 +356,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
public Stream<FileSlice> getAllFileSlices(String partitionPath) {
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List<FileSliceDTO> dataFiles = executeRequest(ALL_SLICES_URL, paramsMap,
new TypeReference<List<FileSliceDTO>>() {
}, RequestMethod.GET);
List<FileSliceDTO> dataFiles =
executeRequest(ALL_SLICES_URL, paramsMap, new TypeReference<List<FileSliceDTO>>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -389,8 +369,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List<FileGroupDTO> fileGroups = executeRequest(ALL_FILEGROUPS_FOR_PARTITION_URL, paramsMap,
new TypeReference<List<FileGroupDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<FileGroupDTO>>() {}, RequestMethod.GET);
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -400,8 +379,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
public boolean refresh() {
Map<String, String> paramsMap = getParams();
try {
return executeRequest(REFRESH_DATASET, paramsMap, new TypeReference<Boolean>() {
}, RequestMethod.POST);
return executeRequest(REFRESH_DATASET, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.POST);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
@@ -412,8 +390,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParams();
try {
List<CompactionOpDTO> dtos = executeRequest(PENDING_COMPACTION_OPS, paramsMap,
new TypeReference<List<CompactionOpDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<CompactionOpDTO>>() {}, RequestMethod.GET);
return dtos.stream().map(CompactionOpDTO::toCompactionOperation);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -434,9 +411,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
public Option<HoodieInstant> getLastInstant() {
Map<String, String> paramsMap = getParams();
try {
List<InstantDTO> instants = executeRequest(LAST_INSTANT, paramsMap,
new TypeReference<List<InstantDTO>>() {
}, RequestMethod.GET);
List<InstantDTO> instants =
executeRequest(LAST_INSTANT, paramsMap, new TypeReference<List<InstantDTO>>() {}, RequestMethod.GET);
return Option.fromJavaOptional(instants.stream().map(InstantDTO::toInstant).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -447,9 +423,8 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
public HoodieTimeline getTimeline() {
Map<String, String> paramsMap = getParams();
try {
TimelineDTO timeline = executeRequest(TIMELINE, paramsMap,
new TypeReference<TimelineDTO>() {
}, RequestMethod.GET);
TimelineDTO timeline =
executeRequest(TIMELINE, paramsMap, new TypeReference<TimelineDTO>() {}, RequestMethod.GET);
return TimelineDTO.toTimeline(timeline, metaClient);
} catch (IOException e) {
throw new HoodieRemoteException(e);
@@ -458,7 +433,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
@Override
public void sync() {
//noop
// noop
}
@Override
@@ -466,8 +441,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
try {
List<DataFileDTO> dataFiles = executeRequest(LATEST_PARTITION_DATA_FILE_URL, paramsMap,
new TypeReference<List<DataFileDTO>>() {
}, RequestMethod.GET);
new TypeReference<List<DataFileDTO>>() {}, RequestMethod.GET);
return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
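Every endpoint above parses its JSON response through an anonymous TypeReference subclass, Jackson's way of preserving a generic element type that List.class alone would erase. A minimal sketch of that round-trip (payload invented):

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.List;

public class TypeRefSketch {

  public static void main(String[] args) throws Exception {
    String json = "[\"20191001\",\"20191002\"]";
    // The anonymous subclass captures List<String>, so Jackson binds each element as a String
    List<String> instants = new ObjectMapper().readValue(json, new TypeReference<List<String>>() {});
    System.out.println(instants); // [20191001, 20191002]
  }
}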

View File

@@ -44,18 +44,15 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* A file-system view implementation on top of embedded Rocks DB store.
* For each dataset, three column families are added for storing
* (1) File-Slices and Data Files for View lookups
* (2) Pending compaction operations
* (3) Partitions tracked
* A file-system view implementation on top of embedded Rocks DB store. For each dataset, three column families are
* added for storing (1) File-Slices and Data Files for View lookups (2) Pending compaction operations (3) Partitions
* tracked
*
* Fine-grained retrieval APIs to fetch the latest file-slice and data-file, which are common operations
* for ingestion/compaction, are supported.
* Fine-grained retrieval APIs to fetch the latest file-slice and data-file, which are common operations for
* ingestion/compaction, are supported.
*
* TODO: vb The current implementation works in embedded server mode where each restart blows away the view stores.
* To support view-state preservation across restarts, the Hoodie timeline also needs to be stored
* in order to detect changes to the timeline across restarts.
* TODO: vb The current implementation works in embedded server mode where each restart blows away the view stores. To
* support view-state preservation across restarts, the Hoodie timeline also needs to be stored in order to detect
* changes to the timeline across restarts.
*/
public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSystemView {
@@ -69,8 +66,8 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
private boolean closed = false;
public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient,
HoodieTimeline visibleActiveTimeline, FileSystemViewStorageConfig config) {
public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
FileSystemViewStorageConfig config) {
super(config.isIncrementalTimelineSyncEnabled());
this.config = config;
this.schemaHelper = new RocksDBSchemaHelper(metaClient);
@@ -78,8 +75,8 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
init(metaClient, visibleActiveTimeline);
}
public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient,
HoodieTimeline visibleActiveTimeline, FileStatus[] fileStatuses, FileSystemViewStorageConfig config) {
public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
FileStatus[] fileStatuses, FileSystemViewStorageConfig config) {
this(metaClient, visibleActiveTimeline, config);
addFilesToView(fileStatuses);
}
@@ -212,9 +209,9 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
Map<String, HoodieLogFile> logFiles = oldSlice.getLogFiles()
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Map<String, HoodieLogFile> deltaLogFiles = fs.getLogFiles()
.map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Map<String, HoodieLogFile> deltaLogFiles =
fs.getLogFiles().map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
switch (mode) {
case ADD: {
@@ -237,7 +234,7 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
});
deltaLogFiles.keySet().stream().forEach(p -> logFiles.remove(p));
//Add remaining log files back
// Add remaining log files back
logFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
if (newFileSlice.getDataFile().isPresent() || (newFileSlice.getLogFiles().count() > 0)) {
log.info("Adding back new file-slice after remove FS=" + newFileSlice);
@@ -262,15 +259,14 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
@Override
Stream<Pair<String, CompactionOperation>> fetchPendingCompactionOperations() {
return rocksDB.<Pair<String, CompactionOperation>>prefixSearch(
schemaHelper.getColFamilyForPendingCompaction(), "").map(Pair::getValue);
return rocksDB.<Pair<String, CompactionOperation>>prefixSearch(schemaHelper.getColFamilyForPendingCompaction(), "")
.map(Pair::getValue);
}
@Override
Stream<HoodieDataFile> fetchAllDataFiles(String partitionPath) {
return rocksDB.<HoodieDataFile>prefixSearch(schemaHelper.getColFamilyForView(),
schemaHelper.getPrefixForDataFileViewByPartition(partitionPath))
.map(Pair::getValue);
schemaHelper.getPrefixForDataFileViewByPartition(partitionPath)).map(Pair::getValue);
}
@Override
@@ -281,46 +277,50 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
@Override
Stream<HoodieFileGroup> fetchAllStoredFileGroups() {
return getFileGroups(rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
schemaHelper.getPrefixForSliceView()).map(Pair::getValue));
return getFileGroups(
rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(), schemaHelper.getPrefixForSliceView())
.map(Pair::getValue));
}
@Override
protected Option<FileSlice> fetchLatestFileSlice(String partitionPath, String fileId) {
// Retries only file-slices of the file and filters for the latest
return Option.ofNullable(rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId))
.map(Pair::getValue)
.reduce(null, (x, y) ->
((x == null) ? y : (y == null) ? null : HoodieTimeline.compareTimestamps(x.getBaseInstantTime(),
y.getBaseInstantTime(), HoodieTimeline.GREATER) ? x : y)));
return Option.ofNullable(rocksDB
.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId))
.map(Pair::getValue).reduce(null,
(x, y) -> ((x == null) ? y
: (y == null) ? null
: HoodieTimeline.compareTimestamps(x.getBaseInstantTime(), y.getBaseInstantTime(),
HoodieTimeline.GREATER) ? x : y)));
}
@Override
protected Option<HoodieDataFile> fetchLatestDataFile(String partitionPath, String fileId) {
// Retries only file-slices of the file and filters for the latest
return Option.ofNullable(rocksDB.<HoodieDataFile>prefixSearch(schemaHelper.getColFamilyForView(),
schemaHelper.getPrefixForDataFileViewByPartitionFile(partitionPath, fileId))
.map(Pair::getValue)
.reduce(null, (x, y) ->
((x == null) ? y : (y == null) ? null : HoodieTimeline.compareTimestamps(x.getCommitTime(),
y.getCommitTime(), HoodieTimeline.GREATER) ? x : y)));
return Option
.ofNullable(rocksDB
.<HoodieDataFile>prefixSearch(schemaHelper.getColFamilyForView(),
schemaHelper.getPrefixForDataFileViewByPartitionFile(partitionPath, fileId))
.map(Pair::getValue).reduce(null,
(x, y) -> ((x == null) ? y
: (y == null) ? null
: HoodieTimeline.compareTimestamps(x.getCommitTime(), y.getCommitTime(), HoodieTimeline.GREATER)
? x
: y)));
}
@Override
Option<HoodieFileGroup> fetchHoodieFileGroup(String partitionPath, String fileId) {
return Option.fromJavaOptional(
getFileGroups(rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId))
.map(Pair::getValue)).findFirst());
Option<HoodieFileGroup> fetchHoodieFileGroup(String partitionPath, String fileId) {
return Option.fromJavaOptional(getFileGroups(rocksDB.<FileSlice>prefixSearch(schemaHelper.getColFamilyForView(),
schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId)).map(Pair::getValue)).findFirst());
}
private Stream<HoodieFileGroup> getFileGroups(Stream<FileSlice> sliceStream) {
return sliceStream.map(s -> Pair.of(Pair.of(s.getPartitionPath(), s.getFileId()), s))
.collect(Collectors.groupingBy(Pair::getKey)).entrySet().stream().map(slicePair -> {
HoodieFileGroup fg =
new HoodieFileGroup(slicePair.getKey().getKey(), slicePair.getKey().getValue(),
getVisibleCommitsAndCompactionTimeline());
HoodieFileGroup fg = new HoodieFileGroup(slicePair.getKey().getKey(), slicePair.getKey().getValue(),
getVisibleCommitsAndCompactionTimeline());
slicePair.getValue().forEach(e -> fg.addFileSlice(e.getValue()));
return fg;
});
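The two fetchLatest* methods above select a maximum via a null-seeded reduce rather than an Optional-based max. A minimal sketch of that idiom (timestamps invented; Hudi instant times compare correctly as plain strings):

import java.util.stream.Stream;

public class LatestInstantSketch {

  public static void main(String[] args) {
    // Seed with null; the first element replaces it, later elements win only if they compare greater
    String latest = Stream.of("20191001120000", "20191003090000", "20191002110000")
        .reduce(null, (x, y) -> x == null ? y : (x.compareTo(y) > 0 ? x : y));
    System.out.println(latest); // 20191003090000
  }
}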

View File

@@ -46,8 +46,8 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
private final long maxMemoryForPendingCompaction;
private final String baseStoreDir;
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient,
HoodieTimeline visibleActiveTimeline, FileSystemViewStorageConfig config) {
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
FileSystemViewStorageConfig config) {
super(config.isIncrementalTimelineSyncEnabled());
this.maxMemoryForFileGroupMap = config.getMaxMemoryForFileGroupMap();
this.maxMemoryForPendingCompaction = config.getMaxMemoryForPendingCompaction();
@@ -55,8 +55,8 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
init(metaClient, visibleActiveTimeline);
}
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient,
HoodieTimeline visibleActiveTimeline, FileStatus[] fileStatuses, FileSystemViewStorageConfig config) {
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
FileStatus[] fileStatuses, FileSystemViewStorageConfig config) {
this(metaClient, visibleActiveTimeline, config);
addFilesToView(fileStatuses);
}
@@ -64,12 +64,11 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
@Override
protected Map<String, List<HoodieFileGroup>> createPartitionToFileGroups() {
try {
log.info("Creating Partition To File groups map using external spillable Map. Max Mem="
+ maxMemoryForFileGroupMap + ", BaseDir=" + baseStoreDir);
log.info("Creating Partition To File groups map using external spillable Map. Max Mem=" + maxMemoryForFileGroupMap
+ ", BaseDir=" + baseStoreDir);
new File(baseStoreDir).mkdirs();
return (Map<String, List<HoodieFileGroup>>)
(new ExternalSpillableMap<>(maxMemoryForFileGroupMap, baseStoreDir, new DefaultSizeEstimator(),
new DefaultSizeEstimator<>()));
return (Map<String, List<HoodieFileGroup>>) (new ExternalSpillableMap<>(maxMemoryForFileGroupMap, baseStoreDir,
new DefaultSizeEstimator(), new DefaultSizeEstimator<>()));
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -78,12 +77,11 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
protected Map<HoodieFileGroupId, Pair<String, CompactionOperation>> createFileIdToPendingCompactionMap(
Map<HoodieFileGroupId, Pair<String, CompactionOperation>> fgIdToPendingCompaction) {
try {
log.info("Creating Pending Compaction map using external spillable Map. Max Mem="
+ maxMemoryForPendingCompaction + ", BaseDir=" + baseStoreDir);
log.info("Creating Pending Compaction map using external spillable Map. Max Mem=" + maxMemoryForPendingCompaction
+ ", BaseDir=" + baseStoreDir);
new File(baseStoreDir).mkdirs();
Map<HoodieFileGroupId, Pair<String, CompactionOperation>> pendingMap =
new ExternalSpillableMap<>(maxMemoryForPendingCompaction, baseStoreDir, new DefaultSizeEstimator(),
new DefaultSizeEstimator<>());
Map<HoodieFileGroupId, Pair<String, CompactionOperation>> pendingMap = new ExternalSpillableMap<>(
maxMemoryForPendingCompaction, baseStoreDir, new DefaultSizeEstimator(), new DefaultSizeEstimator<>());
pendingMap.putAll(fgIdToPendingCompaction);
return pendingMap;
} catch (IOException e) {
@@ -92,20 +90,20 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
}
public Stream<HoodieFileGroup> getAllFileGroups() {
return ((ExternalSpillableMap)partitionToFileGroupsMap).valueStream()
.flatMap(fg -> ((List<HoodieFileGroup>)fg).stream());
return ((ExternalSpillableMap) partitionToFileGroupsMap).valueStream()
.flatMap(fg -> ((List<HoodieFileGroup>) fg).stream());
}
@Override
Stream<Pair<String, CompactionOperation>> fetchPendingCompactionOperations() {
return ((ExternalSpillableMap)fgIdToPendingCompaction).valueStream();
return ((ExternalSpillableMap) fgIdToPendingCompaction).valueStream();
}
@Override
public Stream<HoodieFileGroup> fetchAllStoredFileGroups() {
return ((ExternalSpillableMap)partitionToFileGroupsMap).valueStream().flatMap(fg -> {
return ((List<HoodieFileGroup>)fg).stream();
return ((ExternalSpillableMap) partitionToFileGroupsMap).valueStream().flatMap(fg -> {
return ((List<HoodieFileGroup>) fg).stream();
});
}
}

View File

@@ -47,17 +47,15 @@ import org.apache.hudi.common.HoodieRollbackStat;
public class AvroUtils {
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
Option<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.builder();
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime, Option<Long> durationInMs,
List<HoodieCleanStat> cleanStats) {
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder = ImmutableMap.builder();
int totalDeleted = 0;
String earliestCommitToRetain = null;
for (HoodieCleanStat stat : cleanStats) {
HoodieCleanPartitionMetadata metadata =
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(),
stat.getDeletePathPatterns());
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), stat.getDeletePathPatterns());
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
totalDeleted += stat.getSuccessDeleteFiles().size();
if (earliestCommitToRetain == null) {
@@ -65,78 +63,67 @@ public class AvroUtils {
earliestCommitToRetain = stat.getEarliestCommitToRetain();
}
}
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L),
totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build());
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L), totalDeleted,
earliestCommitToRetain, partitionMetadataBuilder.build());
}
public static HoodieRestoreMetadata convertRestoreMetadata(String startRestoreTime,
Option<Long> durationInMs, List<String> commits, Map<String, List<HoodieRollbackStat>> commitToStats) {
public static HoodieRestoreMetadata convertRestoreMetadata(String startRestoreTime, Option<Long> durationInMs,
List<String> commits, Map<String, List<HoodieRollbackStat>> commitToStats) {
ImmutableMap.Builder<String, List<HoodieRollbackMetadata>> commitToStatBuilder = ImmutableMap.builder();
for (Map.Entry<String, List<HoodieRollbackStat>> commitToStat : commitToStats.entrySet()) {
commitToStatBuilder.put(commitToStat.getKey(), Arrays.asList(convertRollbackMetadata(startRestoreTime,
durationInMs, commits, commitToStat.getValue())));
commitToStatBuilder.put(commitToStat.getKey(),
Arrays.asList(convertRollbackMetadata(startRestoreTime, durationInMs, commits, commitToStat.getValue())));
}
return new HoodieRestoreMetadata(startRestoreTime, durationInMs.orElseGet(() -> -1L), commits,
commitToStatBuilder.build());
}
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime,
Option<Long> durationInMs, List<String> commits, List<HoodieRollbackStat> rollbackStats) {
ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.builder();
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime, Option<Long> durationInMs,
List<String> commits, List<HoodieRollbackStat> rollbackStats) {
ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder = ImmutableMap.builder();
int totalDeleted = 0;
for (HoodieRollbackStat stat : rollbackStats) {
HoodieRollbackPartitionMetadata metadata =
new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
partitionMetadataBuilder
.put(stat.getPartitionPath(), metadata);
HoodieRollbackPartitionMetadata metadata = new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
totalDeleted += stat.getSuccessDeleteFiles().size();
}
return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L),
totalDeleted, commits, partitionMetadataBuilder.build());
return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L), totalDeleted, commits,
partitionMetadataBuilder.build());
}
public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment,
Map<String, List<String>> latestFiles) {
ImmutableMap.Builder<String, HoodieSavepointPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.builder();
ImmutableMap.Builder<String, HoodieSavepointPartitionMetadata> partitionMetadataBuilder = ImmutableMap.builder();
for (Map.Entry<String, List<String>> stat : latestFiles.entrySet()) {
HoodieSavepointPartitionMetadata metadata =
new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue());
HoodieSavepointPartitionMetadata metadata = new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue());
partitionMetadataBuilder.put(stat.getKey(), metadata);
}
return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment,
partitionMetadataBuilder.build());
return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment, partitionMetadataBuilder.build());
}
public static Option<byte[]> serializeCompactionPlan(HoodieCompactionPlan compactionWorkload)
throws IOException {
public static Option<byte[]> serializeCompactionPlan(HoodieCompactionPlan compactionWorkload) throws IOException {
return serializeAvroMetadata(compactionWorkload, HoodieCompactionPlan.class);
}
public static Option<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata)
throws IOException {
public static Option<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata) throws IOException {
return serializeAvroMetadata(metadata, HoodieCleanMetadata.class);
}
public static Option<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata)
throws IOException {
public static Option<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata) throws IOException {
return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class);
}
public static Option<byte[]> serializeRollbackMetadata(
HoodieRollbackMetadata rollbackMetadata) throws IOException {
public static Option<byte[]> serializeRollbackMetadata(HoodieRollbackMetadata rollbackMetadata) throws IOException {
return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class);
}
public static Option<byte[]> serializeRestoreMetadata(
HoodieRestoreMetadata restoreMetadata) throws IOException {
public static Option<byte[]> serializeRestoreMetadata(HoodieRestoreMetadata restoreMetadata) throws IOException {
return serializeAvroMetadata(restoreMetadata, HoodieRestoreMetadata.class);
}
public static <T extends SpecificRecordBase> Option<byte[]> serializeAvroMetadata(T metadata,
Class<T> clazz) throws IOException {
public static <T extends SpecificRecordBase> Option<byte[]> serializeAvroMetadata(T metadata, Class<T> clazz)
throws IOException {
DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
@@ -146,28 +133,23 @@ public class AvroUtils {
return Option.of(baos.toByteArray());
}
public static HoodieCompactionPlan deserializeCompactionPlan(byte[] bytes)
throws IOException {
public static HoodieCompactionPlan deserializeCompactionPlan(byte[] bytes) throws IOException {
return deserializeAvroMetadata(bytes, HoodieCompactionPlan.class);
}
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes)
throws IOException {
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes) throws IOException {
return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class);
}
public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes)
throws IOException {
public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes) throws IOException {
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
}
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes,
Class<T> clazz) throws IOException {
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
throws IOException {
DatumReader<T> reader = new SpecificDatumReader<>(clazz);
FileReader<T> fileReader =
DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
Preconditions
.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
FileReader<T> fileReader = DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
Preconditions.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
return fileReader.next();
}
}
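serializeAvroMetadata and deserializeAvroMetadata above use Avro's container-file writer/reader over an in-memory byte array. A minimal generic-record analogue of that round-trip (schema and values invented; the real helpers operate on generated SpecificRecord classes):

import java.io.ByteArrayOutputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.FileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class AvroRoundTripSketch {

  public static void main(String[] args) throws Exception {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Stat\",\"fields\":[{\"name\":\"partition\",\"type\":\"string\"}]}");
    GenericRecord rec = new GenericData.Record(schema);
    rec.put("partition", "2019/10/01");

    // Write: same DatumWriter + DataFileWriter pattern as serializeAvroMetadata
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema));
    writer.create(schema, baos);
    writer.append(rec);
    writer.close();

    // Read: same openReader(SeekableByteArrayInput, reader) pattern as deserializeAvroMetadata
    FileReader<GenericRecord> reader =
        DataFileReader.openReader(new SeekableByteArrayInput(baos.toByteArray()), new GenericDatumReader<>(schema));
    System.out.println(reader.next().get("partition")); // 2019/10/01
    reader.close();
  }
}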

View File

@@ -48,8 +48,8 @@ public class CompactionUtils {
/**
* Generate compaction operation from file-slice
*
* @param partitionPath Partition path
* @param fileSlice File Slice
* @param partitionPath Partition path
* @param fileSlice File Slice
* @param metricsCaptureFunction Metrics Capture function
* @return Compaction Operation
*/
@@ -74,17 +74,17 @@ public class CompactionUtils {
* Generate compaction plan from file-slices
*
* @param partitionFileSlicePairs list of partition file-slice pairs
* @param extraMetadata Extra Metadata
* @param metricsCaptureFunction Metrics Capture function
* @param extraMetadata Extra Metadata
* @param metricsCaptureFunction Metrics Capture function
*/
public static HoodieCompactionPlan buildFromFileSlices(
List<Pair<String, FileSlice>> partitionFileSlicePairs,
public static HoodieCompactionPlan buildFromFileSlices(List<Pair<String, FileSlice>> partitionFileSlicePairs,
Option<Map<String, String>> extraMetadata,
Option<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFunction) {
HoodieCompactionPlan.Builder builder = HoodieCompactionPlan.newBuilder();
extraMetadata.ifPresent(m -> builder.setExtraMetadata(m));
builder.setOperations(partitionFileSlicePairs.stream().map(pfPair ->
buildFromFileSlice(pfPair.getKey(), pfPair.getValue(), metricsCaptureFunction)).collect(Collectors.toList()));
builder.setOperations(partitionFileSlicePairs.stream()
.map(pfPair -> buildFromFileSlice(pfPair.getKey(), pfPair.getValue(), metricsCaptureFunction))
.collect(Collectors.toList()));
return builder.build();
}
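// Usage sketch for the builder above (names invented): calling
// buildFromFileSlices(Arrays.asList(Pair.of("2019/10/01", slice)), Option.empty(), Option.empty())
// yields a HoodieCompactionPlan with one HoodieCompactionOperation per (partition, file-slice) pair.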
@@ -92,12 +92,10 @@ public class CompactionUtils {
* Build Avro generated Compaction operation payload from compaction operation POJO for serialization
*/
public static HoodieCompactionOperation buildHoodieCompactionOperation(CompactionOperation op) {
return HoodieCompactionOperation.newBuilder().setFileId(op.getFileId())
.setBaseInstantTime(op.getBaseInstantTime())
return HoodieCompactionOperation.newBuilder().setFileId(op.getFileId()).setBaseInstantTime(op.getBaseInstantTime())
.setPartitionPath(op.getPartitionPath())
.setDataFilePath(op.getDataFilePath().isPresent() ? op.getDataFilePath().get() : null)
.setDeltaFilePaths(op.getDeltaFilePaths())
.setMetrics(op.getMetrics()).build();
.setDeltaFilePaths(op.getDeltaFilePaths()).setMetrics(op.getMetrics()).build();
}
/**
@@ -127,11 +125,10 @@ public class CompactionUtils {
}).collect(Collectors.toList());
}
public static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient,
String compactionInstant) throws IOException {
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(
metaClient.getActiveTimeline().getInstantAuxiliaryDetails(
HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
public static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant)
throws IOException {
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(metaClient.getActiveTimeline()
.getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
return compactionPlan;
}
@@ -184,6 +181,7 @@ public class CompactionUtils {
/**
* Return all pending compaction instant times
*
* @return pending compaction instants
*/
public static List<HoodieInstant> getPendingCompactionInstantTimes(HoodieTableMetaClient metaClient) {

View File

@@ -32,12 +32,12 @@ public interface ConsistencyGuard {
* File Visibility
*/
enum FileVisibility {
APPEAR,
DISAPPEAR,
APPEAR, DISAPPEAR,
}
/**
* Wait for a file to be listable, based on a configurable timeout
*
* @param filePath
* @throws IOException when having trouble listing the path
* @throws TimeoutException when retries exhausted
@@ -46,6 +46,7 @@ public interface ConsistencyGuard {
/**
* Wait for a file to be listable, based on a configurable timeout
*
* @param filePath
* @throws IOException when having trouble listing the path
* @throws TimeoutException when retries exhausted
@@ -65,8 +66,9 @@ public interface ConsistencyGuard {
/**
* Wait till the target visibility is reached
* @param dirPath Directory Path
* @param files Files
*
* @param dirPath Directory Path
* @param files Files
* @param targetVisibility Target Visibility
* @throws IOException
* @throws TimeoutException

View File

@@ -106,14 +106,14 @@ public class ConsistencyGuardConfig extends DefaultHoodieConfig {
}
public ConsistencyGuardConfig build() {
setDefaultOnCondition(props, !props.containsKey(CONSISTENCY_CHECK_ENABLED_PROP),
CONSISTENCY_CHECK_ENABLED_PROP, DEFAULT_CONSISTENCY_CHECK_ENABLED);
setDefaultOnCondition(props, !props.containsKey(CONSISTENCY_CHECK_ENABLED_PROP), CONSISTENCY_CHECK_ENABLED_PROP,
DEFAULT_CONSISTENCY_CHECK_ENABLED);
setDefaultOnCondition(props, !props.containsKey(INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP),
INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(DEFAULT_INITIAL_CONSISTENCY_CHECK_INTERVAL_MS));
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP),
MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECK_INTERVAL_MS));
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECKS_PROP),
MAX_CONSISTENCY_CHECKS_PROP, String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECKS));
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECKS_PROP), MAX_CONSISTENCY_CHECKS_PROP,
String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECKS));
return new ConsistencyGuardConfig(props);
}

View File

@@ -63,7 +63,7 @@ public class DFSPropertiesConfiguration {
int ind = line.indexOf('=');
String k = line.substring(0, ind).trim();
String v = line.substring(ind + 1).trim();
return new String[]{k, v};
return new String[] {k, v};
}
private void visitFile(Path file) {
@@ -82,6 +82,7 @@ public class DFSPropertiesConfiguration {
/**
* Add properties from input stream
*
* @param reader Buffered Reader
* @throws IOException
*/

View File

@@ -20,12 +20,13 @@ package org.apache.hudi.common.util;
/**
* Default implementation of size-estimator that uses Twitter's ObjectSizeCalculator
*
* @param <T> type of the object whose size is estimated
*/
public class DefaultSizeEstimator<T> implements SizeEstimator<T> {
@Override
public long sizeEstimate(T t) {
return ObjectSizeCalculator.getObjectSize(t);
}
}

View File

@@ -83,10 +83,7 @@ public class FSUtils {
for (Entry<String, String> prop : System.getenv().entrySet()) {
if (prop.getKey().startsWith(HOODIE_ENV_PROPS_PREFIX)) {
LOG.info("Picking up value for hoodie env var :" + prop.getKey());
conf.set(prop.getKey()
.replace(HOODIE_ENV_PROPS_PREFIX, "")
.replaceAll("_DOT_", "."),
prop.getValue());
conf.set(prop.getKey().replace(HOODIE_ENV_PROPS_PREFIX, "").replaceAll("_DOT_", "."), prop.getValue());
}
}
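// Worked example of the key translation above (variable invented; the prefix value is assumed to be "HOODIE_ENV_"):
// an environment variable HOODIE_ENV_fs_DOT_s3a_DOT_access_DOT_key becomes the Hadoop key fs.s3a.access.key.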
return conf;
@@ -98,12 +95,10 @@ public class FSUtils {
try {
fs = new Path(path).getFileSystem(conf);
} catch (IOException e) {
throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(),
e);
throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(), e);
}
LOG.info(
String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]",
conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString()));
LOG.info(String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]",
conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString()));
return fs;
}
@@ -125,11 +120,11 @@ public class FSUtils {
public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs) {
Preconditions.checkArgument(markerPath.endsWith(HoodieTableMetaClient.MARKER_EXTN));
String markerRootPath = Path.getPathWithoutSchemeAndAuthority(new Path(
String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs))).toString();
String markerRootPath = Path.getPathWithoutSchemeAndAuthority(
new Path(String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs))).toString();
int begin = markerPath.indexOf(markerRootPath);
Preconditions.checkArgument(begin >= 0, "Not in marker dir. Marker Path=" + markerPath
+ ", Expected Marker Root=" + markerRootPath);
Preconditions.checkArgument(begin >= 0,
"Not in marker dir. Marker Path=" + markerPath + ", Expected Marker Root=" + markerRootPath);
String rPath = markerPath.substring(begin + markerRootPath.length() + 1);
return String.format("%s/%s%s", basePath, rPath.replace(HoodieTableMetaClient.MARKER_EXTN, ""),
HoodieFileFormat.PARQUET.getFileExtension());
@@ -159,42 +154,38 @@ public class FSUtils {
/**
* Gets all partition paths assuming date partitioning (year, month, day) three levels down.
*/
public static List<String> getAllPartitionFoldersThreeLevelsDown(FileSystem fs, String basePath)
throws IOException {
public static List<String> getAllPartitionFoldersThreeLevelsDown(FileSystem fs, String basePath) throws IOException {
List<String> datePartitions = new ArrayList<>();
// Avoid listing and including any folders under the metafolder
PathFilter filter = getExcludeMetaPathFilter();
FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"), filter);
for (FileStatus status : folders) {
Path path = status.getPath();
datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(),
path.getParent().getName(), path.getName()));
datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(), path.getParent().getName(),
path.getName()));
}
return datePartitions;
}
/**
* Given a base partition and a partition path, return
* relative path of partition path to the base path
* Given a base path and a partition path, return the relative path of the partition path to the base path
*/
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
basePath = Path.getPathWithoutSchemeAndAuthority(basePath);
partitionPath = Path.getPathWithoutSchemeAndAuthority(partitionPath);
String partitionFullPath = partitionPath.toString();
int partitionStartIndex = partitionFullPath.indexOf(
basePath.getName(),
int partitionStartIndex = partitionFullPath.indexOf(basePath.getName(),
basePath.getParent() == null ? 0 : basePath.getParent().toString().length());
// Partition-Path could be empty for non-partitioned tables
return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? "" :
partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? ""
: partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
}
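Worked examples of the relative-path computation above (paths hypothetical):

// getRelativePartitionPath(new Path("/data/trips"), new Path("hdfs://nn:8020/data/trips/2019/10/01"))
//   -> "2019/10/01"   (scheme and authority are stripped before matching)
// getRelativePartitionPath(new Path("/data/trips"), new Path("/data/trips"))
//   -> ""             (non-partitioned table)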
/**
* Obtain all the partition paths, that are present in this table, denoted by presence of {@link
* HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}
* Obtain all the partition paths, that are present in this table, denoted by presence of
* {@link HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}
*/
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr)
throws IOException {
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr) throws IOException {
final Path basePath = new Path(basePathStr);
final List<String> partitions = new ArrayList<>();
processFiles(fs, basePathStr, (locatedFileStatus) -> {
@@ -221,17 +212,18 @@ public class FSUtils {
}
/**
* Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its
* subdirs are skipped
* @param fs File System
* @param basePathStr Base-Path
* @param consumer Callback for processing
* Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its subdirs
* are skipped
*
* @param fs File System
* @param basePathStr Base-Path
* @param consumer Callback for processing
* @param excludeMetaFolder Exclude .hoodie folder
* @throws IOException
*/
@VisibleForTesting
static void processFiles(FileSystem fs, String basePathStr,
Function<FileStatus, Boolean> consumer, boolean excludeMetaFolder) throws IOException {
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
boolean excludeMetaFolder) throws IOException {
PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER;
FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
for (int i = 0; i < topLevelStatuses.length; i++) {
@@ -254,8 +246,7 @@ public class FSUtils {
}
}
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr,
boolean assumeDatePartitioning)
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr, boolean assumeDatePartitioning)
throws IOException {
if (assumeDatePartitioning) {
return getAllPartitionFoldersThreeLevelsDown(fs, basePathStr);
@@ -304,8 +295,8 @@ public class FSUtils {
}
/**
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
* have commitTime in the file name.
* Get the first part of the file name in the log file. That will be the fileId. Log files do not have commitTime in
* the file name.
*/
public static String getFileIdFromLogPath(Path path) {
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
@@ -326,8 +317,8 @@ public class FSUtils {
}
/**
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
* have commitTime in the file name.
* Get the first part of the file name in the log file. That will be the fileId. Log files do not have commitTime in
* the file name.
*/
public static String getBaseCommitTimeFromLogPath(Path path) {
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
@@ -395,10 +386,11 @@ public class FSUtils {
return Integer.parseInt(matcher.group(4));
}
public static String makeLogFileName(String fileId, String logFileExtension,
String baseCommitTime, int version, String writeToken) {
String suffix = (writeToken == null) ? String.format("%s_%s%s.%d",fileId, baseCommitTime, logFileExtension, version)
: String.format("%s_%s%s.%d_%s", fileId, baseCommitTime, logFileExtension, version, writeToken);
public static String makeLogFileName(String fileId, String logFileExtension, String baseCommitTime, int version,
String writeToken) {
String suffix =
(writeToken == null) ? String.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version)
: String.format("%s_%s%s.%d_%s", fileId, baseCommitTime, logFileExtension, version, writeToken);
return LOG_FILE_PREFIX + suffix;
}
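Assuming LOG_FILE_PREFIX is the hidden-file "." prefix, the format above yields names like these (values hypothetical):

// makeLogFileName("file-1", ".log", "20191010101010", 1, "1-0-1")
//   -> ".file-1_20191010101010.log.1_1-0-1"
// makeLogFileName("file-1", ".log", "20191010101010", 1, null)
//   -> ".file-1_20191010101010.log.1"      (no write token)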
@@ -420,12 +412,11 @@ public class FSUtils {
/**
* Get all the log files for the passed in FileId in the partition path
*/
public static Stream<HoodieLogFile> getAllLogFiles(FileSystem fs, Path partitionPath,
final String fileId, final String logFileExtension, final String baseCommitTime)
throws IOException {
return Arrays.stream(fs.listStatus(partitionPath,
path -> path.getName().startsWith("." + fileId) && path.getName()
.contains(logFileExtension)))
public static Stream<HoodieLogFile> getAllLogFiles(FileSystem fs, Path partitionPath, final String fileId,
final String logFileExtension, final String baseCommitTime) throws IOException {
return Arrays
.stream(fs.listStatus(partitionPath,
path -> path.getName().startsWith("." + fileId) && path.getName().contains(logFileExtension)))
.map(HoodieLogFile::new).filter(s -> s.getBaseCommitTime().equals(baseCommitTime));
}
@@ -433,14 +424,12 @@ public class FSUtils {
* Get the latest log version for the fileId in the partition path
*/
public static Option<Pair<Integer, String>> getLatestLogVersion(FileSystem fs, Path partitionPath,
final String fileId, final String logFileExtension, final String baseCommitTime)
throws IOException {
final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException {
Option<HoodieLogFile> latestLogFile =
getLatestLogFile(
getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime));
getLatestLogFile(getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime));
if (latestLogFile.isPresent()) {
return Option.of(Pair.of(latestLogFile.get().getLogVersion(),
getWriteTokenFromLogPath(latestLogFile.get().getPath())));
return Option
.of(Pair.of(latestLogFile.get().getLogVersion(), getWriteTokenFromLogPath(latestLogFile.get().getPath())));
}
return Option.empty();
}
@@ -450,7 +439,7 @@ public class FSUtils {
*/
public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId,
final String logFileExtension, final String baseCommitTime) throws IOException {
Option<Pair<Integer, String>> currentVersionWithWriteToken =
Option<Pair<Integer, String>> currentVersionWithWriteToken =
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
// handle potential overflow
return (currentVersionWithWriteToken.isPresent()) ? currentVersionWithWriteToken.get().getKey() + 1
@@ -466,10 +455,9 @@ public class FSUtils {
}
/**
* When a file was opened and the task died without closing the stream, another task executor
* cannot open because the existing lease will be active. We will try to recover the lease, from
* HDFS. If a data node went down, it takes about 10 minutes for the lease to be rocovered. But if
* the client dies, this should be instant.
* When a file was opened and the task died without closing the stream, another task executor cannot open because the
* existing lease will be active. We will try to recover the lease, from HDFS. If a data node went down, it takes
* about 10 minutes for the lease to be recovered. But if the client dies, this should be instant.
*/
public static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p)
throws IOException, InterruptedException {
@@ -489,44 +477,38 @@ public class FSUtils {
return recovered;
}
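A minimal sketch of the recover-and-poll loop such a method performs, built on the real DistributedFileSystem.recoverLease API; the attempt count and sleep interval here are illustrative:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

class LeaseRecoverySketch {
  static boolean recoverLeaseWithRetries(DistributedFileSystem dfs, Path p)
      throws IOException, InterruptedException {
    boolean recovered = false;
    for (int attempt = 0; attempt < 10 && !recovered; attempt++) {
      recovered = dfs.recoverLease(p); // true once the lease is released to this client
      if (!recovered) {
        Thread.sleep(1000L); // near-instant if the old client died; minutes if a data node did
      }
    }
    return recovered;
  }
}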
public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath,
Stream<HoodieInstant> instants) {
//TODO - this should be archived when archival is made general for all meta-data
public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath, Stream<HoodieInstant> instants) {
// TODO - this should be archived when archival is made general for all meta-data
// skip MIN_CLEAN_TO_KEEP and delete rest
instants.skip(MIN_CLEAN_TO_KEEP).map(s -> {
try {
return fs.delete(new Path(metaPath, s.getFileName()), false);
} catch (IOException e) {
throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(),
e);
throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(), e);
}
});
}
public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath,
Stream<HoodieInstant> instants) {
//TODO - this should be archived when archival is made general for all meta-data
public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath, Stream<HoodieInstant> instants) {
// TODO - this should be archived when archival is made general for all meta-data
// skip MIN_ROLLBACK_TO_KEEP and delete rest
instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> {
try {
return fs.delete(new Path(metaPath, s.getFileName()), false);
} catch (IOException e) {
throw new HoodieIOException(
"Could not delete rollback meta files " + s.getFileName(), e);
throw new HoodieIOException("Could not delete rollback meta files " + s.getFileName(), e);
}
});
}
public static void deleteOlderRestoreMetaFiles(FileSystem fs, String metaPath,
Stream<HoodieInstant> instants) {
//TODO - this should be archived when archival is made general for all meta-data
public static void deleteOlderRestoreMetaFiles(FileSystem fs, String metaPath, Stream<HoodieInstant> instants) {
// TODO - this should be archived when archival is made general for all meta-data
// skip MIN_ROLLBACK_TO_KEEP and delete rest
instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> {
try {
return fs.delete(new Path(metaPath, s.getFileName()), false);
} catch (IOException e) {
throw new HoodieIOException(
"Could not delete restore meta files " + s.getFileName(), e);
throw new HoodieIOException("Could not delete restore meta files " + s.getFileName(), e);
}
});
}
@@ -547,18 +529,18 @@ public class FSUtils {
public static Path getPartitionPath(Path basePath, String partitionPath) {
// For a non-partitioned table, return only the base-path
return ((partitionPath == null) || (partitionPath.isEmpty())) ? basePath :
new Path(basePath, partitionPath);
return ((partitionPath == null) || (partitionPath.isEmpty())) ? basePath : new Path(basePath, partitionPath);
}
/**
* This is due to HUDI-140 GCS has a different behavior for detecting EOF during seek().
*
* @param inputStream FSDataInputStream
* @return true if the inputstream or the wrapped one is of type GoogleHadoopFSInputStream
*/
public static boolean isGCSInputStream(FSDataInputStream inputStream) {
return inputStream.getClass().getCanonicalName().equals("com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream")
|| inputStream.getWrappedStream().getClass().getCanonicalName()
.equals("com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream");
.equals("com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream");
}
}

View File

@@ -51,12 +51,11 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
@Override
public void waitTillFileAppears(Path filePath) throws TimeoutException {
waitForFileVisibility(filePath, FileVisibility.APPEAR);
waitForFileVisibility(filePath, FileVisibility.APPEAR);
}
@Override
public void waitTillFileDisappears(Path filePath)
throws TimeoutException {
public void waitTillFileDisappears(Path filePath) throws TimeoutException {
waitForFileVisibility(filePath, FileVisibility.DISAPPEAR);
}
@@ -72,13 +71,13 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
/**
* Helper function to wait for all files belonging to a single directory to appear
*
* @param dirPath Dir Path
* @param files Files to appear/disappear
* @param event Appear/Disappear
* @throws TimeoutException
*/
public void waitForFilesVisibility(String dirPath, List<String> files, FileVisibility event)
throws TimeoutException {
public void waitForFilesVisibility(String dirPath, List<String> files, FileVisibility event) throws TimeoutException {
Path dir = new Path(dirPath);
List<String> filesWithoutSchemeAndAuthority =
files.stream().map(f -> Path.getPathWithoutSchemeAndAuthority(new Path(f))).map(p -> p.toString())
@@ -112,6 +111,7 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
/**
* Helper to check file visibility
*
* @param filePath File Path
* @param visibility Visibility
* @return
@@ -140,6 +140,7 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
/**
* Helper function to wait until a file either appears or disappears
*
* @param filePath File Path
* @param visibility
* @throws TimeoutException
@@ -166,6 +167,7 @@ public class FailSafeConsistencyGuard implements ConsistencyGuard {
/**
* Retries the predicate a configurable number of times until the predicate returns success
*
* @param predicate Predicate Function
* @param timedOutMessage Timed-Out message for logging
* @throws TimeoutException when retries are exhausted

View File

@@ -39,10 +39,7 @@ public class FileIOUtils {
public static void deleteDirectory(File directory) throws IOException {
if (directory.exists()) {
Files.walk(directory.toPath())
.sorted(Comparator.reverseOrder())
.map(Path::toFile)
.forEach(File::delete);
Files.walk(directory.toPath()).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
directory.delete();
if (directory.exists()) {
throw new IOException("Unable to delete directory " + directory);

View File

@@ -56,9 +56,8 @@ public class HoodieAvroUtils {
private static ThreadLocal<BinaryDecoder> reuseDecoder = ThreadLocal.withInitial(() -> null);
// All metadata fields are optional strings.
private static final Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList(
Schema.create(Schema.Type.NULL),
Schema.create(Schema.Type.STRING)));
private static final Schema METADATA_FIELD_SCHEMA =
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)));
private static final Schema RECORD_KEY_SCHEMA = initRecordKeySchema();
@@ -66,8 +65,7 @@ public class HoodieAvroUtils {
* Convert a given avro record to bytes
*/
public static byte[] avroToBytes(GenericRecord record) throws IOException {
GenericDatumWriter<GenericRecord> writer =
new GenericDatumWriter<>(record.getSchema());
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(record.getSchema());
ByteArrayOutputStream out = new ByteArrayOutputStream();
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, reuseEncoder.get());
reuseEncoder.set(encoder);
@@ -101,16 +99,16 @@ public class HoodieAvroUtils {
public static Schema addMetadataFields(Schema schema) {
List<Schema.Field> parentFields = new ArrayList<>();
Schema.Field commitTimeField = new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field commitSeqnoField = new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field fileNameField = new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field commitTimeField =
new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field commitSeqnoField =
new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field recordKeyField =
new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field partitionPathField =
new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field fileNameField =
new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
parentFields.add(commitTimeField);
parentFields.add(commitSeqnoField);
@@ -127,15 +125,14 @@ public class HoodieAvroUtils {
}
}
Schema mergedSchema = Schema
.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
Schema mergedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
mergedSchema.setFields(parentFields);
return mergedSchema;
}
private static Schema initRecordKeySchema() {
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema.Field recordKeyField =
new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false);
recordKeySchema.setFields(Arrays.asList(recordKeyField));
return recordKeySchema;
@@ -145,8 +142,8 @@ public class HoodieAvroUtils {
return RECORD_KEY_SCHEMA;
}
public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey,
String partitionPath, String fileName) {
public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey, String partitionPath,
String fileName) {
record.put(HoodieRecord.FILENAME_METADATA_FIELD, fileName);
record.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, partitionPath);
record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recordKey);
@@ -154,9 +151,9 @@ public class HoodieAvroUtils {
}
/**
* Add null fields to passed in schema. Caller is responsible for ensuring there is no duplicates.
* As different query engines have varying constraints regarding treating the case-sensitivity of fields, its best
* to let caller determine that.
* Add null fields to the passed-in schema. The caller is responsible for ensuring there are no duplicates. As
* different query engines have varying constraints regarding the case-sensitivity of fields, it's best to let the
* caller determine that.
*
* @param schema Passed in schema
* @param newFieldNames Null Field names to be added
@@ -176,8 +173,7 @@ public class HoodieAvroUtils {
/**
* Adds the Hoodie commit metadata into the provided Generic Record.
*/
public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime,
String commitSeqno) {
public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime, String commitSeqno) {
record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno);
return record;
@@ -207,8 +203,7 @@ public class HoodieAvroUtils {
}
if (!GenericData.get().validate(newSchema, newRecord)) {
throw new SchemaCompatabilityException(
"Unable to validate the rewritten record " + record + " against schema "
+ newSchema);
"Unable to validate the rewritten record " + record + " against schema " + newSchema);
}
return newRecord;
}
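A small end-to-end sketch of the helpers in this file: widen a schema with the metadata fields, then stamp key and commit metadata onto a record. The schema and values are hypothetical; HoodieAvroUtils is assumed to be on the classpath.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

class AvroMetadataSketch {
  public static void main(String[] args) {
    Schema base = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Trip\",\"fields\":[{\"name\":\"dist\",\"type\":\"int\"}]}");
    Schema withMeta = HoodieAvroUtils.addMetadataFields(base); // adds the five _hoodie_* fields

    GenericRecord rec = new GenericData.Record(withMeta);
    rec.put("dist", 42);
    HoodieAvroUtils.addHoodieKeyToRecord(rec, "key-1", "2019/10/01", "file-1.parquet");
    HoodieAvroUtils.addCommitMetadataToRecord(rec, "20191010101010", "20191010101010_0_1");
    System.out.println(rec);
  }
}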

View File

@@ -27,6 +27,7 @@ import org.apache.log4j.Logger;
/**
* Size Estimator for Hoodie record payload
*
* @param <T>
*/
public class HoodieRecordSizeEstimator<T extends HoodieRecordPayload> implements SizeEstimator<HoodieRecord<T>> {

View File

@@ -23,9 +23,8 @@ import java.util.Deque;
import org.apache.hudi.exception.HoodieException;
/**
* Timing utility to help keep track of execution times of code blocks. This class helps to allow multiple
* timers started at the same time and automatically returns the execution time in the order in which the
* timers are stopped.
* Timing utility to help keep track of execution times of code blocks. This class helps to allow multiple timers
* started at the same time and automatically returns the execution time in the order in which the timers are stopped.
*/
public class HoodieTimer {
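A usage sketch, assuming the startTimer()/endTimer() pair this class exposes (endTimer is called the same way in RocksDBDAO further down):

HoodieTimer timer = new HoodieTimer();
timer.startTimer();              // outer block
timer.startTimer();              // nested block
long innerMs = timer.endTimer(); // pops the most recently started timer
long outerMs = timer.endTimer();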

View File

@@ -49,8 +49,8 @@ public class LogReaderUtils {
HoodieLogBlock block = reader.prev();
if (block instanceof HoodieAvroDataBlock && block != null) {
HoodieAvroDataBlock lastBlock = (HoodieAvroDataBlock) block;
if (completedTimeline.containsOrBeforeTimelineStarts(lastBlock.getLogBlockHeader().get(HeaderMetadataType
.INSTANT_TIME))) {
if (completedTimeline
.containsOrBeforeTimelineStarts(lastBlock.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME))) {
writerSchema = Schema.parse(lastBlock.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
break;
}

View File

@@ -27,12 +27,10 @@ import org.apache.hadoop.fs.Path;
public class NoOpConsistencyGuard implements ConsistencyGuard {
@Override
public void waitTillFileAppears(Path filePath) {
}
public void waitTillFileAppears(Path filePath) {}
@Override
public void waitTillFileDisappears(Path filePath) {
}
public void waitTillFileDisappears(Path filePath) {}
@Override
public void waitTillAllFilesAppear(String dirPath, List<String> files) {

View File

@@ -54,17 +54,13 @@ import java.util.List;
import java.util.Set;
/**
* Contains utility methods for calculating the memory usage of objects. It
* only works on the HotSpot JVM, and infers the actual memory layout (32 bit
* vs. 64 bit word size, compressed object pointers vs. uncompressed) from
* best available indicators. It can reliably detect a 32 bit vs. 64 bit JVM.
* It can only make an educated guess at whether compressed OOPs are used,
* though; specifically, it knows what the JVM's default choice of OOP
* compression would be based on HotSpot version and maximum heap sizes, but if
* the choice is explicitly overridden with the <tt>-XX:{+|-}UseCompressedOops</tt> command line
* switch, it can not detect
* this fact and will report incorrect sizes, as it will presume the default JVM
* behavior.
* Contains utility methods for calculating the memory usage of objects. It only works on the HotSpot JVM, and infers
* the actual memory layout (32 bit vs. 64 bit word size, compressed object pointers vs. uncompressed) from best
* available indicators. It can reliably detect a 32 bit vs. 64 bit JVM. It can only make an educated guess at whether
* compressed OOPs are used, though; specifically, it knows what the JVM's default choice of OOP compression would be
* based on HotSpot version and maximum heap sizes, but if the choice is explicitly overridden with the
* <tt>-XX:{+|-}UseCompressedOops</tt> command line switch, it can not detect this fact and will report incorrect sizes,
* as it will presume the default JVM behavior.
*
* @author Attila Szegedi
*/
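Usage is a single static call; on anything other than a HotSpot-family JVM it fails as described above:

import java.util.Arrays;

class SizeProbeSketch {
  public static void main(String[] args) {
    long bytes = ObjectSizeCalculator.getObjectSize(Arrays.asList("a", "b", "c"));
    System.out.println("retained size: " + bytes + " bytes"); // exact value depends on the JVM layout
  }
}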
@@ -104,8 +100,7 @@ public class ObjectSizeCalculator {
int getReferenceSize();
/**
* Returns the quantum field size for a field owned by one of an object's ancestor superclasses
* in this JVM.
* Returns the quantum field size for a field owned by one of an object's ancestor superclasses in this JVM.
*
* @return the quantum field size for a superclass field.
*/
@@ -114,24 +109,18 @@ public class ObjectSizeCalculator {
private static class CurrentLayout {
private static final MemoryLayoutSpecification SPEC =
getEffectiveMemoryLayoutSpecification();
private static final MemoryLayoutSpecification SPEC = getEffectiveMemoryLayoutSpecification();
}
/**
* Given an object, returns the total allocated size, in bytes, of the object
* and all other objects reachable from it. Attempts to to detect the current JVM memory layout,
* but may fail with {@link UnsupportedOperationException};
* Given an object, returns the total allocated size, in bytes, of the object and all other objects reachable from it.
* Attempts to detect the current JVM memory layout, but may fail with {@link UnsupportedOperationException}.
*
* @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do
* anything special, it measures the size of all objects
* reachable through it (which will include its class loader, and by
* extension, all other Class objects loaded by
* the same loader, and all the parent class loaders). It doesn't provide the
* size of the static fields in the JVM class that the Class object
* represents.
* @return the total allocated size of the object and all other objects it
* retains.
* @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do anything special, it
* measures the size of all objects reachable through it (which will include its class loader, and by
* extension, all other Class objects loaded by the same loader, and all the parent class loaders). It doesn't
* provide the size of the static fields in the JVM class that the Class object represents.
* @return the total allocated size of the object and all other objects it retains.
* @throws UnsupportedOperationException if the current vm memory layout cannot be detected.
*/
public static long getObjectSize(Object obj) throws UnsupportedOperationException {
@@ -164,8 +153,7 @@ public class ObjectSizeCalculator {
private long size;
/**
* Creates an object size calculator that can calculate object sizes for a given
* {@code memoryLayoutSpecification}.
* Creates an object size calculator that can calculate object sizes for a given {@code memoryLayoutSpecification}.
*
* @param memoryLayoutSpecification a description of the JVM memory layout.
*/
@@ -179,24 +167,19 @@ public class ObjectSizeCalculator {
}
/**
* Given an object, returns the total allocated size, in bytes, of the object
* and all other objects reachable from it.
* Given an object, returns the total allocated size, in bytes, of the object and all other objects reachable from it.
*
* @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do
* anything special, it measures the size of all objects
* reachable through it (which will include its class loader, and by
* extension, all other Class objects loaded by
* the same loader, and all the parent class loaders). It doesn't provide the
* size of the static fields in the JVM class that the Class object
* represents.
* @return the total allocated size of the object and all other objects it
* retains.
* @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do anything special, it
* measures the size of all objects reachable through it (which will include its class loader, and by
* extension, all other Class objects loaded by the same loader, and all the parent class loaders). It doesn't
* provide the size of the static fields in the JVM class that the Class object represents.
* @return the total allocated size of the object and all other objects it retains.
*/
public synchronized long calculateObjectSize(Object obj) {
// Breadth-first traversal instead of naive depth-first with recursive
// implementation, so we don't blow the stack traversing long linked lists.
try {
for (; ; ) {
for (;;) {
visit(obj);
if (pending.isEmpty()) {
return size;
@@ -324,8 +307,7 @@ public class ObjectSizeCalculator {
}
this.fieldsSize = fieldsSize;
this.objectSize = roundTo(objectHeaderSize + fieldsSize, objectPadding);
this.referenceFields = referenceFields.toArray(
new Field[referenceFields.size()]);
this.referenceFields = referenceFields.toArray(new Field[referenceFields.size()]);
}
void visit(Object obj, ObjectSizeCalculator calc) {
@@ -338,8 +320,7 @@ public class ObjectSizeCalculator {
try {
calc.enqueue(f.get(obj));
} catch (IllegalAccessException e) {
final AssertionError ae = new AssertionError(
"Unexpected denial of access to " + f);
final AssertionError ae = new AssertionError("Unexpected denial of access to " + f);
ae.initCause(e);
throw ae;
}
@@ -360,17 +341,15 @@ public class ObjectSizeCalculator {
if (type == long.class || type == double.class) {
return 8;
}
throw new AssertionError("Encountered unexpected primitive type "
+ type.getName());
throw new AssertionError("Encountered unexpected primitive type " + type.getName());
}
@VisibleForTesting
static MemoryLayoutSpecification getEffectiveMemoryLayoutSpecification() {
final String vmName = System.getProperty("java.vm.name");
if (vmName == null || !(vmName.startsWith("Java HotSpot(TM) ")
|| vmName.startsWith("OpenJDK") || vmName.startsWith("TwitterJDK"))) {
throw new UnsupportedOperationException(
"ObjectSizeCalculator only supported on HotSpot VM");
if (vmName == null || !(vmName.startsWith("Java HotSpot(TM) ") || vmName.startsWith("OpenJDK")
|| vmName.startsWith("TwitterJDK"))) {
throw new UnsupportedOperationException("ObjectSizeCalculator only supported on HotSpot VM");
}
final String dataModel = System.getProperty("sun.arch.data.model");
@@ -403,13 +382,12 @@ public class ObjectSizeCalculator {
}
};
} else if (!"64".equals(dataModel)) {
throw new UnsupportedOperationException("Unrecognized value '"
+ dataModel + "' of sun.arch.data.model system property");
throw new UnsupportedOperationException(
"Unrecognized value '" + dataModel + "' of sun.arch.data.model system property");
}
final String strVmVersion = System.getProperty("java.vm.version");
final int vmVersion = Integer.parseInt(strVmVersion.substring(0,
strVmVersion.indexOf('.')));
final int vmVersion = Integer.parseInt(strVmVersion.substring(0, strVmVersion.indexOf('.')));
if (vmVersion >= 17) {
long maxMemory = 0;
for (MemoryPoolMXBean mp : ManagementFactory.getMemoryPoolMXBeans()) {

View File

@@ -54,13 +54,13 @@ public final class Option<T> implements Serializable {
}
/**
* Returns an empty {@code Option} instance. No value is present for this Option.
* Returns an empty {@code Option} instance. No value is present for this Option.
*
* @param <T> Type of the non-existent value
* @return an empty {@code Option}
* @apiNote Though it may be tempting to do so, avoid testing if an object is empty by comparing with {@code ==}
* against instances returned by {@code Option.empty()}. There is no guarantee that it is a singleton. Instead, use
* {@link #isPresent()}.
* against instances returned by {@code Option.empty()}. There is no guarantee that it is a singleton.
* Instead, use {@link #isPresent()}.
*/
public static <T> Option<T> empty() {
@SuppressWarnings("unchecked")
@@ -143,7 +143,7 @@ public final class Option<T> implements Serializable {
*
* @param predicate a predicate to apply to the value, if present
* @return an {@code Option} describing the value of this {@code Option} if a value is present and the value matches
* the given predicate, otherwise an empty {@code Option}
* the given predicate, otherwise an empty {@code Option}
* @throws NullPointerException if the predicate is null
*/
public Option<T> filter(Predicate<? super T> predicate) {
@@ -157,25 +157,27 @@ public final class Option<T> implements Serializable {
/**
* If a value is present, apply the provided mapping function to it, and if the result is non-null, return an {@code
* Option} describing the result. Otherwise return an empty {@code Option}.
* Option} describing the result. Otherwise return an empty {@code Option}.
*
* @param <U> The type of the result of the mapping function
* @param mapper a mapping function to apply to the value, if present
* @return an {@code Option} describing the result of applying a mapping function to the value of this {@code Option},
* if a value is present, otherwise an empty {@code Option}
* if a value is present, otherwise an empty {@code Option}
* @throws NullPointerException if the mapping function is null
* @apiNote This method supports post-processing on optional values, without the need to explicitly check for a return
* status. For example, the following code traverses a stream of file names, selects one that has not yet been
* processed, and then opens that file, returning an {@code Option<FileInputStream>}:
* status. For example, the following code traverses a stream of file names, selects one that has not yet
* been processed, and then opens that file, returning an {@code Option<FileInputStream>}:
*
* <pre>{@code
* <pre>
* {@code
* Option<FileInputStream> fis =
* names.stream().filter(name -> !isProcessedYet(name))
* .findFirst()
* .map(name -> new FileInputStream(name));
* }</pre>
* }
* </pre>
*
* Here, {@code findFirst} returns an {@code Option<String>}, and then {@code map} returns an {@code
* Here, {@code findFirst} returns an {@code Option<String>}, and then {@code map} returns an {@code
* Option<FileInputStream>} for the desired file if one exists.
*/
public <U> Option<U> map(Function<? super T, ? extends U> mapper) {
@@ -189,14 +191,14 @@ public final class Option<T> implements Serializable {
/**
* If a value is present, apply the provided {@code Option}-bearing mapping function to it, return that result,
* otherwise return an empty {@code Option}. This method is similar to {@link #map(Function)}, but the provided
* mapper is one whose result is already an {@code Option}, and if invoked, {@code flatMap} does not wrap it with an
* otherwise return an empty {@code Option}. This method is similar to {@link #map(Function)}, but the provided mapper
* is one whose result is already an {@code Option}, and if invoked, {@code flatMap} does not wrap it with an
* additional {@code Option}.
*
* @param <U> The type parameter to the {@code Option} returned by
* @param mapper a mapping function to apply to the value, if present the mapping function
* @return the result of applying an {@code Option}-bearing mapping function to the value of this {@code Option}, if a
* value is present, otherwise an empty {@code Option}
* value is present, otherwise an empty {@code Option}
* @throws NullPointerException if the mapping function is null or returns a null result
*/
public <U> Option<U> flatMap(Function<? super T, Option<U>> mapper) {
@@ -238,7 +240,7 @@ public final class Option<T> implements Serializable {
* @throws X if there is no value present
* @throws NullPointerException if no value is present and {@code exceptionSupplier} is null
* @apiNote A method reference to the exception constructor with an empty argument list can be used as the supplier.
* For example, {@code IllegalStateException::new}
* For example, {@code IllegalStateException::new}
*/
public <X extends Throwable> T orElseThrow(Supplier<? extends X> exceptionSupplier) throws X {
if (value != null) {
@@ -289,13 +291,11 @@ public final class Option<T> implements Serializable {
*
* @return the string representation of this instance
* @implSpec If a value is present the result must include its string representation in the result. Empty and present
* Optionals must be unambiguously differentiable.
* Optionals must be unambiguously differentiable.
*/
@Override
public String toString() {
return value != null
? String.format("Option[%s]", value)
: "Option.empty";
return value != null ? String.format("Option[%s]", value) : "Option.empty";
}
/**

View File

@@ -51,22 +51,22 @@ public class ParquetUtils {
/**
* Read the rowKey list from the given parquet file.
*
* @param filePath The parquet file path.
* @param filePath The parquet file path.
* @param configuration configuration to build fs object
* @return Set Set of row keys
* @return Set Set of row keys
*/
public static Set<String> readRowKeysFromParquet(Configuration configuration, Path filePath) {
return filterParquetRowKeys(configuration, filePath, new HashSet<>());
}
/**
* Read the rowKey list matching the given filter, from the given parquet file. If the filter is empty,
* then this will return all the rowkeys.
* Read the rowKey list matching the given filter, from the given parquet file. If the filter is empty, then this will
* return all the rowkeys.
*
* @param filePath The parquet file path.
* @param configuration configuration to build fs object
* @param filter record keys filter
* @return Set Set of row keys matching candidateRecordKeys
* @param filePath The parquet file path.
* @param configuration configuration to build fs object
* @param filter record keys filter
* @return Set Set of row keys matching candidateRecordKeys
*/
public static Set<String> filterParquetRowKeys(Configuration configuration, Path filePath, Set<String> filter) {
Option<RecordKeysFilterFunction> filterFunction = Option.empty();
@@ -102,11 +102,9 @@ public class ParquetUtils {
ParquetMetadata footer;
try {
// TODO(vc): Should we use the parallel reading version here?
footer = ParquetFileReader
.readFooter(FSUtils.getFs(parquetFilePath.toString(), conf).getConf(), parquetFilePath);
footer = ParquetFileReader.readFooter(FSUtils.getFs(parquetFilePath.toString(), conf).getConf(), parquetFilePath);
} catch (IOException e) {
throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath,
e);
throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e);
}
return footer;
}
@@ -127,8 +125,8 @@ public class ParquetUtils {
if (metadata.containsKey(footerName)) {
footerVals.add(metadata.get(footerName));
} else {
throw new MetadataNotFoundException("Could not find index in Parquet footer. "
+ "Looked for key " + footerName + " in " + parquetFilePath);
throw new MetadataNotFoundException(
"Could not find index in Parquet footer. " + "Looked for key " + footerName + " in " + parquetFilePath);
}
}
return footerVals;
@@ -141,21 +139,20 @@ public class ParquetUtils {
/**
* Read out the bloom filter from the parquet file meta data.
*/
public static BloomFilter readBloomFilterFromParquetMetadata(Configuration configuration,
Path parquetFilePath) {
String footerVal = readParquetFooter(configuration, parquetFilePath,
HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY).get(0);
public static BloomFilter readBloomFilterFromParquetMetadata(Configuration configuration, Path parquetFilePath) {
String footerVal =
readParquetFooter(configuration, parquetFilePath, HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY)
.get(0);
return new BloomFilter(footerVal);
}
public static String[] readMinMaxRecordKeys(Configuration configuration, Path parquetFilePath) {
List<String> minMaxKeys = readParquetFooter(configuration, parquetFilePath,
HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER,
HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER);
HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER, HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER);
if (minMaxKeys.size() != 2) {
throw new HoodieException(String.format(
"Could not read min/max record key out of footer correctly from %s. read) : %s",
parquetFilePath, minMaxKeys));
throw new HoodieException(
String.format("Could not read min/max record key out of footer correctly from %s. read) : %s",
parquetFilePath, minMaxKeys));
}
return new String[] {minMaxKeys.get(0), minMaxKeys.get(1)};
}
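A sketch tying the footer helpers together; the file path is hypothetical, and mightContain on the returned BloomFilter is an assumed lookup method:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

class FooterProbeSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    Path file = new Path("/data/trips/2019/10/01/file-1.parquet");
    BloomFilter bf = ParquetUtils.readBloomFilterFromParquetMetadata(conf, file);
    String[] minMax = ParquetUtils.readMinMaxRecordKeys(conf, file);
    System.out.println("might contain key-1? " + bf.mightContain("key-1")
        + ", key range [" + minMax[0] + ", " + minMax[1] + "]");
  }
}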

View File

@@ -56,14 +56,11 @@ public class ReflectionUtils {
/**
* Instantiate a given class with a generic record payload
*/
public static <T extends HoodieRecordPayload> T loadPayload(String recordPayloadClass,
Object[] payloadArgs,
public static <T extends HoodieRecordPayload> T loadPayload(String recordPayloadClass, Object[] payloadArgs,
Class<?>... constructorArgTypes) {
try {
return (T) getClass(recordPayloadClass).getConstructor(constructorArgTypes)
.newInstance(payloadArgs);
} catch (InstantiationException | IllegalAccessException
| InvocationTargetException | NoSuchMethodException e) {
return (T) getClass(recordPayloadClass).getConstructor(constructorArgTypes).newInstance(payloadArgs);
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
throw new HoodieException("Unable to instantiate payload class ", e);
}
}
@@ -74,8 +71,7 @@ public class ReflectionUtils {
public static Object loadClass(String clazz, Class<?>[] constructorArgTypes, Object... constructorArgs) {
try {
return getClass(clazz).getConstructor(constructorArgTypes).newInstance(constructorArgs);
} catch (InstantiationException | IllegalAccessException
| InvocationTargetException | NoSuchMethodException e) {
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
throw new HoodieException("Unable to instantiate class ", e);
}
}
@@ -84,13 +80,13 @@ public class ReflectionUtils {
* Creates an instance of the given class. Constructor arg types are inferred.
*/
public static Object loadClass(String clazz, Object... constructorArgs) {
Class<?>[] constructorArgTypes = Arrays.stream(constructorArgs)
.map(Object::getClass).toArray(Class<?>[]::new);
Class<?>[] constructorArgTypes = Arrays.stream(constructorArgs).map(Object::getClass).toArray(Class<?>[]::new);
return loadClass(clazz, constructorArgTypes, constructorArgs);
}
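A usage sketch of the type-inferring overload (class name hypothetical). Constructor argument types are taken from the runtime classes of the arguments, so the boxed 42 resolves only against a constructor declared with Integer, not int, and supertype parameters will not match:

Object payload = ReflectionUtils.loadClass("com.example.MyPayload", "arg", 42);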
/**
* Return stream of top level class names in the same class path as passed-in class
*
* @param clazz
*/
public static Stream<String> getTopLevelClassesInClasspath(Class clazz) {

View File

@@ -64,8 +64,8 @@ public class RocksDBDAO {
public RocksDBDAO(String basePath, String rocksDBBasePath) {
this.basePath = basePath;
this.rocksDBBasePath = String.format("%s/%s/%s", rocksDBBasePath,
this.basePath.replace("/", "_"), UUID.randomUUID().toString());
this.rocksDBBasePath =
String.format("%s/%s/%s", rocksDBBasePath, this.basePath.replace("/", "_"), UUID.randomUUID().toString());
init();
}
@@ -137,8 +137,8 @@ public class RocksDBDAO {
managedColumnFamilies.add(getColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));
} else {
log.info("Loading column families :" + existing.stream().map(String::new).collect(Collectors.toList()));
managedColumnFamilies.addAll(existing.stream()
.map(RocksDBDAO::getColumnFamilyDescriptor).collect(Collectors.toList()));
managedColumnFamilies
.addAll(existing.stream().map(RocksDBDAO::getColumnFamilyDescriptor).collect(Collectors.toList()));
}
return managedColumnFamilies;
}
@@ -350,9 +350,8 @@ public class RocksDBDAO {
}
}
log.info("Prefix Search for (query=" + prefix + ") on " + columnFamilyName
+ ". Total Time Taken (msec)=" + timer.endTimer()
+ ". Serialization Time taken(micro)=" + timeTakenMicro + ", num entries=" + results.size());
log.info("Prefix Search for (query=" + prefix + ") on " + columnFamilyName + ". Total Time Taken (msec)="
+ timer.endTimer() + ". Serialization Time taken(micro)=" + timeTakenMicro + ", num entries=" + results.size());
return results.stream();
}
@@ -368,7 +367,7 @@ public class RocksDBDAO {
log.info("Prefix DELETE (query=" + prefix + ") on " + columnFamilyName);
final RocksIterator it = getRocksDB().newIterator(managedHandlesMap.get(columnFamilyName));
it.seek(prefix.getBytes());
//Find first and last keys to be deleted
// Find first and last keys to be deleted
String firstEntry = null;
String lastEntry = null;
while (it.isValid() && new String(it.key()).startsWith(prefix)) {
@@ -384,9 +383,8 @@ public class RocksDBDAO {
if (null != firstEntry) {
try {
// This will not delete the last entry
getRocksDB().deleteRange(managedHandlesMap.get(columnFamilyName), firstEntry.getBytes(),
lastEntry.getBytes());
//Delete the last entry
getRocksDB().deleteRange(managedHandlesMap.get(columnFamilyName), firstEntry.getBytes(), lastEntry.getBytes());
// Delete the last entry
getRocksDB().delete(lastEntry.getBytes());
} catch (RocksDBException e) {
log.error("Got exception performing range delete");

View File

@@ -28,27 +28,17 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
/**
* Helper class to generate Key and column names for rocksdb based view
*
* For RocksDB, 3 colFamilies are used for storing file-system view for each dataset.
* (a) View
* (b) Partitions Cached
* (c) Pending Compactions
*
*
* View : Key : Store both slice and Data file stored.
* Slice :
* Key = "type=slice,part=<PartitionPath>,id=<FileId>,instant=<Timestamp>"
* Value = Serialized FileSlice
* Data File :
* Key = "type=df,part=<PartitionPath>,id=<FileId>,instant=<Timestamp>"
* Value = Serialized DataFile
*
* Partitions :
* Key = "part=<PartitionPath>"
* Value = Boolean
*
* Pending Compactions
* Key = "part=<PartitionPath>,id=<FileId>"
* Value = Pair<CompactionTime, CompactionOperation>
*
* For RocksDB, 3 colFamilies are used for storing file-system view for each dataset. (a) View (b) Partitions Cached (c)
* Pending Compactions
*
*
* View : Key : Store both slice and Data file stored. Slice : Key =
* "type=slice,part=<PartitionPath>,id=<FileId>,instant=<Timestamp>" Value = Serialized FileSlice Data File : Key =
* "type=df,part=<PartitionPath>,id=<FileId>,instant=<Timestamp>" Value = Serialized DataFile
*
* Partitions : Key = "part=<PartitionPath>" Value = Boolean
*
* Pending Compactions Key = "part=<PartitionPath>,id=<FileId>" Value = Pair<CompactionTime, CompactionOperation>
*/
public class RocksDBSchemaHelper {
@@ -80,15 +70,15 @@ public class RocksDBSchemaHelper {
}
public String getKeyForSliceView(String partitionPath, String fileId, String instantTime) {
return String.format("type=slice,part=%s,id=%s,instant=%s",partitionPath, fileId, instantTime);
return String.format("type=slice,part=%s,id=%s,instant=%s", partitionPath, fileId, instantTime);
}
public String getPrefixForSliceViewByPartitionFile(String partitionPath, String fileId) {
return String.format("type=slice,part=%s,id=%s,instant=",partitionPath, fileId);
return String.format("type=slice,part=%s,id=%s,instant=", partitionPath, fileId);
}
public String getPrefixForDataFileViewByPartitionFile(String partitionPath, String fileId) {
return String.format("type=df,part=%s,id=%s,instant=",partitionPath, fileId);
return String.format("type=df,part=%s,id=%s,instant=", partitionPath, fileId);
}
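Worked examples of the key formats generated above (values hypothetical):

// getKeyForSliceView("2019/10/01", "file-1", "20191010101010")
//   -> "type=slice,part=2019/10/01,id=file-1,instant=20191010101010"
// getPrefixForDataFileViewByPartitionFile("2019/10/01", "file-1")
//   -> "type=df,part=2019/10/01,id=file-1,instant="   (a prefix scan then matches every instant)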
public String getKeyForDataFileView(HoodieFileGroup fileGroup, FileSlice slice) {

View File

@@ -34,8 +34,7 @@ import org.objenesis.instantiator.ObjectInstantiator;
/**
* {@link SerializationUtils} class internally uses {@link Kryo} serializer for serializing /
* deserializing objects.
* {@link SerializationUtils} class internally uses {@link Kryo} serializer for serializing / deserializing objects.
*/
public class SerializationUtils {
@@ -44,10 +43,12 @@ public class SerializationUtils {
ThreadLocal.withInitial(() -> new KryoSerializerInstance());
// Serialize
//-----------------------------------------------------------------------
// -----------------------------------------------------------------------
/**
* <p>Serializes an {@code Object} to a byte array for storage/serialization.</p>
* <p>
* Serializes an {@code Object} to a byte array for storage/serialization.
* </p>
*
* @param obj the object to serialize to bytes
* @return a byte[] with the converted Serializable
@@ -58,15 +59,18 @@ public class SerializationUtils {
}
// Deserialize
//-----------------------------------------------------------------------
// -----------------------------------------------------------------------
/**
* <p> Deserializes a single {@code Object} from an array of bytes. </p>
* <p>
* Deserializes a single {@code Object} from an array of bytes.
* </p>
*
* <p> If the call site incorrectly types the return value, a {@link ClassCastException} is thrown
* from the call site. Without Generics in this declaration, the call site must type cast and can
* cause the same ClassCastException. Note that in both cases, the ClassCastException is in the
* call site, not in this method. </p>
* <p>
* If the call site incorrectly types the return value, a {@link ClassCastException} is thrown from the call site.
* Without Generics in this declaration, the call site must type cast and can cause the same ClassCastException. Note
* that in both cases, the ClassCastException is in the call site, not in this method.
* </p>
*
* @param <T> the object type to be deserialized
* @param objectData the serialized object, must not be null
@@ -109,8 +113,8 @@ public class SerializationUtils {
}
/**
* This class has a no-arg constructor, suitable for use with reflection instantiation.
* For Details checkout com.twitter.chill.KryoBase.
* This class has a no-arg constructor, suitable for use with reflection instantiation. For Details checkout
* com.twitter.chill.KryoBase.
*/
private static class KryoInstantiator implements Serializable {
@@ -153,8 +157,8 @@ public class SerializationUtils {
final Constructor constructor = type.getConstructor();
constructor.setAccessible(true);
return constructor.newInstance();
} catch (NoSuchMethodException | IllegalAccessException
| InstantiationException | InvocationTargetException e) {
} catch (NoSuchMethodException | IllegalAccessException | InstantiationException
| InvocationTargetException e) {
// ignore this exception. we will fall back to default instantiation strategy.
}
return super.getInstantiatorStrategy().newInstantiatorOf(type).newInstance();
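A round-trip sketch of the two entry points described above; the caller supplies the expected type on the deserializing side:

import java.util.HashMap;

class KryoRoundTripSketch {
  public static void main(String[] args) throws Exception {
    HashMap<String, Long> original = new HashMap<>();
    original.put("count", 7L);

    byte[] bytes = SerializationUtils.serialize(original);               // Kryo-encode to byte[]
    HashMap<String, Long> copy = SerializationUtils.deserialize(bytes);  // type comes from the call site
    System.out.println(copy.get("count")); // 7
  }
}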

View File

@@ -20,14 +20,14 @@ package org.apache.hudi.common.util;
/**
* An interface to estimate the size of payload in memory
*
* @param <T>
*/
public interface SizeEstimator<T> {
/**
* This method is used to estimate the size of a payload in memory.
* The default implementation returns the total allocated size, in bytes, of the object
* and all other objects reachable from it
* This method is used to estimate the size of a payload in memory. The default implementation returns the total
* allocated size, in bytes, of the object and all other objects reachable from it
*/
long sizeEstimate(T t);
}
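A custom implementation can skip reflective object-graph walking entirely when the payload size is cheap to compute; a sketch for byte-array payloads (the 16-byte array-header constant is an assumption, not a measured JVM value):

public class ByteArraySizeEstimator implements SizeEstimator<byte[]> {
  @Override
  public long sizeEstimate(byte[] bytes) {
    return 16 + bytes.length; // assumed array header plus contents
  }
}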

View File

@@ -43,8 +43,7 @@ public class SpillableMapUtils {
/**
* |crc|timestamp|sizeOfKey|SizeOfValue|key|value|
*/
private static FileEntry readInternal(RandomAccessFile file, long valuePosition,
int valueLength) throws IOException {
private static FileEntry readInternal(RandomAccessFile file, long valuePosition, int valueLength) throws IOException {
file.seek(valuePosition);
long crc = file.readLong();
long timestamp = file.readLong();
@@ -59,24 +58,22 @@ public class SpillableMapUtils {
file.read(value, 0, valueSize);
long crcOfReadValue = generateChecksum(value);
if (!(crc == crcOfReadValue)) {
throw new HoodieCorruptedDataException("checksum of payload written to external disk does not match, "
+ "data may be corrupted");
throw new HoodieCorruptedDataException(
"checksum of payload written to external disk does not match, " + "data may be corrupted");
}
return new FileEntry(crc, keySize, valueSize, key, value, timestamp);
}
/**
* Write Value and other metadata necessary to disk. Each entry has the following sequence of data <p>
* Write Value and other metadata necessary to disk. Each entry has the following sequence of data
* <p>
* |crc|timestamp|sizeOfKey|SizeOfValue|key|value|
*/
public static long spillToDisk(SizeAwareDataOutputStream outputStream,
FileEntry fileEntry) throws IOException {
public static long spillToDisk(SizeAwareDataOutputStream outputStream, FileEntry fileEntry) throws IOException {
return spill(outputStream, fileEntry);
}
private static long spill(SizeAwareDataOutputStream outputStream,
FileEntry fileEntry)
throws IOException {
private static long spill(SizeAwareDataOutputStream outputStream, FileEntry fileEntry) throws IOException {
outputStream.writeLong(fileEntry.getCrc());
outputStream.writeLong(fileEntry.getTimestamp());
outputStream.writeInt(fileEntry.getSizeOfKey());
@@ -107,15 +104,10 @@ public class SpillableMapUtils {
* Utility method to convert bytes to HoodieRecord using schema and payload class
*/
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz) {
String recKey = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)
.toString();
String partitionPath =
rec.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD)
.toString();
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(
new HoodieKey(recKey, partitionPath),
ReflectionUtils
.loadPayload(payloadClazz, new Object[]{Option.of(rec)}, Option.class));
String recKey = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
String partitionPath = rec.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(new HoodieKey(recKey, partitionPath),
ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.of(rec)}, Option.class));
return (R) hoodieRecord;
}
@@ -123,10 +115,8 @@ public class SpillableMapUtils {
* Utility method to convert bytes to HoodieRecord using schema and payload class
*/
public static <R> R generateEmptyPayload(String recKey, String partitionPath, String payloadClazz) {
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(
new HoodieKey(recKey, partitionPath),
ReflectionUtils
.loadPayload(payloadClazz, new Object[]{Option.empty()}, Option.class));
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(new HoodieKey(recKey, partitionPath),
ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.empty()}, Option.class));
return (R) hoodieRecord;
}
}
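A worked layout for the |crc|timestamp|sizeOfKey|sizeOfValue|key|value| entry format read and written above; the fixed header is 8 + 8 + 4 + 4 = 24 bytes:

// Entry with a 5-byte key and a 100-byte value:
//   offset  0: crc         (long, 8 bytes)
//   offset  8: timestamp   (long, 8 bytes)
//   offset 16: sizeOfKey   (int, 4 bytes)  = 5
//   offset 20: sizeOfValue (int, 4 bytes)  = 100
//   offset 24: key bytes   (5 bytes)
//   offset 29: value bytes (100 bytes)
// total entry size = 129 bytes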

View File

@@ -24,12 +24,14 @@ package org.apache.hudi.common.util;
public class StringUtils {
/**
* <p>Joins the elements of the provided array into a single String
* containing the provided list of elements.</p>
* <p>
* Joins the elements of the provided array into a single String containing the provided list of elements.
* </p>
*
* <p>No separator is added to the joined String.
* Null objects or empty strings within the array are represented by
* empty strings.</p>
* <p>
* No separator is added to the joined String. Null objects or empty strings within the array are represented by empty
* strings.
* </p>
*
* <pre>
* StringUtils.join(null) = null
@@ -56,7 +58,7 @@ public class StringUtils {
public static String toHexString(byte[] bytes) {
StringBuilder sb = new StringBuilder(bytes.length * 2);
for (byte b: bytes) {
for (byte b : bytes) {
sb.append(String.format("%02x", b));
}
return sb.toString();
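A worked example of the hex conversion above; each byte formats to exactly two lowercase hex digits:

// toHexString(new byte[] {0x0a, (byte) 0xff}) -> "0aff"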

View File

@@ -55,19 +55,19 @@ public class TimelineDiffHelper {
// Check If any pending compaction is lost. If so, do not allow incremental timeline sync
List<Pair<HoodieInstant, HoodieInstant>> compactionInstants = getPendingCompactionTransitions(oldT, newT);
List<HoodieInstant> lostPendingCompactions =
compactionInstants.stream().filter(instantPair -> instantPair.getValue() == null).map(Pair::getKey)
.collect(Collectors.toList());
List<HoodieInstant> lostPendingCompactions = compactionInstants.stream()
.filter(instantPair -> instantPair.getValue() == null).map(Pair::getKey).collect(Collectors.toList());
if (!lostPendingCompactions.isEmpty()) {
// If a compaction is unscheduled, fall back to complete refresh of fs view since some log files could have been
// moved. Its unsafe to incrementally sync in that case.
log.warn("Some pending compactions are no longer in new timeline (unscheduled ?)."
+ "They are :" + lostPendingCompactions);
log.warn("Some pending compactions are no longer in new timeline (unscheduled ?)." + "They are :"
+ lostPendingCompactions);
return TimelineDiffResult.UNSAFE_SYNC_RESULT;
}
List<HoodieInstant> finishedCompactionInstants = compactionInstants.stream().filter(instantPair ->
instantPair.getValue().getAction().equals(HoodieTimeline.COMMIT_ACTION)
&& instantPair.getValue().isCompleted()).map(Pair::getKey).collect(Collectors.toList());
List<HoodieInstant> finishedCompactionInstants = compactionInstants.stream()
.filter(instantPair -> instantPair.getValue().getAction().equals(HoodieTimeline.COMMIT_ACTION)
&& instantPair.getValue().isCompleted())
.map(Pair::getKey).collect(Collectors.toList());
newT.getInstants().filter(instant -> !oldTimelineInstants.contains(instant)).forEach(newInstants::add);
return new TimelineDiffResult(newInstants, finishedCompactionInstants, true);
@@ -125,11 +125,8 @@ public class TimelineDiffHelper {
@Override
public String toString() {
return "TimelineDiffResult{"
+ "newlySeenInstants=" + newlySeenInstants
+ ", finishedCompactionInstants=" + finishedCompactionInstants
+ ", canSyncIncrementally=" + canSyncIncrementally
+ '}';
return "TimelineDiffResult{" + "newlySeenInstants=" + newlySeenInstants + ", finishedCompactionInstants="
+ finishedCompactionInstants + ", canSyncIncrementally=" + canSyncIncrementally + '}';
}
}
}

View File

@@ -83,9 +83,10 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
/**
* RandomAccessFile is not thread-safe. This API opens a new file handle per thread and returns it.
*
* @return
*/
private RandomAccessFile getRandomAccessFile() {
private RandomAccessFile getRandomAccessFile() {
try {
RandomAccessFile readHandle = randomAccessFile.get();
if (readHandle == null) {
@@ -109,9 +110,9 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
writeOnlyFile.getParentFile().mkdir();
}
writeOnlyFile.createNewFile();
log.info(
"Spilling to file location " + writeOnlyFile.getAbsolutePath() + " in host (" + InetAddress.getLocalHost()
.getHostAddress() + ") with hostname (" + InetAddress.getLocalHost().getHostName() + ")");
log.info("Spilling to file location " + writeOnlyFile.getAbsolutePath() + " in host ("
+ InetAddress.getLocalHost().getHostAddress() + ") with hostname (" + InetAddress.getLocalHost().getHostName()
+ ")");
// Make sure file is deleted when JVM exits
writeOnlyFile.deleteOnExit();
addShutDownHook();
@@ -200,8 +201,8 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
public static <R> R get(ValueMetadata entry, RandomAccessFile file) {
try {
return SerializationUtils.deserialize(SpillableMapUtils.readBytesFromDisk(file,
entry.getOffsetOfValue(), entry.getSizeOfValue()));
return SerializationUtils
.deserialize(SpillableMapUtils.readBytesFromDisk(file, entry.getOffsetOfValue(), entry.getSizeOfValue()));
} catch (IOException e) {
throw new HoodieIOException("Unable to readFromDisk Hoodie Record from disk", e);
}
@@ -216,8 +217,8 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
this.valueMetadataMap.put(key,
new DiskBasedMap.ValueMetadata(this.filePath, valueSize, filePosition.get(), timestamp));
byte[] serializedKey = SerializationUtils.serialize(key);
filePosition.set(SpillableMapUtils.spillToDisk(writeOnlyFileHandle,
new FileEntry(SpillableMapUtils.generateChecksum(val),
filePosition
.set(SpillableMapUtils.spillToDisk(writeOnlyFileHandle, new FileEntry(SpillableMapUtils.generateChecksum(val),
serializedKey.length, valueSize, serializedKey, val, timestamp)));
} catch (IOException io) {
throw new HoodieIOException("Unable to store data in Disk Based map", io);
@@ -258,8 +259,7 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
public Stream<R> valueStream() {
final RandomAccessFile file = getRandomAccessFile();
return valueMetadataMap.values().stream().sorted().sequential()
.map(valueMetaData -> (R)get(valueMetaData, file));
return valueMetadataMap.values().stream().sorted().sequential().map(valueMetaData -> (R) get(valueMetaData, file));
}
@Override
@@ -286,8 +286,7 @@ public final class DiskBasedMap<T extends Serializable, R extends Serializable>
// Current timestamp when the value was written to disk
private Long timestamp;
public FileEntry(long crc, int sizeOfKey, int sizeOfValue, byte[] key, byte[] value,
long timestamp) {
public FileEntry(long crc, int sizeOfKey, int sizeOfValue, byte[] key, byte[] value, long timestamp) {
this.crc = crc;
this.sizeOfKey = sizeOfKey;
this.sizeOfValue = sizeOfValue;
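To make the FileEntry bookkeeping above concrete, here is a hedged sketch of the general spill pattern: append a checksummed, length-prefixed record to the end of the file and remember where the value bytes start, so they can later be read back with a single seek. The exact field order of Hudi's on-disk format is not fully shown in this hunk, so treat the layout below as illustrative (the demo file path is also an assumption):

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.zip.CRC32;

public class SpillRecordSketch {

  // Appends one record (crc, timestamp, key size, value size, key bytes, value bytes)
  // and returns the offset where the value bytes begin, for a ValueMetadata-style index.
  static long spill(RandomAccessFile file, byte[] key, byte[] value) throws IOException {
    CRC32 crc = new CRC32();
    crc.update(value);
    file.seek(file.length());
    file.writeLong(crc.getValue());
    file.writeLong(System.currentTimeMillis());
    file.writeInt(key.length);
    file.writeInt(value.length);
    file.write(key);
    long valueOffset = file.getFilePointer();
    file.write(value);
    return valueOffset;
  }

  // Random-access read of just the value, given its offset and size.
  static byte[] readValue(RandomAccessFile file, long offset, int size) throws IOException {
    byte[] buf = new byte[size];
    file.seek(offset);
    file.readFully(buf);
    return buf;
  }

  public static void main(String[] args) throws IOException {
    try (RandomAccessFile f = new RandomAccessFile("/tmp/spill-demo.bin", "rw")) {
      long offset = spill(f, "k1".getBytes(), "v1".getBytes());
      System.out.println(new String(readValue(f, offset, 2))); // prints: v1
    }
  }
}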

View File

@@ -36,13 +36,19 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* An external map that spills content to disk when there is insufficient space for it to grow. <p> This map holds two
* types of data structures: <p> (1) Key-Value pairs in an in-memory map (2) Key-ValueMetadata pairs in an in-memory map
* which keeps a marker to the values spilled to disk <p> NOTE: Values are only appended to disk. If a remove() is
* called, the entry is marked removed from the in-memory key-valueMetadata map, but its values remain in
* the temp file on disk until the file is cleaned. <p> The setting of the spill threshold faces the following
* trade-off: If the spill threshold is too high, the in-memory map may occupy more memory than is available, resulting
* in OOM. However, if the spill threshold is too low, we spill frequently and incur unnecessary disk writes.
* An external map that spills content to disk when there is insufficient space for it to grow.
* <p>
* This map holds two types of data structures:
* <p>
* (1) Key-Value pairs in an in-memory map (2) Key-ValueMetadata pairs in an in-memory map which keeps a marker to the
* values spilled to disk
* <p>
* NOTE: Values are only appended to disk. If a remove() is called, the entry is marked removed from the in-memory
* key-valueMetadata map, but its values remain in the temp file on disk until the file is cleaned.
* <p>
* The setting of the spill threshold faces the following trade-off: If the spill threshold is too high, the in-memory
* map may occupy more memory than is available, resulting in OOM. However, if the spill threshold is too low, we spill
* frequently and incur unnecessary disk writes.
*/
public class ExternalSpillableMap<T extends Serializable, R extends Serializable> implements Map<T, R> {
@@ -70,14 +76,13 @@ public class ExternalSpillableMap<T extends Serializable, R extends Serializable
private boolean shouldEstimatePayloadSize = true;
// Base File Path
private final String baseFilePath;
public ExternalSpillableMap(Long maxInMemorySizeInBytes, String baseFilePath,
SizeEstimator<T> keySizeEstimator, SizeEstimator<R> valueSizeEstimator) throws IOException {
public ExternalSpillableMap(Long maxInMemorySizeInBytes, String baseFilePath, SizeEstimator<T> keySizeEstimator,
SizeEstimator<R> valueSizeEstimator) throws IOException {
this.inMemoryMap = new HashMap<>();
this.baseFilePath = baseFilePath;
this.diskBasedMap = new DiskBasedMap<>(baseFilePath);
this.maxInMemorySizeInBytes = (long) Math
.floor(maxInMemorySizeInBytes * sizingFactorForInMemoryMap);
this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes * sizingFactorForInMemoryMap);
this.currentInMemoryMapSize = 0L;
this.keySizeEstimator = keySizeEstimator;
this.valueSizeEstimator = valueSizeEstimator;
@@ -169,11 +174,9 @@ public class ExternalSpillableMap<T extends Serializable, R extends Serializable
if (shouldEstimatePayloadSize && estimatedPayloadSize == 0) {
// At first, use the sizeEstimate of a record being inserted into the spillable map.
// Note, the converter may overestimate the size of a record in the JVM
this.estimatedPayloadSize =
keySizeEstimator.sizeEstimate(key) + valueSizeEstimator.sizeEstimate(value);
this.estimatedPayloadSize = keySizeEstimator.sizeEstimate(key) + valueSizeEstimator.sizeEstimate(value);
log.info("Estimated Payload size => " + estimatedPayloadSize);
} else if (shouldEstimatePayloadSize
&& inMemoryMap.size() % NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE == 0) {
} else if (shouldEstimatePayloadSize && inMemoryMap.size() % NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE == 0) {
// Re-estimate the size of a record by calculating the size of the entire map containing
// N entries and then dividing by the number of entries present (N). This helps to get a
// correct estimation of the size of each record in the JVM.
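A toy illustration of the spill decision described above: estimate the payload size from the first insert, track the running in-memory footprint, and divert writes to a stubbed disk map once the budget is exceeded. This is a sketch under stated assumptions, not the real class; Hudi additionally re-estimates the size every NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE records, as the code above shows.

import java.util.HashMap;
import java.util.Map;

public class SpillableSketch<K, V> {

  private final Map<K, V> inMemory = new HashMap<>();
  private final Map<K, V> onDisk = new HashMap<>(); // stand-in for DiskBasedMap
  private final long maxInMemoryBytes;
  private long estimatedPayloadSize;
  private long currentBytes;

  public SpillableSketch(long maxInMemoryBytes) {
    this.maxInMemoryBytes = maxInMemoryBytes;
  }

  public void put(K key, V value, long sizeEstimate) {
    if (estimatedPayloadSize == 0) {
      estimatedPayloadSize = sizeEstimate; // the first record sets the estimate
    }
    if (currentBytes + estimatedPayloadSize <= maxInMemoryBytes) {
      inMemory.put(key, value);
      currentBytes += estimatedPayloadSize;
    } else {
      onDisk.put(key, value); // past the threshold: spill
    }
  }

  public static void main(String[] args) {
    SpillableSketch<String, String> map = new SpillableSketch<>(2048);
    for (int i = 0; i < 10; i++) {
      map.put("key" + i, "value" + i, 512);
    }
    // With a 2 KB budget and 512-byte estimates: 4 entries stay in memory, 6 spill.
    System.out.println("in-memory=" + map.inMemory.size() + ", spilled=" + map.onDisk.size());
  }
}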

View File

@@ -20,14 +20,19 @@ package org.apache.hudi.common.util.collection;
/**
* (NOTE: Adapted from Apache commons-lang3)
* <p>An immutable pair consisting of two {@code Object} elements.</p>
* <p>
* An immutable pair consisting of two {@code Object} elements.
* </p>
*
* <p>Although the implementation is immutable, there is no restriction on the objects
* that may be stored. If mutable objects are stored in the pair, then the pair
* itself effectively becomes mutable. The class is also {@code final}, so a subclass
* cannot add undesirable behaviour.</p>
* <p>
* Although the implementation is immutable, there is no restriction on the objects that may be stored. If mutable
* objects are stored in the pair, then the pair itself effectively becomes mutable. The class is also {@code final}, so
* a subclass cannot add undesirable behaviour.
* </p>
*
* <p>#ThreadSafe# if both paired objects are thread-safe</p>
* <p>
* #ThreadSafe# if both paired objects are thread-safe
* </p>
*
* @param <L> the left element type
* @param <R> the right element type
@@ -49,10 +54,13 @@ public final class ImmutablePair<L, R> extends Pair<L, R> {
public final R right;
/**
* <p>Obtains an immutable pair from two objects, inferring the generic types.</p>
* <p>
* Obtains an immutable pair from two objects, inferring the generic types.
* </p>
*
* <p>This factory allows the pair to be created using inference to
* obtain the generic types.</p>
* <p>
* This factory allows the pair to be created using inference to obtain the generic types.
* </p>
*
* @param <L> the left element type
* @param <R> the right element type
@@ -76,7 +84,7 @@ public final class ImmutablePair<L, R> extends Pair<L, R> {
this.right = right;
}
//-----------------------------------------------------------------------
// -----------------------------------------------------------------------
/**
* {@inheritDoc}
@@ -95,9 +103,13 @@ public final class ImmutablePair<L, R> extends Pair<L, R> {
}
/**
* <p>Throws {@code UnsupportedOperationException}.</p>
* <p>
* Throws {@code UnsupportedOperationException}.
* </p>
*
* <p>This pair is immutable, so this operation is not supported.</p>
* <p>
* This pair is immutable, so this operation is not supported.
* </p>
*
* @param value the value to set
* @return never

View File

@@ -20,14 +20,19 @@ package org.apache.hudi.common.util.collection;
/**
* (NOTE: Adapted from Apache commons-lang3)
* <p>An immutable triple consisting of three {@code Object} elements.</p>
* <p>
* An immutable triple consisting of three {@code Object} elements.
* </p>
*
* <p>Although the implementation is immutable, there is no restriction on the objects
* that may be stored. If mutable objects are stored in the triple, then the triple
* itself effectively becomes mutable. The class is also {@code final}, so a subclass
* cannot add undesirable behaviour.</p>
* <p>
* Although the implementation is immutable, there is no restriction on the objects that may be stored. If mutable
* objects are stored in the triple, then the triple itself effectively becomes mutable. The class is also
* {@code final}, so a subclass cannot add undesirable behaviour.
* </p>
*
* <p>#ThreadSafe# if all three objects are thread-safe</p>
* <p>
* #ThreadSafe# if all three objects are thread-safe
* </p>
*
* @param <L> the left element type
* @param <M> the middle element type
@@ -54,10 +59,13 @@ public final class ImmutableTriple<L, M, R> extends Triple<L, M, R> {
public final R right;
/**
* <p>Obtains an immutable triple from three objects, inferring the generic types.</p>
* <p>
* Obtains an immutable triple from three objects, inferring the generic types.
* </p>
*
* <p>This factory allows the triple to be created using inference to
* obtain the generic types.</p>
* <p>
* This factory allows the triple to be created using inference to obtain the generic types.
* </p>
*
* @param <L> the left element type
* @param <M> the middle element type
@@ -85,7 +93,7 @@ public final class ImmutableTriple<L, M, R> extends Triple<L, M, R> {
this.right = right;
}
//-----------------------------------------------------------------------
// -----------------------------------------------------------------------
/**
* {@inheritDoc}

View File

@@ -66,12 +66,9 @@ public class LazyFileIterable<T, R> implements Iterable<R> {
readOnlyFileHandle.seek(0);
// sort the map in increasing order of value offset so disk seeks go in only one (forward) direction
this.metadataIterator = map
.entrySet()
.stream()
.sorted(
(Map.Entry<T, DiskBasedMap.ValueMetadata> o1, Map.Entry<T, DiskBasedMap.ValueMetadata> o2) ->
o1.getValue().getOffsetOfValue().compareTo(o2.getValue().getOffsetOfValue()))
this.metadataIterator = map.entrySet().stream()
.sorted((Map.Entry<T, DiskBasedMap.ValueMetadata> o1, Map.Entry<T, DiskBasedMap.ValueMetadata> o2) -> o1
.getValue().getOffsetOfValue().compareTo(o2.getValue().getOffsetOfValue()))
.collect(Collectors.toList()).iterator();
this.addShutdownHook();
}
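The comparator above exists purely so that the subsequent pass over the values only ever seeks forward on disk. A self-contained sketch of the same idea, with plain Long offsets standing in for ValueMetadata.getOffsetOfValue():

import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class ForwardSeekOrder {

  // Orders entries by ascending file offset so reads never seek backwards.
  static <K> Iterator<Map.Entry<K, Long>> inForwardSeekOrder(Map<K, Long> offsets) {
    List<Map.Entry<K, Long>> sorted = offsets.entrySet().stream()
        .sorted(Map.Entry.comparingByValue())
        .collect(Collectors.toList());
    return sorted.iterator();
  }

  public static void main(String[] args) {
    Map<String, Long> offsets = Map.of("b", 200L, "a", 50L, "c", 120L);
    // prints a @ 50, then c @ 120, then b @ 200
    inForwardSeekOrder(offsets)
        .forEachRemaining(e -> System.out.println(e.getKey() + " @ " + e.getValue()));
  }
}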

View File

@@ -23,15 +23,20 @@ import java.util.Map;
/**
* (NOTE: Adapted from Apache commons-lang3)
* <p>A pair consisting of two elements.</p>
* <p>
* A pair consisting of two elements.
* </p>
*
* <p>This class is an abstract implementation defining the basic API.
* It refers to the elements as 'left' and 'right'. It also implements the
* {@code Map.Entry} interface where the key is 'left' and the value is 'right'.</p>
* <p>
* This class is an abstract implementation defining the basic API. It refers to the elements as 'left' and 'right'. It
* also implements the {@code Map.Entry} interface where the key is 'left' and the value is 'right'.
* </p>
*
* <p>Subclass implementations may be mutable or immutable.
* However, there is no restriction on the type of objects that may be stored.
* If mutable objects are stored in the pair, then the pair itself effectively becomes mutable.</p>
* <p>
* Subclass implementations may be mutable or immutable. However, there is no restriction on the type of
* objects that may be stored. If mutable objects are stored in the pair, then the pair itself effectively becomes
* mutable.
* </p>
*
* @param <L> the left element type
* @param <R> the right element type
@@ -44,10 +49,13 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
private static final long serialVersionUID = 4954918890077093841L;
/**
* <p>Obtains an immutable pair from two objects, inferring the generic types.</p>
* <p>
* Obtains an immutable pair from two objects, inferring the generic types.
* </p>
*
* <p>This factory allows the pair to be created using inference to
* obtain the generic types.</p>
* <p>
* This factory allows the pair to be created using inference to obtain the generic types.
* </p>
*
* @param <L> the left element type
* @param <R> the right element type
@@ -59,31 +67,42 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
return new ImmutablePair<>(left, right);
}
//-----------------------------------------------------------------------
// -----------------------------------------------------------------------
/**
* <p>Gets the left element from this pair.</p>
* <p>
* Gets the left element from this pair.
* </p>
*
* <p>When treated as a key-value pair, this is the key.</p>
* <p>
* When treated as a key-value pair, this is the key.
* </p>
*
* @return the left element, may be null
*/
public abstract L getLeft();
/**
* <p>Gets the right element from this pair.</p>
* <p>
* Gets the right element from this pair.
* </p>
*
* <p>When treated as a key-value pair, this is the value.</p>
* <p>
* When treated as a key-value pair, this is the value.
* </p>
*
* @return the right element, may be null
*/
public abstract R getRight();
/**
* <p>Gets the key from this pair.</p>
* <p>
* Gets the key from this pair.
* </p>
*
* <p>This method implements the {@code Map.Entry} interface returning the
* left element as the key.</p>
* <p>
* This method implements the {@code Map.Entry} interface returning the left element as the key.
* </p>
*
* @return the left element as the key, may be null
*/
@@ -93,10 +112,13 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
}
/**
* <p>Gets the value from this pair.</p>
* <p>
* Gets the value from this pair.
* </p>
*
* <p>This method implements the {@code Map.Entry} interface returning the
* right element as the value.</p>
* <p>
* This method implements the {@code Map.Entry} interface returning the right element as the value.
* </p>
*
* @return the right element as the value, may be null
*/
@@ -105,11 +127,12 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
return getRight();
}
//-----------------------------------------------------------------------
// -----------------------------------------------------------------------
/**
* <p>Compares the pair based on the left element followed by the right element.
* The types must be {@code Comparable}.</p>
* <p>
* Compares the pair based on the left element followed by the right element. The types must be {@code Comparable}.
* </p>
*
* @param other the other pair, not null
* @return negative if this is less, zero if equal, positive if greater
@@ -133,7 +156,9 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
}
/**
* <p>Compares this pair to another based on the two elements.</p>
* <p>
* Compares this pair to another based on the two elements.
* </p>
*
* @param obj the object to compare to, null returns false
* @return true if the elements of the pair are equal
@@ -145,27 +170,28 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
}
if (obj instanceof Map.Entry<?, ?>) {
final Map.Entry<?, ?> other = (Map.Entry<?, ?>) obj;
return getKey().equals(other.getKey())
&& getValue().equals(other.getValue());
return getKey().equals(other.getKey()) && getValue().equals(other.getValue());
}
return false;
}
/**
* <p>Returns a suitable hash code.
* The hash code follows the definition in {@code Map.Entry}.</p>
* <p>
* Returns a suitable hash code. The hash code follows the definition in {@code Map.Entry}.
* </p>
*
* @return the hash code
*/
@Override
public int hashCode() {
// see Map.Entry API specification
return (getKey() == null ? 0 : getKey().hashCode())
^ (getValue() == null ? 0 : getValue().hashCode());
return (getKey() == null ? 0 : getKey().hashCode()) ^ (getValue() == null ? 0 : getValue().hashCode());
}
/**
* <p>Returns a String representation of this pair using the format {@code ($left,$right)}.</p>
* <p>
* Returns a String representation of this pair using the format {@code ($left,$right)}.
* </p>
*
* @return a string describing this object, not null
*/
@@ -175,12 +201,15 @@ public abstract class Pair<L, R> implements Map.Entry<L, R>, Comparable<Pair<L,
}
/**
* <p>Formats the receiver using the given format.</p>
* <p>
* Formats the receiver using the given format.
* </p>
*
* <p>This uses {@link java.util.Formattable} to perform the formatting. Two variables may
* be used to embed the left and right elements. Use {@code %1$s} for the left
* element (key) and {@code %2$s} for the right element (value).
* The default format used by {@code toString()} is {@code (%1$s,%2$s)}.</p>
* <p>
* This uses {@link java.util.Formattable} to perform the formatting. Two variables may be used to embed the left and
* right elements. Use {@code %1$s} for the left element (key) and {@code %2$s} for the right element (value). The
* default format used by {@code toString()} is {@code (%1$s,%2$s)}.
* </p>
*
* @param format the format string, optionally containing {@code %1$s} and {@code %2$s}, not null
* @return the formatted string, not null
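Because equals() and hashCode() above follow the Map.Entry specification, a Pair and a plain map entry holding the same key and value hash identically and compare equal. A quick check, assuming the Pair.of factory shown earlier in this file's diff:

import java.util.AbstractMap;
import java.util.Map;
import org.apache.hudi.common.util.collection.Pair;

public class PairEntryContract {

  public static void main(String[] args) {
    Pair<String, Integer> pair = Pair.of("k", 1);
    Map.Entry<String, Integer> entry = new AbstractMap.SimpleEntry<>("k", 1);
    // Both use the Map.Entry definition: keyHash ^ valueHash.
    System.out.println(pair.hashCode() == entry.hashCode()); // prints: true
    // equals() above accepts any Map.Entry, so this holds too.
    System.out.println(pair.equals(entry)); // prints: true
  }
}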

View File

@@ -41,7 +41,7 @@ public final class RocksDBBasedMap<K extends Serializable, R extends Serializabl
@Override
public int size() {
return (int)getRocksDBDAO().prefixSearch(columnFamilyName, "").count();
return (int) getRocksDBDAO().prefixSearch(columnFamilyName, "").count();
}
@Override
@@ -62,7 +62,7 @@ public final class RocksDBBasedMap<K extends Serializable, R extends Serializabl
@Override
public R get(Object key) {
return getRocksDBDAO().get(columnFamilyName, (Serializable)key);
return getRocksDBDAO().get(columnFamilyName, (Serializable) key);
}
@Override
@@ -119,7 +119,6 @@ public final class RocksDBBasedMap<K extends Serializable, R extends Serializabl
}
public Iterator<R> iterator() {
return getRocksDBDAO().prefixSearch(columnFamilyName, "")
.map(p -> (R)(p.getValue())).iterator();
return getRocksDBDAO().prefixSearch(columnFamilyName, "").map(p -> (R) (p.getValue())).iterator();
}
}
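The size(), get() and iterator() methods above all ride on a prefix scan. A hedged sketch of the underlying idea, using the plain org.rocksdb API rather than Hudi's RocksDBDAO wrapper (the database path and key scheme are illustrative, and this assumes the rocksdbjni dependency is on the classpath):

import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.RocksIterator;

public class PrefixScanSketch {

  public static void main(String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (Options opts = new Options().setCreateIfMissing(true);
        RocksDB db = RocksDB.open(opts, "/tmp/rocksdb-demo")) {
      db.put("cf1:a".getBytes(), "1".getBytes());
      db.put("cf1:b".getBytes(), "2".getBytes());
      db.put("cf2:a".getBytes(), "3".getBytes());
      byte[] prefix = "cf1:".getBytes();
      // Seek to the prefix and iterate while keys still start with it.
      try (RocksIterator it = db.newIterator()) {
        for (it.seek(prefix); it.isValid() && startsWith(it.key(), prefix); it.next()) {
          System.out.println(new String(it.key()) + " = " + new String(it.value()));
        }
      }
    }
  }

  static boolean startsWith(byte[] key, byte[] prefix) {
    if (key.length < prefix.length) {
      return false;
    }
    for (int i = 0; i < prefix.length; i++) {
      if (key[i] != prefix[i]) {
        return false;
      }
    }
    return true;
  }
}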

View File

@@ -22,14 +22,20 @@ import java.io.Serializable;
/**
* (NOTE: Adapted from Apache commons-lang3)
* <p>A triple consisting of three elements.</p>
* <p>
* A triple consisting of three elements.
* </p>
*
* <p>This class is an abstract implementation defining the basic API.
* It refers to the elements as 'left', 'middle' and 'right'.</p>
* <p>
* This class is an abstract implementation defining the basic API. It refers to the elements as 'left', 'middle' and
* 'right'.
* </p>
*
* <p>Subclass implementations may be mutable or immutable.
* However, there is no restriction on the type of objects that may be stored.
* If mutable objects are stored in the triple, then the triple itself effectively becomes mutable.</p>
* <p>
* Subclass implementations may be mutable or immutable. However, there is no restriction on the type of
* objects that may be stored. If mutable objects are stored in the triple, then the triple itself effectively becomes
* mutable.
* </p>
*
* @param <L> the left element type
* @param <M> the middle element type
@@ -43,10 +49,13 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
private static final long serialVersionUID = 1L;
/**
* <p>Obtains an immutable triple from three objects, inferring the generic types.</p>
* <p>
* Obtains an immutable triple from three objects, inferring the generic types.
* </p>
*
* <p>This factory allows the triple to be created using inference to
* obtain the generic types.</p>
* <p>
* This factory allows the triple to be created using inference to obtain the generic types.
* </p>
*
* @param <L> the left element type
* @param <M> the middle element type
@@ -60,35 +69,42 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
return new ImmutableTriple<L, M, R>(left, middle, right);
}
//-----------------------------------------------------------------------
// -----------------------------------------------------------------------
/**
* <p>Gets the left element from this triple.</p>
* <p>
* Gets the left element from this triple.
* </p>
*
* @return the left element, may be null
*/
public abstract L getLeft();
/**
* <p>Gets the middle element from this triple.</p>
* <p>
* Gets the middle element from this triple.
* </p>
*
* @return the middle element, may be null
*/
public abstract M getMiddle();
/**
* <p>Gets the right element from this triple.</p>
* <p>
* Gets the right element from this triple.
* </p>
*
* @return the right element, may be null
*/
public abstract R getRight();
//-----------------------------------------------------------------------
// -----------------------------------------------------------------------
/**
* <p>Compares the triple based on the left element, followed by the middle element,
* finally the right element.
* The types must be {@code Comparable}.</p>
* <p>
* Compares the triple based on the left element, followed by the middle element, finally the right element. The types
* must be {@code Comparable}.
* </p>
*
* @param other the other triple, not null
* @return negative if this is less, zero if equal, positive if greater
@@ -109,7 +125,9 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
}
/**
* <p>Compares this triple to another based on the three elements.</p>
* <p>
* Compares this triple to another based on the three elements.
* </p>
*
* @param obj the object to compare to, null returns false
* @return true if the elements of the triple are equal
@@ -122,27 +140,29 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
}
if (obj instanceof Triple<?, ?, ?>) {
final Triple<?, ?, ?> other = (Triple<?, ?, ?>) obj;
return getLeft().equals(other.getLeft())
&& getMiddle().equals(other.getMiddle())
return getLeft().equals(other.getLeft()) && getMiddle().equals(other.getMiddle())
&& getRight().equals(other.getRight());
}
return false;
}
/**
* <p>Returns a suitable hash code.</p>
* <p>
* Returns a suitable hash code.
* </p>
*
* @return the hash code
*/
@Override
public int hashCode() {
return (getLeft() == null ? 0 : getLeft().hashCode())
^ (getMiddle() == null ? 0 : getMiddle().hashCode())
return (getLeft() == null ? 0 : getLeft().hashCode()) ^ (getMiddle() == null ? 0 : getMiddle().hashCode())
^ (getRight() == null ? 0 : getRight().hashCode());
}
/**
* <p>Returns a String representation of this triple using the format {@code ($left,$middle,$right)}.</p>
* <p>
* Returns a String representation of this triple using the format {@code ($left,$middle,$right)}.
* </p>
*
* @return a string describing this object, not null
*/
@@ -153,12 +173,15 @@ public abstract class Triple<L, M, R> implements Comparable<Triple<L, M, R>>, Se
}
/**
* <p>Formats the receiver using the given format.</p>
* <p>
* Formats the receiver using the given format.
* </p>
*
* <p>This uses {@link java.util.Formattable} to perform the formatting. Three variables may
* be used to embed the left, middle and right elements. Use {@code %1$s} for the left
* element, {@code %2$s} for the middle and {@code %3$s} for the right element.
* The default format used by {@code toString()} is {@code (%1$s,%2$s,%3$s)}.</p>
* <p>
* This uses {@link java.util.Formattable} to perform the formatting. Three variables may be used to embed the left,
* middle and right elements. Use {@code %1$s} for the left element, {@code %2$s} for the middle and {@code %3$s} for the
* right element. The default format used by {@code toString()} is {@code (%1$s,%2$s,%3$s)}.
* </p>
*
* @param format the format string, optionally containing {@code %1$s}, {@code %2$s} and {@code %3$s}, not null
* @return the formatted string, not null
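A brief check of the comparison order documented above (left, then middle, then right), assuming the Triple.of factory shown earlier in this file's diff:

import org.apache.hudi.common.util.collection.Triple;

public class TripleCompareDemo {

  public static void main(String[] args) {
    Triple<String, Integer, String> a = Triple.of("t1", 1, "z");
    Triple<String, Integer, String> b = Triple.of("t1", 2, "a");
    // The lefts tie, so the middle elements (1 < 2) decide; the rights are never consulted.
    System.out.println(a.compareTo(b) < 0); // prints: true
  }
}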

View File

@@ -37,9 +37,9 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Executor which orchestrates concurrent producers and consumers communicating through a bounded in-memory queue.
* This class takes as input the size limit, queue producer(s), consumer and transformer
* and exposes an API to orchestrate concurrent execution of these actors communicating through a central bounded queue.
* Executor which orchestrates concurrent producers and consumers communicating through a bounded in-memory queue. This
* class takes as input the size limit, queue producer(s), consumer and transformer, and exposes an API to orchestrate
* concurrent execution of these actors communicating through a central bounded queue.
*/
public class BoundedInMemoryExecutor<I, O, E> {
@@ -54,17 +54,13 @@ public class BoundedInMemoryExecutor<I, O, E> {
// Consumer
private final Option<BoundedInMemoryQueueConsumer<O, E>> consumer;
public BoundedInMemoryExecutor(final long bufferLimitInBytes,
BoundedInMemoryQueueProducer<I> producer,
Option<BoundedInMemoryQueueConsumer<O, E>> consumer,
final Function<I, O> transformFunction) {
public BoundedInMemoryExecutor(final long bufferLimitInBytes, BoundedInMemoryQueueProducer<I> producer,
Option<BoundedInMemoryQueueConsumer<O, E>> consumer, final Function<I, O> transformFunction) {
this(bufferLimitInBytes, Arrays.asList(producer), consumer, transformFunction, new DefaultSizeEstimator<>());
}
public BoundedInMemoryExecutor(final long bufferLimitInBytes,
List<BoundedInMemoryQueueProducer<I>> producers,
Option<BoundedInMemoryQueueConsumer<O, E>> consumer,
final Function<I, O> transformFunction,
public BoundedInMemoryExecutor(final long bufferLimitInBytes, List<BoundedInMemoryQueueProducer<I>> producers,
Option<BoundedInMemoryQueueConsumer<O, E>> consumer, final Function<I, O> transformFunction,
final SizeEstimator<O> sizeEstimator) {
this.producers = producers;
this.consumer = consumer;
@@ -74,8 +70,7 @@ public class BoundedInMemoryExecutor<I, O, E> {
}
/**
* Callback to implement environment specific behavior before executors (producers/consumer)
* run.
* Callback to implement environment specific behavior before executors (producers/consumer) run.
*/
public void preExecute() {
// Do Nothing in general context
@@ -118,20 +113,19 @@ public class BoundedInMemoryExecutor<I, O, E> {
*/
private Future<E> startConsumer() {
return consumer.map(consumer -> {
return executorService.submit(
() -> {
logger.info("starting consumer thread");
preExecute();
try {
E result = consumer.consume(queue);
logger.info("Queue Consumption is done; notifying producer threads");
return result;
} catch (Exception e) {
logger.error("error consuming records", e);
queue.markAsFailed(e);
throw e;
}
});
return executorService.submit(() -> {
logger.info("starting consumer thread");
preExecute();
try {
E result = consumer.consume(queue);
logger.info("Queue Consumption is done; notifying producer threads");
return result;
} catch (Exception e) {
logger.error("error consuming records", e);
queue.markAsFailed(e);
throw e;
}
});
}).orElse(CompletableFuture.completedFuture(null));
}
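A stripped-down, JDK-only sketch of the orchestration above: a producer pushes transformed records into a bounded queue, a single consumer drains it, and a poison pill marks end-of-input (the pill is an assumption of this sketch; it is not how Hudi's queue signals completion, and Hudi bounds the queue by estimated bytes rather than entry count):

import java.util.Arrays;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.function.Function;

public class MiniExecutorSketch {

  private static final String EOF = "__EOF__";

  public static void main(String[] args) throws Exception {
    BlockingQueue<String> queue = new LinkedBlockingQueue<>(16);
    Function<Integer, String> transform = i -> "record-" + i;
    ExecutorService pool = Executors.newFixedThreadPool(2);

    // Producer: transforms inputs, enqueues them, then sends the pill.
    pool.submit(() -> {
      for (int i : Arrays.asList(1, 2, 3)) {
        queue.put(transform.apply(i));
      }
      queue.put(EOF);
      return null;
    });

    // Consumer: drains until the pill and returns a result, like consume(queue) above.
    Future<Integer> consumed = pool.submit(() -> {
      int count = 0;
      for (String rec = queue.take(); !rec.equals(EOF); rec = queue.take()) {
        count++;
      }
      return count;
    });

    System.out.println("consumed " + consumed.get() + " records"); // consumed 3 records
    pool.shutdown();
  }
}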

View File

@@ -36,12 +36,12 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Used for enqueueing input records. Queue limit is controlled by {@link #memoryLimit}.
* Unlike standard bounded queue implementations, this queue bounds the size by memory bytes occupied by its
* tenants. The standard implementation bounds by the number of entries in the queue.
* Used for enqueueing input records. Queue limit is controlled by {@link #memoryLimit}. Unlike standard bounded queue
* implementations, this queue bounds the size by memory bytes occupied by its tenants. The standard implementation
* bounds by the number of entries in the queue.
*
* It internally samples every {@link #RECORD_SAMPLING_RATE}th record and adjusts the number of records in
* the queue accordingly. This is done to ensure that we don't OOM.
* It internally samples every {@link #RECORD_SAMPLING_RATE}th record and adjusts the number of records in the queue
* accordingly. This is done to ensure that we don't OOM.
*
* This queue supports a multiple-producer, single-consumer pattern.
*
@@ -65,8 +65,7 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
// used for sampling records with "RECORD_SAMPLING_RATE" frequency.
public final AtomicLong samplingRecordCounter = new AtomicLong(-1);
// internal queue for records.
private final LinkedBlockingQueue<Option<O>> queue = new
LinkedBlockingQueue<>();
private final LinkedBlockingQueue<Option<O>> queue = new LinkedBlockingQueue<>();
// maximum amount of memory to be used for queueing records.
private final long memoryLimit;
// it holds the root cause of the exception in case either queueing records (consuming from
@@ -96,24 +95,21 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
/**
* Construct BoundedInMemoryQueue with the default SizeEstimator.
*
* @param memoryLimit MemoryLimit in bytes
* @param memoryLimit MemoryLimit in bytes
* @param transformFunction Transformer Function to convert input payload type to stored payload type
*/
public BoundedInMemoryQueue(final long memoryLimit, final Function<I, O> transformFunction) {
this(memoryLimit, transformFunction, new DefaultSizeEstimator() {
});
this(memoryLimit, transformFunction, new DefaultSizeEstimator() {});
}
/**
* Construct BoundedInMemoryQueue with a passed-in size estimator.
*
* @param memoryLimit MemoryLimit in bytes
* @param transformFunction Transformer Function to convert input payload type to stored payload type
* @param memoryLimit MemoryLimit in bytes
* @param transformFunction Transformer Function to convert input payload type to stored payload type
* @param payloadSizeEstimator Payload Size Estimator
*/
public BoundedInMemoryQueue(
final long memoryLimit,
final Function<I, O> transformFunction,
public BoundedInMemoryQueue(final long memoryLimit, final Function<I, O> transformFunction,
final SizeEstimator<O> payloadSizeEstimator) {
this.memoryLimit = memoryLimit;
this.transformFunction = transformFunction;
@@ -127,9 +123,9 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
}
/**
* Samples records with "RECORD_SAMPLING_RATE" frequency and computes average record size in bytes. It is used
* for determining the maximum number of records to queue. Based on changes in the average size it may increase or decrease
* available permits.
* Samples records with "RECORD_SAMPLING_RATE" frequency and computes average record size in bytes. It is used for
* determining the maximum number of records to queue. Based on changes in the average size it may increase or decrease available
* permits.
*
* @param payload Payload to size
*/
@@ -139,10 +135,10 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
}
final long recordSizeInBytes = payloadSizeEstimator.sizeEstimate(payload);
final long newAvgRecordSizeInBytes = Math
.max(1, (avgRecordSizeInBytes * numSamples + recordSizeInBytes) / (numSamples + 1));
final int newRateLimit = (int) Math
.min(RECORD_CACHING_LIMIT, Math.max(1, this.memoryLimit / newAvgRecordSizeInBytes));
final long newAvgRecordSizeInBytes =
Math.max(1, (avgRecordSizeInBytes * numSamples + recordSizeInBytes) / (numSamples + 1));
final int newRateLimit =
(int) Math.min(RECORD_CACHING_LIMIT, Math.max(1, this.memoryLimit / newAvgRecordSizeInBytes));
// If there is any change in number of records to cache then we will either release (if it increased) or acquire
// (if it decreased) to adjust rate limiting to newly computed value.
@@ -187,8 +183,8 @@ public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
}
/**
* Reader interface but never exposed to the outside world as this is a single-consumer queue.
* Reading is done through a singleton iterator for this queue.
* Reader interface but never exposed to the outside world as this is a single-consumer queue. Reading is done through a
* singleton iterator for this queue.
*/
private Option<O> readNextRecord() {
if (this.isReadDone.get()) {
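A worked check of the permit arithmetic above. The value of RECORD_CACHING_LIMIT is not shown in this hunk, so the cap below is an assumption for the demo:

public class RateLimitMath {

  static final int RECORD_CACHING_LIMIT = 128 * 1024; // assumed cap, not Hudi's actual value

  // Same formulas as the sampling code above: fold the new sample into the
  // running average, then derive how many records fit in the memory budget.
  static int newRateLimit(long memoryLimit, long avgSize, long numSamples, long newRecordSize) {
    long newAvg = Math.max(1, (avgSize * numSamples + newRecordSize) / (numSamples + 1));
    return (int) Math.min(RECORD_CACHING_LIMIT, Math.max(1, memoryLimit / newAvg));
  }

  public static void main(String[] args) {
    // 64 MB budget, running average of 1024 bytes over 99 samples, new 2048-byte sample:
    // new average = (1024 * 99 + 2048) / 100 = 1034 bytes, so 67108864 / 1034 = 64902 permits.
    System.out.println(newRateLimit(64L << 20, 1024, 99, 2048)); // prints: 64902
  }
}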

View File

@@ -19,8 +19,7 @@
package org.apache.hudi.common.util.queue;
/**
* Producer for BoundedInMemoryQueue. The memory-bounded buffer supports
* a multiple-producer, single-consumer pattern.
* Producer for BoundedInMemoryQueue. The memory-bounded buffer supports a multiple-producer, single-consumer pattern.
*
* @param <I> Input type for buffer items produced
*/

View File

@@ -32,15 +32,13 @@ public class DefaultHoodieConfig implements Serializable {
this.props = props;
}
public static void setDefaultOnCondition(Properties props, boolean condition, String propName,
String defaultValue) {
public static void setDefaultOnCondition(Properties props, boolean condition, String propName, String defaultValue) {
if (condition) {
props.setProperty(propName, defaultValue);
}
}
public static void setDefaultOnCondition(Properties props, boolean condition,
DefaultHoodieConfig config) {
public static void setDefaultOnCondition(Properties props, boolean condition, DefaultHoodieConfig config) {
if (condition) {
props.putAll(config.getProps());
}
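Typical call pattern for the helpers above: the condition is usually "this key is not yet set", so user-supplied values win and defaults only fill the gaps. A small sketch (the property names are illustrative):

import java.util.Properties;

public class DefaultsDemo {

  // Mirrors setDefaultOnCondition above: only set the property when the condition holds.
  static void setDefaultOnCondition(Properties props, boolean condition, String name, String value) {
    if (condition) {
      props.setProperty(name, value);
    }
  }

  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("hoodie.insert.shuffle.parallelism", "200"); // user-supplied
    setDefaultOnCondition(props, !props.containsKey("hoodie.insert.shuffle.parallelism"),
        "hoodie.insert.shuffle.parallelism", "1500");
    setDefaultOnCondition(props, !props.containsKey("hoodie.upsert.shuffle.parallelism"),
        "hoodie.upsert.shuffle.parallelism", "1500");
    System.out.println(props); // keeps 200 for insert, adds the 1500 default for upsert
  }
}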

View File

@@ -23,7 +23,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* <p> Exception thrown to indicate that a hoodie dataset was not found on the path provided. </p>
* <p>
* Exception thrown to indicate that a hoodie dataset was not found on the path provided.
* </p>
*/
public class DatasetNotFoundException extends HoodieException {
@@ -50,8 +52,7 @@ public class DatasetNotFoundException extends HoodieException {
// if the base path is file:///, then we have an IllegalArgumentException
throw new DatasetNotFoundException(metaPathDir.toString());
} catch (IOException e) {
throw new HoodieIOException(
"Could not check if dataset " + basePathDir + " is valid dataset", e);
throw new HoodieIOException("Could not check if dataset " + basePathDir + " is valid dataset", e);
}
}
}

Some files were not shown because too many files have changed in this diff.