Fix javadoc issues. Prelude to pushing libs to maven central
This commit is contained in:
@@ -60,7 +60,7 @@ import scala.Tuple2;
|
||||
* Provides first class support for accessing Hoodie tables for data processing via Apache Spark.
|
||||
*
|
||||
*
|
||||
* TODO: Need to move all read operations here, since Hoodie is a single writer & multiple reader
|
||||
* TODO: Need to move all read operations here, since Hoodie is a single writer and multiple reader
|
||||
*/
|
||||
public class HoodieReadClient implements Serializable {
|
||||
|
||||
@@ -247,7 +247,7 @@ public class HoodieReadClient implements Serializable {
|
||||
|
||||
/**
|
||||
* Checks if the given [Keys] exists in the hoodie table and returns [Key,
|
||||
* Optional<FullFilePath>] If the optional FullFilePath value is not present, then the key is
|
||||
* Optional[FullFilePath]] If the optional FullFilePath value is not present, then the key is
|
||||
* not found. If the FullFilePath value is present, it is the path component (without scheme) of
|
||||
* the underlying file's URI.
|
||||
*/
|
||||
|
||||
@@ -105,7 +105,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
* @param jsc
|
||||
* @param clientConfig
|
||||
* @param rollbackInFlight
|
||||
* @throws Exception
|
||||
*/
|
||||
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight) {
|
||||
this.fs = FSUtils.getFs();
|
||||
@@ -234,7 +233,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
*
|
||||
* @param records HoodieRecords to insert
|
||||
* @param commitTime Commit Time handle
|
||||
* @return JavaRDD<WriteStatus> - RDD of WriteStatus to inspect errors and counts
|
||||
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
||||
*
|
||||
*/
|
||||
public JavaRDD<WriteStatus> insert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
|
||||
|
||||
@@ -33,7 +33,7 @@ import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Base class for different types of indexes to determine the mapping from uuid
|
||||
* <p/>
|
||||
*
|
||||
* TODO(vc): need methods for recovery and rollback
|
||||
*/
|
||||
public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Serializable {
|
||||
@@ -53,7 +53,7 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the given [Keys] exists in the hoodie table and returns [Key, Optional<FullFilePath>]
|
||||
* Checks if the given [Keys] exists in the hoodie table and returns [Key, Optional[FullFilePath]]
|
||||
* If the optional FullFilePath value is not present, then the key is not found. If the FullFilePath
|
||||
* value is present, it is the path component (without scheme) of the underlying file's URI
|
||||
*
|
||||
@@ -74,7 +74,7 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
|
||||
|
||||
/**
|
||||
* Extracts the location of written records, and updates the index.
|
||||
* <p/>
|
||||
*
|
||||
* TODO(vc): We may need to propagate the record as well in a WriteStatus class
|
||||
*/
|
||||
public abstract JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD,
|
||||
|
||||
@@ -64,9 +64,9 @@ public class HoodieInsertHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
||||
|
||||
/**
|
||||
* Determines whether we can accept the incoming records, into the current file, depending on
|
||||
* <p/>
|
||||
*
|
||||
* - Whether it belongs to the same partitionPath as existing records
|
||||
* - Whether the current file written bytes < max file size
|
||||
* - Whether the current file's written bytes are less than the max file size
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
|
||||
@@ -26,11 +26,7 @@ import org.apache.commons.configuration.ConfigurationException;
|
||||
import java.io.Closeable;
|
||||
|
||||
/**
|
||||
* This is the main class of the metrics system. To use it,
|
||||
* users need to call the {@link #init(HoodieMetricsConfig) init} method to initialize the system.
|
||||
* Input for {@link #init(HoodieMetricsConfig) init} includes a configuration object, where
|
||||
* users can specify the reporter type, and special configs for that reporter.
|
||||
* Refer to {@see MetricsConfiguration} for more configurable fields.
|
||||
* This is the main class of the metrics system.
|
||||
*/
|
||||
public class Metrics {
|
||||
private static volatile boolean initialized = false;
|
||||
|
||||
@@ -115,7 +115,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper class for a bucket's type (INSERT & UPDATE) and its file location
|
||||
* Helper class for a bucket's type (INSERT and UPDATE) and its file location
|
||||
*/
|
||||
class BucketInfo implements Serializable {
|
||||
BucketType bucketType;
|
||||
|
||||
@@ -62,147 +62,6 @@ import org.apache.hadoop.util.MergeSort;
|
||||
import org.apache.hadoop.util.PriorityQueue;
|
||||
import org.apache.hadoop.util.Time;
|
||||
|
||||
/**
|
||||
* <code>SequenceFile</code>s are flat files consisting of binary key/value
|
||||
* pairs.
|
||||
*
|
||||
* <p><code>SequenceFile</code> provides {@link HoodieAppendLog.Writer},
|
||||
* {@link HoodieAppendLog.Reader} and {@link Sorter} classes for writing,
|
||||
* reading and sorting respectively.</p>
|
||||
*
|
||||
* There are three <code>SequenceFile</code> <code>Writer</code>s based on the
|
||||
* {@link CompressionType} used to compress key/value pairs:
|
||||
* <ol>
|
||||
* <li>
|
||||
* <code>Writer</code> : Uncompressed records.
|
||||
* </li>
|
||||
* <li>
|
||||
* <code>RecordCompressWriter</code> : Record-compressed files, only compress
|
||||
* values.
|
||||
* </li>
|
||||
* <li>
|
||||
* <code>BlockCompressWriter</code> : Block-compressed files, both keys &
|
||||
* values are collected in 'blocks'
|
||||
* separately and compressed. The size of
|
||||
* the 'block' is configurable.
|
||||
* </ol>
|
||||
*
|
||||
* <p>The actual compression algorithm used to compress key and/or values can be
|
||||
* specified by using the appropriate {@link CompressionCodec}.</p>
|
||||
*
|
||||
* <p>The recommended way is to use the static <tt>createWriter</tt> methods
|
||||
* provided by the <code>SequenceFile</code> to chose the preferred format.</p>
|
||||
*
|
||||
* <p>The {@link HoodieAppendLog.Reader} acts as the bridge and can read any of the
|
||||
* above <code>SequenceFile</code> formats.</p>
|
||||
*
|
||||
* <h4 id="Formats">SequenceFile Formats</h4>
|
||||
*
|
||||
* <p>Essentially there are 3 different formats for <code>SequenceFile</code>s
|
||||
* depending on the <code>CompressionType</code> specified. All of them share a
|
||||
* <a href="#Header">common header</a> described below.
|
||||
*
|
||||
* <h5 id="Header">SequenceFile Header</h5>
|
||||
* <ul>
|
||||
* <li>
|
||||
* version - 3 bytes of magic header <b>SEQ</b>, followed by 1 byte of actual
|
||||
* version number (e.g. SEQ4 or SEQ6)
|
||||
* </li>
|
||||
* <li>
|
||||
* keyClassName -key class
|
||||
* </li>
|
||||
* <li>
|
||||
* valueClassName - value class
|
||||
* </li>
|
||||
* <li>
|
||||
* compression - A boolean which specifies if compression is turned on for
|
||||
* keys/values in this file.
|
||||
* </li>
|
||||
* <li>
|
||||
* blockCompression - A boolean which specifies if block-compression is
|
||||
* turned on for keys/values in this file.
|
||||
* </li>
|
||||
* <li>
|
||||
* compression codec - <code>CompressionCodec</code> class which is used for
|
||||
* compression of keys and/or values (if compression is
|
||||
* enabled).
|
||||
* </li>
|
||||
* <li>
|
||||
* metadata - {@link Metadata} for this file.
|
||||
* </li>
|
||||
* <li>
|
||||
* sync - A sync marker to denote end of the header.
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* <h5 id="#UncompressedFormat">Uncompressed SequenceFile Format</h5>
|
||||
* <ul>
|
||||
* <li>
|
||||
* <a href="#Header">Header</a>
|
||||
* </li>
|
||||
* <li>
|
||||
* Record
|
||||
* <ul>
|
||||
* <li>Record length</li>
|
||||
* <li>Key length</li>
|
||||
* <li>Key</li>
|
||||
* <li>Value</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>
|
||||
* A sync-marker every few <code>100</code> bytes or so.
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* <h5 id="#RecordCompressedFormat">Record-Compressed SequenceFile Format</h5>
|
||||
* <ul>
|
||||
* <li>
|
||||
* <a href="#Header">Header</a>
|
||||
* </li>
|
||||
* <li>
|
||||
* Record
|
||||
* <ul>
|
||||
* <li>Record length</li>
|
||||
* <li>Key length</li>
|
||||
* <li>Key</li>
|
||||
* <li><i>Compressed</i> Value</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>
|
||||
* A sync-marker every few <code>100</code> bytes or so.
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* <h5 id="#BlockCompressedFormat">Block-Compressed SequenceFile Format</h5>
|
||||
* <ul>
|
||||
* <li>
|
||||
* <a href="#Header">Header</a>
|
||||
* </li>
|
||||
* <li>
|
||||
* Record <i>Block</i>
|
||||
* <ul>
|
||||
* <li>Uncompressed number of records in the block</li>
|
||||
* <li>Compressed key-lengths block-size</li>
|
||||
* <li>Compressed key-lengths block</li>
|
||||
* <li>Compressed keys block-size</li>
|
||||
* <li>Compressed keys block</li>
|
||||
* <li>Compressed value-lengths block-size</li>
|
||||
* <li>Compressed value-lengths block</li>
|
||||
* <li>Compressed values block-size</li>
|
||||
* <li>Compressed values block</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>
|
||||
* A sync-marker every block.
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>The compressed blocks of key lengths and value lengths consist of the
|
||||
* actual lengths of individual keys/values encoded in ZeroCompressedInteger
|
||||
* format.</p>
|
||||
*
|
||||
* @see CompressionCodec
|
||||
*/
|
||||
@InterfaceAudience.Public
|
||||
@InterfaceStability.Stable
|
||||
public class HoodieAppendLog {
|
||||
@@ -1930,7 +1789,7 @@ public class HoodieAppendLog {
|
||||
* @param fs The file system used to open the file.
|
||||
* @param file The file being read.
|
||||
* @param bufferSize The buffer size used to read the file.
|
||||
* @param length The length being read if it is >= 0. Otherwise,
|
||||
* @param length The length being read if it is greater than or equal to 0. Otherwise,
|
||||
* the length is not available.
|
||||
* @return The opened stream.
|
||||
* @throws IOException
|
||||
|
||||
@@ -128,7 +128,7 @@ public class HoodieCommits implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the nth commit from the latest commit such that lastCommit(0) => lastCommit()
|
||||
* Returns the nth commit from the latest commit, such that lastCommit(0) is the same as lastCommit()
|
||||
*/
|
||||
public String lastCommit(int n) {
|
||||
if (commitList.size() < n + 1) {
|
||||
|
||||
@@ -77,7 +77,6 @@ public class HoodieTableMetadata implements Serializable {
|
||||
* @param fs
|
||||
* @param basePath
|
||||
* @param tableName
|
||||
* @throws IOException
|
||||
*/
|
||||
public HoodieTableMetadata(FileSystem fs, String basePath, String tableName) {
|
||||
this(fs, basePath, tableName, true);
|
||||
@@ -87,7 +86,6 @@ public class HoodieTableMetadata implements Serializable {
|
||||
* Constructor which loads the hoodie table metadata. It requires the metadata to be present already.
|
||||
* @param fs
|
||||
* @param basePath
|
||||
* @throws IOException
|
||||
*/
|
||||
public HoodieTableMetadata(FileSystem fs, String basePath) {
|
||||
this(fs, basePath, null, false);
|
||||
@@ -137,8 +135,7 @@ public class HoodieTableMetadata implements Serializable {
|
||||
* Returns all the commit metadata for this table. Reads all the commit files from HDFS.
|
||||
* Expensive operation, use with caution.
|
||||
*
|
||||
* @return SortedMap of CommitTime,<class>HoodieCommitMetadata</class>
|
||||
* @throws IOException
|
||||
* @return SortedMap of CommitTime,HoodieCommitMetadata
|
||||
*/
|
||||
public SortedMap<String, HoodieCommitMetadata> getAllCommitMetadata() {
|
||||
try {
|
||||
@@ -169,7 +166,7 @@ public class HoodieTableMetadata implements Serializable {
|
||||
|
||||
/**
|
||||
* Lookup the file name for specified <code>HoodieRecord</code>
|
||||
* <p/>
|
||||
*
|
||||
* TODO(vc): This metadata needs to be cached in each executor, statically, and used across, if
|
||||
* we need to be nicer to the NameNode
|
||||
*/
|
||||
@@ -200,7 +197,7 @@ public class HoodieTableMetadata implements Serializable {
|
||||
|
||||
|
||||
/**
|
||||
* Get only the latest file in the partition with precondition commitTime(file) < maxCommitTime
|
||||
* Get only the latest file in the partition with precondition commitTime(file) less than maxCommitTime
|
||||
*
|
||||
* @param fs
|
||||
* @param partitionPathStr
|
||||
|
||||
@@ -138,7 +138,7 @@ public class SchemaUtil {
|
||||
* Returns equivalent Hive table schema read from a parquet file
|
||||
*
|
||||
* @param messageType : Parquet Schema
|
||||
* @return : Hive Table schema read from parquet file MAP<String,String>
|
||||
* @return : Hive Table schema read from parquet file MAP[String,String]
|
||||
* @throws IOException
|
||||
*/
|
||||
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType)
|
||||
|
||||
8
pom.xml
8
pom.xml
@@ -22,6 +22,10 @@
|
||||
<artifactId>hoodie</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
<version>0.2.7-SNAPSHOT</version>
|
||||
<description>Hoodie is an Apache Spark library that provides the ability to efficiently do incremental processing on datasets in HDFS.</description>
|
||||
<url>https://github.com/uber/hoodie</url>
|
||||
<name>Hoodie</name>
|
||||
|
||||
<modules>
|
||||
<module>hoodie-common</module>
|
||||
<module>hoodie-client</module>
|
||||
@@ -498,10 +502,6 @@
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<failOnError>false</failOnError>
|
||||
<quiet>true</quiet>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
|
||||
Reference in New Issue
Block a user