1
0

Fix javadoc issues. Prelude to pushing libs to maven central

This commit is contained in:
Prasanna Rajaperumal
2016-12-21 17:34:29 -08:00
parent 12885ab60a
commit b6d8e857f0
11 changed files with 20 additions and 169 deletions

View File

@@ -62,147 +62,6 @@ import org.apache.hadoop.util.MergeSort;
import org.apache.hadoop.util.PriorityQueue;
import org.apache.hadoop.util.Time;
/**
* <code>SequenceFile</code>s are flat files consisting of binary key/value
* pairs.
*
* <p><code>SequenceFile</code> provides {@link HoodieAppendLog.Writer},
* {@link HoodieAppendLog.Reader} and {@link Sorter} classes for writing,
* reading and sorting respectively.</p>
*
* There are three <code>SequenceFile</code> <code>Writer</code>s based on the
* {@link CompressionType} used to compress key/value pairs:
* <ol>
* <li>
* <code>Writer</code> : Uncompressed records.
* </li>
* <li>
* <code>RecordCompressWriter</code> : Record-compressed files, only compress
* values.
* </li>
* <li>
* <code>BlockCompressWriter</code> : Block-compressed files, both keys &amp;
* values are collected in 'blocks'
* separately and compressed. The size of
* the 'block' is configurable.
* </li>
* </ol>
*
* <p>The actual compression algorithm used to compress key and/or values can be
* specified by using the appropriate {@link CompressionCodec}.</p>
*
* <p>The recommended way is to use the static <code>createWriter</code> methods
* provided by the <code>SequenceFile</code> to choose the preferred format.</p>
*
* <p>The {@link HoodieAppendLog.Reader} acts as the bridge and can read any of the
* above <code>SequenceFile</code> formats.</p>
*
* <h4 id="Formats">SequenceFile Formats</h4>
*
* <p>Essentially there are 3 different formats for <code>SequenceFile</code>s
* depending on the <code>CompressionType</code> specified. All of them share a
* <a href="#Header">common header</a> described below.
*
* <h5 id="Header">SequenceFile Header</h5>
* <ul>
* <li>
* version - 3 bytes of magic header <b>SEQ</b>, followed by 1 byte of actual
* version number (e.g. SEQ4 or SEQ6)
* </li>
* <li>
* keyClassName - key class
* </li>
* <li>
* valueClassName - value class
* </li>
* <li>
* compression - A boolean which specifies if compression is turned on for
* keys/values in this file.
* </li>
* <li>
* blockCompression - A boolean which specifies if block-compression is
* turned on for keys/values in this file.
* </li>
* <li>
* compression codec - <code>CompressionCodec</code> class which is used for
* compression of keys and/or values (if compression is
* enabled).
* </li>
* <li>
* metadata - {@link Metadata} for this file.
* </li>
* <li>
* sync - A sync marker to denote end of the header.
* </li>
* </ul>
*
* <h5 id="UncompressedFormat">Uncompressed SequenceFile Format</h5>
* <ul>
* <li>
* <a href="#Header">Header</a>
* </li>
* <li>
* Record
* <ul>
* <li>Record length</li>
* <li>Key length</li>
* <li>Key</li>
* <li>Value</li>
* </ul>
* </li>
* <li>
* A sync-marker every few <code>100</code> bytes or so.
* </li>
* </ul>
*
* <h5 id="RecordCompressedFormat">Record-Compressed SequenceFile Format</h5>
* <ul>
* <li>
* <a href="#Header">Header</a>
* </li>
* <li>
* Record
* <ul>
* <li>Record length</li>
* <li>Key length</li>
* <li>Key</li>
* <li><i>Compressed</i> Value</li>
* </ul>
* </li>
* <li>
* A sync-marker every few <code>100</code> bytes or so.
* </li>
* </ul>
*
* <h5 id="BlockCompressedFormat">Block-Compressed SequenceFile Format</h5>
* <ul>
* <li>
* <a href="#Header">Header</a>
* </li>
* <li>
* Record <i>Block</i>
* <ul>
* <li>Uncompressed number of records in the block</li>
* <li>Compressed key-lengths block-size</li>
* <li>Compressed key-lengths block</li>
* <li>Compressed keys block-size</li>
* <li>Compressed keys block</li>
* <li>Compressed value-lengths block-size</li>
* <li>Compressed value-lengths block</li>
* <li>Compressed values block-size</li>
* <li>Compressed values block</li>
* </ul>
* </li>
* <li>
* A sync-marker every block.
* </li>
* </ul>
*
* <p>The compressed blocks of key lengths and value lengths consist of the
* actual lengths of individual keys/values encoded in ZeroCompressedInteger
* format.</p>
*
* @see CompressionCodec
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HoodieAppendLog {
@@ -1930,7 +1789,7 @@ public class HoodieAppendLog {
* @param fs The file system used to open the file.
* @param file The file being read.
* @param bufferSize The buffer size used to read the file.
* @param length The length being read if it is >= 0. Otherwise,
* @param length The length being read if it is {@code >= 0}. Otherwise,
* the length is not available.
* @return The opened stream.
* @throws IOException

View File

@@ -128,7 +128,7 @@ public class HoodieCommits implements Serializable {
}
/**
* Returns the nth commit from the latest commit such that lastCommit(0) => lastCommit()
* Returns the nth commit from the latest commit, such that lastCommit(0) is equivalent to lastCommit()
*/
public String lastCommit(int n) {
if (commitList.size() < n + 1) {

View File

@@ -77,7 +77,6 @@ public class HoodieTableMetadata implements Serializable {
* @param fs
* @param basePath
* @param tableName
* @throws IOException
*/
public HoodieTableMetadata(FileSystem fs, String basePath, String tableName) {
this(fs, basePath, tableName, true);
@@ -87,7 +86,6 @@ public class HoodieTableMetadata implements Serializable {
* Constructor which loads the hoodie table metadata. It requires the metadata to be present already.
* @param fs
* @param basePath
* @throws IOException
*/
public HoodieTableMetadata(FileSystem fs, String basePath) {
this(fs, basePath, null, false);
@@ -137,8 +135,7 @@ public class HoodieTableMetadata implements Serializable {
* Returns all the commit metadata for this table. Reads all the commit files from HDFS.
* Expensive operation, use with caution.
*
* @return SortedMap of CommitTime,<class>HoodieCommitMetadata</class>
* @throws IOException
* @return SortedMap of CommitTime,HoodieCommitMetadata
*/
public SortedMap<String, HoodieCommitMetadata> getAllCommitMetadata() {
try {
@@ -169,7 +166,7 @@ public class HoodieTableMetadata implements Serializable {
/**
* Lookup the file name for specified <code>HoodieRecord</code>
* <p/>
*
* TODO(vc): This metadata needs to be cached in each executor, statically, and used across, if
* we need to be nicer to the NameNode
*/
@@ -200,7 +197,7 @@ public class HoodieTableMetadata implements Serializable {
/**
* Get only the latest file in the partition with precondition commitTime(file) < maxCommitTime
* Get only the latest file in the partition with precondition {@code commitTime(file) < maxCommitTime}
*
* @param fs
* @param partitionPathStr