[HUDI-379] Refactor the codes based on new JavadocStyle code style rule (#1079)
This commit is contained in:
@@ -61,7 +61,7 @@ import java.util.stream.Collectors;
|
||||
import static org.apache.hudi.common.table.HoodieTimeline.COMPACTION_ACTION;
|
||||
|
||||
/**
|
||||
* Client to perform admin operations related to compaction
|
||||
* Client to perform admin operations related to compaction.
|
||||
*/
|
||||
public class CompactionAdminClient extends AbstractHoodieClient {
|
||||
|
||||
@@ -214,7 +214,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Construction Compaction Plan from compaction instant
|
||||
* Construct Compaction Plan from compaction instant.
|
||||
*/
|
||||
private static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant)
|
||||
throws IOException {
|
||||
@@ -273,7 +273,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a compaction operation is valid
|
||||
* Check if a compaction operation is valid.
|
||||
*
|
||||
* @param metaClient Hoodie Table Meta client
|
||||
* @param compactionInstant Compaction Instant
|
||||
@@ -342,7 +342,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute Renaming operation
|
||||
* Execute Renaming operation.
|
||||
*
|
||||
* @param metaClient HoodieTable MetaClient
|
||||
* @param renameActions List of rename operations
|
||||
@@ -484,7 +484,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds Operation result for Renaming
|
||||
* Holds Operation result for Renaming.
|
||||
*/
|
||||
public static class RenameOpResult extends OperationResult<RenameInfo> {
|
||||
|
||||
@@ -505,7 +505,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds Operation result for Renaming
|
||||
* Holds Operation result for Validation.
|
||||
*/
|
||||
public static class ValidationOpResult extends OperationResult<CompactionOperation> {
|
||||
|
||||
|
||||
@@ -103,7 +103,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Cleaner plan if there are files to be cleaned and stores them in instant file
|
||||
* Creates a Cleaner plan if there are files to be cleaned and stores them in instant file.
|
||||
*
|
||||
* @param startCleanTime Cleaner Instant Time
|
||||
* @return Cleaner Plan if generated
|
||||
@@ -133,7 +133,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes the Cleaner plan stored in the instant metadata
|
||||
* Executes the Cleaner plan stored in the instant metadata.
|
||||
*
|
||||
* @param table Hoodie Table
|
||||
* @param cleanInstantTs Cleaner Instant Timestamp
|
||||
|
||||
@@ -145,7 +145,7 @@ public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoo
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a bunch of hoodie keys, fetches all the individual records out as a data frame
|
||||
* Given a bunch of hoodie keys, fetches all the individual records out as a data frame.
|
||||
*
|
||||
* @return a dataframe
|
||||
*/
|
||||
|
||||
@@ -159,7 +159,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Upserts a bunch of new records into the Hoodie table, at the supplied commitTime
|
||||
* Upserts a bunch of new records into the Hoodie table, at the supplied commitTime.
|
||||
*/
|
||||
public JavaRDD<WriteStatus> upsert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
|
||||
HoodieTable<T> table = getTableAndInitCtx(OperationType.UPSERT);
|
||||
@@ -505,14 +505,14 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit changes performed at the given commitTime marker
|
||||
* Commit changes performed at the given commitTime marker.
|
||||
*/
|
||||
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses) {
|
||||
return commit(commitTime, writeStatuses, Option.empty());
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit changes performed at the given commitTime marker
|
||||
* Commit changes performed at the given commitTime marker.
|
||||
*/
|
||||
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
|
||||
Option<Map<String, String>> extraMetadata) {
|
||||
@@ -988,7 +988,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a new commit time for a write operation (insert/update)
|
||||
* Provides a new commit time for a write operation (insert/update).
|
||||
*/
|
||||
public String startCommit() {
|
||||
// NOTE : Need to ensure that rollback is done before a new commit is started
|
||||
@@ -1027,7 +1027,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedules a new compaction instant
|
||||
* Schedules a new compaction instant.
|
||||
*/
|
||||
public Option<String> scheduleCompaction(Option<Map<String, String>> extraMetadata) throws IOException {
|
||||
String instantTime = HoodieActiveTimeline.createNewCommitTime();
|
||||
@@ -1037,7 +1037,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedules a new compaction instant with passed-in instant time
|
||||
* Schedules a new compaction instant with passed-in instant time.
|
||||
*
|
||||
* @param instantTime Compaction Instant Time
|
||||
* @param extraMetadata Extra Metadata to be stored
|
||||
@@ -1074,7 +1074,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs Compaction for the workload stored in instant-time
|
||||
* Performs Compaction for the workload stored in instant-time.
|
||||
*
|
||||
* @param compactionInstantTime Compaction Instant Time
|
||||
*/
|
||||
@@ -1141,7 +1141,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup all inflight commits
|
||||
* Cleanup all inflight commits.
|
||||
*/
|
||||
private void rollbackInflightCommits() {
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||
@@ -1197,7 +1197,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
*/
|
||||
|
||||
/**
|
||||
* Ensures compaction instant is in expected state and performs Compaction for the workload stored in instant-time
|
||||
* Ensures compaction instant is in expected state and performs Compaction for the workload stored in instant-time.
|
||||
*
|
||||
* @param compactionInstantTime Compaction Instant Time
|
||||
*/
|
||||
@@ -1226,7 +1226,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform compaction operations as specified in the compaction commit file
|
||||
* Perform compaction operations as specified in the compaction commit file.
|
||||
*
|
||||
* @param compactionInstant Compaction Instant time
|
||||
* @param activeTimeline Active Timeline
|
||||
@@ -1254,7 +1254,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit Compaction and track metrics
|
||||
* Commit Compaction and track metrics.
|
||||
*
|
||||
* @param compactedStatuses Compaction Write status
|
||||
* @param table Hoodie Table
|
||||
@@ -1404,7 +1404,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Refers to different operation types
|
||||
* Refers to different operation types.
|
||||
*/
|
||||
enum OperationType {
|
||||
INSERT,
|
||||
|
||||
@@ -33,7 +33,7 @@ import org.apache.spark.SparkConf;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Timeline Service that runs as part of write client
|
||||
* Timeline Service that runs as part of write client.
|
||||
*/
|
||||
public class EmbeddedTimelineService {
|
||||
|
||||
@@ -86,7 +86,7 @@ public class EmbeddedTimelineService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves proper view storage configs for remote clients to access this service
|
||||
* Retrieves proper view storage configs for remote clients to access this service.
|
||||
*/
|
||||
public FileSystemViewStorageConfig getRemoteFileSystemViewConfig() {
|
||||
return FileSystemViewStorageConfig.newBuilder().withStorageType(FileSystemViewStorageType.REMOTE_FIRST)
|
||||
|
||||
@@ -26,7 +26,7 @@ import org.apache.spark.api.java.JavaSparkContext;
|
||||
public class ClientUtils {
|
||||
|
||||
/**
|
||||
* Create Consistency Aware MetaClient
|
||||
* Create Consistency Aware MetaClient.
|
||||
*
|
||||
* @param jsc JavaSparkContext
|
||||
* @param config HoodieWriteConfig
|
||||
|
||||
@@ -33,7 +33,7 @@ import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Compaction related config
|
||||
* Compaction related config.
|
||||
*/
|
||||
@Immutable
|
||||
public class HoodieCompactionConfig extends DefaultHoodieConfig {
|
||||
@@ -55,8 +55,8 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
|
||||
// By default, treat any file <= 100MB as a small file.
|
||||
public static final String DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES = String.valueOf(104857600);
|
||||
/**
|
||||
* Configs related to specific table types
|
||||
**/
|
||||
* Configs related to specific table types.
|
||||
*/
|
||||
// Number of inserts, that will be put each partition/bucket for writing
|
||||
public static final String COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = "hoodie.copyonwrite.insert" + ".split.size";
|
||||
// The rationale to pick the insert parallelism is the following. Writing out 100MB files,
|
||||
|
||||
@@ -34,17 +34,17 @@ public class HoodieHBaseIndexConfig extends DefaultHoodieConfig {
|
||||
public static final String HBASE_ZK_ZNODEPARENT = "hoodie.index.hbase.zknode.path";
|
||||
/**
|
||||
* Note that if HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP is set to true, this batch size will not be honored for HBase
|
||||
* Puts
|
||||
* Puts.
|
||||
*/
|
||||
public static final String HBASE_PUT_BATCH_SIZE_PROP = "hoodie.index.hbase.put.batch.size";
|
||||
|
||||
/**
|
||||
* Property to set which implementation of HBase QPS resource allocator to be used
|
||||
* Property to set which implementation of HBase QPS resource allocator to be used.
|
||||
*/
|
||||
public static final String HBASE_INDEX_QPS_ALLOCATOR_CLASS = "hoodie.index.hbase.qps.allocator.class";
|
||||
public static final String DEFAULT_HBASE_INDEX_QPS_ALLOCATOR_CLASS = DefaultHBaseQPSResourceAllocator.class.getName();
|
||||
/**
|
||||
* Property to set to enable auto computation of put batch size
|
||||
* Property to set to enable auto computation of put batch size.
|
||||
*/
|
||||
public static final String HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP = "hoodie.index.hbase.put.batch.size.autocompute";
|
||||
public static final String DEFAULT_HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE = "false";
|
||||
@@ -62,7 +62,7 @@ public class HoodieHBaseIndexConfig extends DefaultHoodieConfig {
|
||||
*/
|
||||
public static String HBASE_MAX_QPS_PER_REGION_SERVER_PROP = "hoodie.index.hbase.max.qps.per.region.server";
|
||||
/**
|
||||
* Default batch size, used only for Get, but computed for Put
|
||||
* Default batch size, used only for Get, but computed for Put.
|
||||
*/
|
||||
public static final int DEFAULT_HBASE_BATCH_SIZE = 100;
|
||||
/**
|
||||
@@ -70,17 +70,17 @@ public class HoodieHBaseIndexConfig extends DefaultHoodieConfig {
|
||||
*/
|
||||
public static final int DEFAULT_HBASE_MAX_QPS_PER_REGION_SERVER = 1000;
|
||||
/**
|
||||
* Default is 50%, which means a total of 2 jobs can run using HbaseIndex without overwhelming Region Servers
|
||||
* Default is 50%, which means a total of 2 jobs can run using HbaseIndex without overwhelming Region Servers.
|
||||
*/
|
||||
public static final float DEFAULT_HBASE_QPS_FRACTION = 0.5f;
|
||||
|
||||
/**
|
||||
* Property to decide if HBASE_QPS_FRACTION_PROP is dynamically calculated based on volume
|
||||
* Property to decide if HBASE_QPS_FRACTION_PROP is dynamically calculated based on volume.
|
||||
*/
|
||||
public static final String HOODIE_INDEX_COMPUTE_QPS_DYNAMICALLY = "hoodie.index.hbase.dynamic_qps";
|
||||
public static final boolean DEFAULT_HOODIE_INDEX_COMPUTE_QPS_DYNAMICALLY = false;
|
||||
/**
|
||||
* Min and Max for HBASE_QPS_FRACTION_PROP to stabilize skewed volume workloads
|
||||
* Min and Max for HBASE_QPS_FRACTION_PROP to stabilize skewed volume workloads.
|
||||
*/
|
||||
public static final String HBASE_MIN_QPS_FRACTION_PROP = "hoodie.index.hbase.min.qps.fraction";
|
||||
public static final String DEFAULT_HBASE_MIN_QPS_FRACTION_PROP = "0.002";
|
||||
@@ -88,7 +88,7 @@ public class HoodieHBaseIndexConfig extends DefaultHoodieConfig {
|
||||
public static final String HBASE_MAX_QPS_FRACTION_PROP = "hoodie.index.hbase.max.qps.fraction";
|
||||
public static final String DEFAULT_HBASE_MAX_QPS_FRACTION_PROP = "0.06";
|
||||
/**
|
||||
* Hoodie index desired puts operation time in seconds
|
||||
* Hoodie index desired puts operation time in seconds.
|
||||
*/
|
||||
public static final String HOODIE_INDEX_DESIRED_PUTS_TIME_IN_SECS = "hoodie.index.hbase.desired_puts_time_in_secs";
|
||||
public static final int DEFAULT_HOODIE_INDEX_DESIRED_PUTS_TIME_IN_SECS = 600;
|
||||
|
||||
@@ -29,7 +29,7 @@ import java.util.Properties;
|
||||
|
||||
|
||||
/**
|
||||
* Indexing related config
|
||||
* Indexing related config.
|
||||
*/
|
||||
@Immutable
|
||||
public class HoodieIndexConfig extends DefaultHoodieConfig {
|
||||
|
||||
@@ -29,7 +29,7 @@ import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Memory related config
|
||||
* Memory related config.
|
||||
*/
|
||||
@Immutable
|
||||
public class HoodieMemoryConfig extends DefaultHoodieConfig {
|
||||
|
||||
@@ -26,7 +26,7 @@ import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Storage related config
|
||||
* Storage related config.
|
||||
*/
|
||||
@Immutable
|
||||
public class HoodieStorageConfig extends DefaultHoodieConfig {
|
||||
|
||||
@@ -42,7 +42,7 @@ import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Class storing configs for the {@link HoodieWriteClient}
|
||||
* Class storing configs for the {@link HoodieWriteClient}.
|
||||
*/
|
||||
@Immutable
|
||||
public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
@@ -115,8 +115,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
}
|
||||
|
||||
/**
|
||||
* base properties
|
||||
**/
|
||||
* base properties.
|
||||
*/
|
||||
public String getBasePath() {
|
||||
return props.getProperty(BASE_PATH_PROP);
|
||||
}
|
||||
@@ -210,8 +210,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
}
|
||||
|
||||
/**
|
||||
* compaction properties
|
||||
**/
|
||||
* compaction properties.
|
||||
*/
|
||||
public HoodieCleaningPolicy getCleanerPolicy() {
|
||||
return HoodieCleaningPolicy.valueOf(props.getProperty(HoodieCompactionConfig.CLEANER_POLICY_PROP));
|
||||
}
|
||||
@@ -297,8 +297,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
}
|
||||
|
||||
/**
|
||||
* index properties
|
||||
**/
|
||||
* index properties.
|
||||
*/
|
||||
public HoodieIndex.IndexType getIndexType() {
|
||||
return HoodieIndex.IndexType.valueOf(props.getProperty(HoodieIndexConfig.INDEX_TYPE_PROP));
|
||||
}
|
||||
@@ -417,8 +417,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
}
|
||||
|
||||
/**
|
||||
* storage properties
|
||||
**/
|
||||
* storage properties.
|
||||
*/
|
||||
public long getParquetMaxFileSize() {
|
||||
return Long.parseLong(props.getProperty(HoodieStorageConfig.PARQUET_FILE_MAX_BYTES));
|
||||
}
|
||||
@@ -452,8 +452,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
}
|
||||
|
||||
/**
|
||||
* metrics properties
|
||||
**/
|
||||
* metrics properties.
|
||||
*/
|
||||
public boolean isMetricsOn() {
|
||||
return Boolean.parseBoolean(props.getProperty(HoodieMetricsConfig.METRICS_ON));
|
||||
}
|
||||
@@ -483,7 +483,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
}
|
||||
|
||||
/**
|
||||
* memory configs
|
||||
* memory configs.
|
||||
*/
|
||||
public Double getMaxMemoryFractionPerPartitionMerge() {
|
||||
return Double.valueOf(props.getProperty(HoodieMemoryConfig.MAX_MEMORY_FRACTION_FOR_MERGE_PROP));
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.exception;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a delta commit
|
||||
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a delta commit.
|
||||
* </p>
|
||||
*/
|
||||
public class HoodieAppendException extends HoodieException {
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.exception;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a Commit
|
||||
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a Commit.
|
||||
* </p>
|
||||
*/
|
||||
public class HoodieCommitException extends HoodieException {
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.exception;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown when dependent system is not available
|
||||
* Exception thrown when dependent system is not available.
|
||||
* </p>
|
||||
*/
|
||||
public class HoodieDependentSystemUnavailableException extends HoodieException {
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.exception;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a bulk insert
|
||||
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a bulk insert.
|
||||
* </p>
|
||||
*/
|
||||
public class HoodieInsertException extends HoodieException {
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.exception;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a incremental upsert
|
||||
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing an incremental upsert.
|
||||
* </p>
|
||||
*/
|
||||
public class HoodieUpsertException extends HoodieException {
|
||||
|
||||
@@ -30,7 +30,7 @@ import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Map function that handles a sorted stream of HoodieRecords
|
||||
* Map function that handles a sorted stream of HoodieRecords.
|
||||
*/
|
||||
public class BulkInsertMapFunction<T extends HoodieRecordPayload>
|
||||
implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<List<WriteStatus>>> {
|
||||
|
||||
@@ -122,7 +122,7 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload>
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumes stream of hoodie records from in-memory queue and writes to one or more create-handles
|
||||
* Consumes stream of hoodie records from in-memory queue and writes to one or more create-handles.
|
||||
*/
|
||||
protected class CopyOnWriteInsertHandler
|
||||
extends BoundedInMemoryQueueConsumer<HoodieInsertValueGenResult<HoodieRecord>, List<WriteStatus>> {
|
||||
|
||||
@@ -43,7 +43,7 @@ public abstract class LazyIterableIterator<I, O> implements Iterable<O>, Iterato
|
||||
}
|
||||
|
||||
/**
|
||||
* Called once, before any elements are processed
|
||||
* Called once, before any elements are processed.
|
||||
*/
|
||||
protected abstract void start();
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ import org.apache.spark.api.java.JavaSparkContext;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Base class for different types of indexes to determine the mapping from uuid
|
||||
* Base class for different types of indexes to determine the mapping from uuid.
|
||||
*/
|
||||
public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Serializable {
|
||||
|
||||
|
||||
@@ -99,7 +99,7 @@ public class InMemoryHashIndex<T extends HoodieRecordPayload> extends HoodieInde
|
||||
}
|
||||
|
||||
/**
|
||||
* Only looks up by recordKey
|
||||
* Only looks up by recordKey.
|
||||
*/
|
||||
@Override
|
||||
public boolean isGlobal() {
|
||||
|
||||
@@ -23,7 +23,7 @@ import com.google.common.base.Objects;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Metadata about a given file group, useful for index lookup
|
||||
* Metadata about a given file group, useful for index lookup.
|
||||
*/
|
||||
public class BloomIndexFileInfo implements Serializable {
|
||||
|
||||
@@ -62,7 +62,7 @@ public class BloomIndexFileInfo implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the given key fall within the range (inclusive)
|
||||
* Does the given key fall within the range (inclusive).
|
||||
*/
|
||||
public boolean isKeyInRange(String recordKey) {
|
||||
return minRecordKey.compareTo(recordKey) <= 0 && maxRecordKey.compareTo(recordKey) >= 0;
|
||||
|
||||
@@ -141,7 +141,7 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
|
||||
|
||||
/**
|
||||
* Lookup the location for each record key and return the pair<record_key,location> for all record keys already
|
||||
* present and drop the record keys if not present
|
||||
* present and drop the record keys if not present.
|
||||
*/
|
||||
private JavaPairRDD<HoodieKey, HoodieRecordLocation> lookupIndex(
|
||||
JavaPairRDD<String, String> partitionRecordKeyPairRDD, final JavaSparkContext jsc,
|
||||
@@ -167,7 +167,7 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the estimated number of bloom filter comparisons to be performed on each file group
|
||||
* Compute the estimated number of bloom filter comparisons to be performed on each file group.
|
||||
*/
|
||||
private Map<String, Long> computeComparisonsPerFileGroup(final Map<String, Long> recordsPerPartition,
|
||||
final Map<String, List<BloomIndexFileInfo>> partitionToFileInfo,
|
||||
@@ -278,7 +278,7 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
|
||||
}
|
||||
|
||||
/**
|
||||
* This is not global, since we depend on the partitionPath to do the lookup
|
||||
* This is not global, since we depend on the partitionPath to do the lookup.
|
||||
*/
|
||||
@Override
|
||||
public boolean isGlobal() {
|
||||
|
||||
@@ -37,7 +37,7 @@ import java.util.List;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Function performing actual checking of RDD partition containing (fileId, hoodieKeys) against the actual files
|
||||
* Function performing actual checking of RDD partition containing (fileId, hoodieKeys) against the actual files.
|
||||
*/
|
||||
public class HoodieBloomIndexCheckFunction
|
||||
implements Function2<Integer, Iterator<Tuple2<String, HoodieKey>>, Iterator<List<KeyLookupResult>>> {
|
||||
|
||||
@@ -106,7 +106,7 @@ public class HoodieGlobalBloomIndex<T extends HoodieRecordPayload> extends Hoodi
|
||||
|
||||
|
||||
/**
|
||||
* Tagging for global index should only consider the record key
|
||||
* Tagging for global index should only consider the record key.
|
||||
*/
|
||||
@Override
|
||||
protected JavaRDD<HoodieRecord<T>> tagLocationBacktoRecords(
|
||||
|
||||
@@ -36,7 +36,7 @@ class IntervalTreeBasedGlobalIndexFileFilter implements IndexFileFilter {
|
||||
private final Set<String> filesWithNoRanges = new HashSet<>();
|
||||
|
||||
/**
|
||||
* Instantiates {@link IntervalTreeBasedGlobalIndexFileFilter}
|
||||
* Instantiates {@link IntervalTreeBasedGlobalIndexFileFilter}.
|
||||
*
|
||||
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}s
|
||||
*/
|
||||
|
||||
@@ -35,7 +35,7 @@ class IntervalTreeBasedIndexFileFilter implements IndexFileFilter {
|
||||
private final Map<String, Set<String>> partitionToFilesWithNoRanges = new HashMap<>();
|
||||
|
||||
/**
|
||||
* Instantiates {@link IntervalTreeBasedIndexFileFilter}
|
||||
* Instantiates {@link IntervalTreeBasedIndexFileFilter}.
|
||||
*
|
||||
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}s
|
||||
*/
|
||||
|
||||
@@ -21,7 +21,7 @@ package org.apache.hudi.index.bloom;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Encapsulates the result from a key lookup
|
||||
* Encapsulates the result from a key lookup.
|
||||
*/
|
||||
public class KeyLookupResult {
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ class KeyRangeNode implements Comparable<KeyRangeNode>, Serializable {
|
||||
private KeyRangeNode right = null;
|
||||
|
||||
/**
|
||||
* Instantiates a new {@link KeyRangeNode}
|
||||
* Instantiates a new {@link KeyRangeNode}.
|
||||
*
|
||||
* @param minRecordKey min record key of the index file
|
||||
* @param maxRecordKey max record key of the index file
|
||||
|
||||
@@ -26,7 +26,7 @@ import java.util.Set;
|
||||
class ListBasedGlobalIndexFileFilter extends ListBasedIndexFileFilter {
|
||||
|
||||
/**
|
||||
* Instantiates {@link ListBasedGlobalIndexFileFilter}
|
||||
* Instantiates {@link ListBasedGlobalIndexFileFilter}.
|
||||
*
|
||||
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}
|
||||
*/
|
||||
|
||||
@@ -32,7 +32,7 @@ class ListBasedIndexFileFilter implements IndexFileFilter {
|
||||
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo;
|
||||
|
||||
/**
|
||||
* Instantiates {@link ListBasedIndexFileFilter}
|
||||
* Instantiates {@link ListBasedIndexFileFilter}.
|
||||
*
|
||||
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}
|
||||
*/
|
||||
|
||||
@@ -67,7 +67,7 @@ import java.util.List;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Hoodie Index implementation backed by HBase
|
||||
* Hoodie Index implementation backed by HBase.
|
||||
*/
|
||||
public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
||||
|
||||
@@ -89,7 +89,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
||||
private int maxQpsPerRegionServer;
|
||||
/**
|
||||
* multiPutBatchSize will be computed and re-set in updateLocation if
|
||||
* {@link HoodieIndexConfig.HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP} is set to true
|
||||
* {@link HoodieIndexConfig.HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP} is set to true.
|
||||
*/
|
||||
private Integer multiPutBatchSize;
|
||||
private Integer numRegionServersForTable;
|
||||
@@ -150,7 +150,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
||||
|
||||
/**
|
||||
* Since we are sharing the HbaseConnection across tasks in a JVM, make sure the HbaseConnection is closed when JVM
|
||||
* exits
|
||||
* exits.
|
||||
*/
|
||||
private void addShutDownHook() {
|
||||
Runtime.getRuntime().addShutdownHook(new Thread() {
|
||||
@@ -342,7 +342,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to facilitate performing puts and deletes in Hbase
|
||||
* Helper method to facilitate performing puts and deletes in Hbase.
|
||||
*/
|
||||
private void doPutsAndDeletes(HTable hTable, List<Put> puts, List<Delete> deletes) throws IOException {
|
||||
if (puts.size() > 0) {
|
||||
@@ -500,7 +500,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Only looks up by recordKey
|
||||
* Only looks up by recordKey.
|
||||
*/
|
||||
@Override
|
||||
public boolean isGlobal() {
|
||||
|
||||
@@ -21,7 +21,7 @@ package org.apache.hudi.index.hbase;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* <code>HBaseIndexQPSResourceAllocator</code> defines methods to manage resource allocation for HBase index operations
|
||||
* <code>HBaseIndexQPSResourceAllocator</code> defines methods to manage resource allocation for HBase index operations.
|
||||
*/
|
||||
public interface HBaseIndexQPSResourceAllocator extends Serializable {
|
||||
|
||||
@@ -45,7 +45,7 @@ public interface HBaseIndexQPSResourceAllocator extends Serializable {
|
||||
float acquireQPSResources(final float desiredQPSFraction, final long numPuts);
|
||||
|
||||
/**
|
||||
* This method releases the acquired QPS Fraction
|
||||
* This method releases the acquired QPS Fraction.
|
||||
*/
|
||||
void releaseQPSResources();
|
||||
}
|
||||
|
||||
@@ -301,7 +301,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the number of records have reached the set threshold and then flushes the records to disk
|
||||
* Checks if the number of records have reached the set threshold and then flushes the records to disk.
|
||||
*/
|
||||
private void flushToDiskIfRequired(HoodieRecord record) {
|
||||
// Append if max number of records reached to achieve block size
|
||||
|
||||
@@ -51,7 +51,7 @@ import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Cleaner is responsible for garbage collecting older files in a given partition path, such that
|
||||
* Cleaner is responsible for garbage collecting older files in a given partition path. It does so such that:
|
||||
* <p>
|
||||
* 1) It provides sufficient time for existing queries running on older versions, to close
|
||||
* <p>
|
||||
@@ -83,7 +83,8 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> implements Seri
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns list of partitions where clean operations needs to be performed
|
||||
* Returns list of partitions where clean operations need to be performed.
|
||||
*
|
||||
* @param newInstantToRetain New instant to be retained after this cleanup operation
|
||||
* @return list of partitions to scan for cleaning
|
||||
* @throws IOException when underlying file-system throws this exception
|
||||
@@ -294,7 +295,7 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> implements Seri
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if file slice needed to be preserved for pending compaction
|
||||
* Determine if file slice needs to be preserved for pending compaction.
|
||||
*
|
||||
* @param fileSlice File Slice
|
||||
* @return true if file slice needs to be preserved, false otherwise.
|
||||
|
||||
@@ -64,7 +64,7 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Archiver to bound the growth of <action>.commit files
|
||||
* Archiver to bound the growth of <action>.commit files.
|
||||
*/
|
||||
public class HoodieCommitArchiveLog {
|
||||
|
||||
@@ -201,7 +201,7 @@ public class HoodieCommitArchiveLog {
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove older instants from auxiliary meta folder
|
||||
* Remove older instants from auxiliary meta folder.
|
||||
*
|
||||
* @param thresholdInstant Hoodie Instant
|
||||
* @return success if all eligible files were deleted successfully
|
||||
|
||||
@@ -77,7 +77,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
|
||||
}
|
||||
|
||||
/**
|
||||
* Called by the compactor code path
|
||||
* Called by the compactor code path.
|
||||
*/
|
||||
public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
||||
String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordIterator) {
|
||||
@@ -124,7 +124,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes all records passed
|
||||
* Writes all records passed.
|
||||
*/
|
||||
public void write() {
|
||||
try {
|
||||
@@ -147,7 +147,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs actions to durably, persist the current changes and returns a WriteStatus object
|
||||
* Performs actions to durably persist the current changes and returns a WriteStatus object.
|
||||
*/
|
||||
@Override
|
||||
public WriteStatus close() {
|
||||
|
||||
@@ -124,7 +124,7 @@ public class HoodieKeyLookupHandle<T extends HoodieRecordPayload> extends Hoodie
|
||||
}
|
||||
|
||||
/**
|
||||
* Encapsulates the result from a key lookup
|
||||
* Encapsulates the result from a key lookup.
|
||||
*/
|
||||
public static class KeyLookupResult {
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
|
||||
}
|
||||
|
||||
/**
|
||||
* Called by compactor code path
|
||||
* Called by compactor code path.
|
||||
*/
|
||||
public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
||||
Map<String, HoodieRecord<T>> keyToNewRecords, String fileId, HoodieDataFile dataFileToBeMerged) {
|
||||
@@ -108,7 +108,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether we can accept the incoming records, into the current file, depending on
|
||||
* Determines whether we can accept the incoming records into the current file. The decision depends on:
|
||||
* <p>
|
||||
* - Whether it belongs to the same partitionPath as existing records - Whether the current file written bytes lt max
|
||||
* file size
|
||||
@@ -139,14 +139,14 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrite the GenericRecord with the Schema containing the Hoodie Metadata fields
|
||||
* Rewrite the GenericRecord with the Schema containing the Hoodie Metadata fields.
|
||||
*/
|
||||
protected GenericRecord rewriteRecord(GenericRecord record) {
|
||||
return HoodieAvroUtils.rewriteRecord(record, writerSchema);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract old file path, initialize StorageWriter and WriteStatus
|
||||
* Extract old file path, initialize StorageWriter and WriteStatus.
|
||||
*/
|
||||
private void init(String fileId, String partitionPath, HoodieDataFile dataFileToBeMerged) {
|
||||
logger.info("partitionPath:" + partitionPath + ", fileId to be merged:" + fileId);
|
||||
@@ -189,7 +189,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the new incoming records in a map and return partitionPath
|
||||
* Load the new incoming records in a map and return partitionPath.
|
||||
*/
|
||||
private String init(String fileId, Iterator<HoodieRecord<T>> newRecordsItr) {
|
||||
try {
|
||||
|
||||
@@ -28,7 +28,7 @@ import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**
|
||||
* Extract range information for a given file slice
|
||||
* Extract range information for a given file slice.
|
||||
*/
|
||||
public class HoodieRangeInfoHandle<T extends HoodieRecordPayload> extends HoodieReadHandle<T> {
|
||||
|
||||
|
||||
@@ -90,7 +90,7 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload> extends H
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an empty marker file corresponding to storage writer path
|
||||
* Creates an empty marker file corresponding to storage writer path.
|
||||
*
|
||||
* @param partitionPath Partition path
|
||||
*/
|
||||
@@ -105,7 +105,7 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload> extends H
|
||||
}
|
||||
|
||||
/**
|
||||
* THe marker path will be <base-path>/.hoodie/.temp/<instant_ts>/2019/04/25/filename
|
||||
* The marker path will be <base-path>/.hoodie/.temp/<instant_ts>/2019/04/25/filename.
|
||||
*/
|
||||
private Path makeNewMarkerPath(String partitionPath) {
|
||||
Path markerRootPath = new Path(hoodieTable.getMetaClient().getMarkerFolderPath(instantTime));
|
||||
@@ -123,7 +123,7 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload> extends H
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether we can accept the incoming records, into the current file, depending on
|
||||
* Determines whether we can accept the incoming records into the current file. The decision depends on:
|
||||
* <p>
|
||||
* - Whether it belongs to the same partitionPath as existing records - Whether the current file written bytes lt max
|
||||
* file size
|
||||
@@ -154,7 +154,7 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload> extends H
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrite the GenericRecord with the Schema containing the Hoodie Metadata fields
|
||||
* Rewrite the GenericRecord with the Schema containing the Hoodie Metadata fields.
|
||||
*/
|
||||
protected GenericRecord rewriteRecord(GenericRecord record) {
|
||||
return HoodieAvroUtils.rewriteRecord(record, writerSchema);
|
||||
|
||||
@@ -32,12 +32,12 @@ import java.io.Serializable;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* A HoodieCompactor runs compaction on a hoodie table
|
||||
* A HoodieCompactor runs compaction on a hoodie table.
|
||||
*/
|
||||
public interface HoodieCompactor extends Serializable {
|
||||
|
||||
/**
|
||||
* Generate a new compaction plan for scheduling
|
||||
* Generate a new compaction plan for scheduling.
|
||||
*
|
||||
* @param jsc Spark Context
|
||||
* @param hoodieTable Hoodie Table
|
||||
@@ -51,7 +51,7 @@ public interface HoodieCompactor extends Serializable {
|
||||
String compactionCommitTime, Set<HoodieFileGroupId> fgIdsInPendingCompactions) throws IOException;
|
||||
|
||||
/**
|
||||
* Execute compaction operations and report back status
|
||||
* Execute compaction operations and report back status.
|
||||
*/
|
||||
JavaRDD<WriteStatus> compact(JavaSparkContext jsc, HoodieCompactionPlan compactionPlan, HoodieTable hoodieTable,
|
||||
HoodieWriteConfig config, String compactionInstantTime) throws IOException;
|
||||
|
||||
@@ -28,7 +28,7 @@ import java.util.List;
|
||||
|
||||
/**
|
||||
* CompactionStrategy which looks at total IO to be done for the compaction (read + write) and limits the list of
|
||||
* compactions to be under a configured limit on the IO
|
||||
* compactions to be under a configured limit on the IO.
|
||||
*
|
||||
* @see CompactionStrategy
|
||||
*/
|
||||
|
||||
@@ -116,7 +116,7 @@ public abstract class CompactionStrategy implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter the partition paths based on compaction strategy
|
||||
* Filter the partition paths based on compaction strategy.
|
||||
*
|
||||
* @param writeConfig
|
||||
* @param allPartitionPaths
|
||||
|
||||
@@ -32,7 +32,7 @@ import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* LogFileSizeBasedCompactionStrategy orders the compactions based on the total log files size and limits the
|
||||
* compactions within a configured IO bound
|
||||
* compactions within a configured IO bound.
|
||||
*
|
||||
* @see BoundedIOCompactionStrategy
|
||||
* @see CompactionStrategy
|
||||
|
||||
@@ -26,12 +26,12 @@ import java.io.Closeable;
|
||||
public abstract class MetricsReporter {
|
||||
|
||||
/**
|
||||
* Push out metrics at scheduled intervals
|
||||
* Push out metrics at scheduled intervals.
|
||||
*/
|
||||
public abstract void start();
|
||||
|
||||
/**
|
||||
* Deterministically push out metrics
|
||||
* Deterministically push out metrics.
|
||||
*/
|
||||
public abstract void report();
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ import java.util.stream.Collectors;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Implementation of a very heavily read-optimized Hoodie Table where
|
||||
* Implementation of a very heavily read-optimized Hoodie Table. Operations behave as follows.
|
||||
* <p>
|
||||
* INSERTS - Produce new files, block aligned to desired size (or) Merge with the smallest existing file, to expand it
|
||||
* <p>
|
||||
@@ -273,7 +273,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates List of files to be cleaned
|
||||
* Generates List of files to be cleaned.
|
||||
*
|
||||
* @param jsc JavaSparkContext
|
||||
* @return Cleaner Plan
|
||||
@@ -389,7 +389,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete Inflight instant if enabled
|
||||
* Delete Inflight instant if enabled.
|
||||
*
|
||||
* @param deleteInstant Enable Deletion of Inflight instant
|
||||
* @param activeTimeline Hoodie active timeline
|
||||
@@ -414,7 +414,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumer that dequeues records from queue and sends to Merge Handle
|
||||
* Consumer that dequeues records from queue and sends to Merge Handle.
|
||||
*/
|
||||
private static class UpdateHandler extends BoundedInMemoryQueueConsumer<GenericRecord, Void> {
|
||||
|
||||
@@ -474,7 +474,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper class for a small file's location and its actual size on disk
|
||||
* Helper class for a small file's location and its actual size on disk.
|
||||
*/
|
||||
static class SmallFile implements Serializable {
|
||||
|
||||
@@ -493,7 +493,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
|
||||
/**
|
||||
* Helper class for an insert bucket along with the weight [0.0, 1.0] that defines the amount of incoming inserts that
|
||||
* should be allocated to the bucket
|
||||
* should be allocated to the bucket.
|
||||
*/
|
||||
class InsertBucket implements Serializable {
|
||||
|
||||
@@ -512,7 +512,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper class for a bucket's type (INSERT and UPDATE) and its file location
|
||||
* Helper class for a bucket's type (INSERT and UPDATE) and its file location.
|
||||
*/
|
||||
class BucketInfo implements Serializable {
|
||||
|
||||
@@ -530,16 +530,16 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
/**
|
||||
* Packs incoming records to be upserted, into buckets (1 bucket = 1 RDD partition)
|
||||
* Packs incoming records to be upserted, into buckets (1 bucket = 1 RDD partition).
|
||||
*/
|
||||
class UpsertPartitioner extends Partitioner {
|
||||
|
||||
/**
|
||||
* List of all small files to be corrected
|
||||
* List of all small files to be corrected.
|
||||
*/
|
||||
List<SmallFile> smallFiles = new ArrayList<SmallFile>();
|
||||
/**
|
||||
* Total number of RDD partitions, is determined by total buckets we want to pack the incoming workload into
|
||||
* Total number of RDD partitions; determined by the total buckets we want to pack the incoming workload into.
|
||||
*/
|
||||
private int totalBuckets = 0;
|
||||
/**
|
||||
@@ -560,7 +560,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
private HashMap<Integer, BucketInfo> bucketInfoMap;
|
||||
|
||||
/**
|
||||
* Rolling stats for files
|
||||
* Rolling stats for files.
|
||||
*/
|
||||
protected HoodieRollingStatMetadata rollingStatMetadata;
|
||||
protected long averageRecordSize;
|
||||
@@ -672,7 +672,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of small files in the given partition path
|
||||
* Returns a list of small files in the given partition path.
|
||||
*/
|
||||
protected List<SmallFile> getSmallFiles(String partitionPath) {
|
||||
|
||||
|
||||
@@ -201,7 +201,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
|
||||
/**
|
||||
* Generate all rollback requests that we need to perform for rolling back this action without actually performing
|
||||
* rolling back
|
||||
* rolling back.
|
||||
*
|
||||
* @param jsc JavaSparkContext
|
||||
* @param instantToRollback Instant to Rollback
|
||||
|
||||
@@ -69,7 +69,7 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Abstract implementation of a HoodieTable
|
||||
* Abstract implementation of a HoodieTable.
|
||||
*/
|
||||
public abstract class HoodieTable<T extends HoodieRecordPayload> implements Serializable {
|
||||
|
||||
@@ -111,17 +111,17 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a partitioner to perform the upsert operation, based on the workload profile
|
||||
* Provides a partitioner to perform the upsert operation, based on the workload profile.
|
||||
*/
|
||||
public abstract Partitioner getUpsertPartitioner(WorkloadProfile profile);
|
||||
|
||||
/**
|
||||
* Provides a partitioner to perform the insert operation, based on the workload profile
|
||||
* Provides a partitioner to perform the insert operation, based on the workload profile.
|
||||
*/
|
||||
public abstract Partitioner getInsertPartitioner(WorkloadProfile profile);
|
||||
|
||||
/**
|
||||
* Return whether this HoodieTable implementation can benefit from workload profiling
|
||||
* Return whether this HoodieTable implementation can benefit from workload profiling.
|
||||
*/
|
||||
public abstract boolean isWorkloadProfileNeeded();
|
||||
|
||||
@@ -138,84 +138,84 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the view of the file system for this table
|
||||
* Get the view of the file system for this table.
|
||||
*/
|
||||
public TableFileSystemView getFileSystemView() {
|
||||
return new HoodieTableFileSystemView(metaClient, getCompletedCommitsTimeline());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the read optimized view of the file system for this table
|
||||
* Get the read optimized view of the file system for this table.
|
||||
*/
|
||||
public TableFileSystemView.ReadOptimizedView getROFileSystemView() {
|
||||
return getViewManager().getFileSystemView(metaClient.getBasePath());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the real time view of the file system for this table
|
||||
* Get the real time view of the file system for this table.
|
||||
*/
|
||||
public TableFileSystemView.RealtimeView getRTFileSystemView() {
|
||||
return getViewManager().getFileSystemView(metaClient.getBasePath());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get complete view of the file system for this table with ability to force sync
|
||||
* Get complete view of the file system for this table with ability to force sync.
|
||||
*/
|
||||
public SyncableFileSystemView getHoodieView() {
|
||||
return getViewManager().getFileSystemView(metaClient.getBasePath());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the completed (no-inflights) commit + deltacommit timeline
|
||||
* Get only the completed (no-inflights) commit + deltacommit timeline.
|
||||
*/
|
||||
public HoodieTimeline getCompletedCommitsTimeline() {
|
||||
return metaClient.getCommitsTimeline().filterCompletedInstants();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the completed (no-inflights) commit timeline
|
||||
* Get only the completed (no-inflights) commit timeline.
|
||||
*/
|
||||
public HoodieTimeline getCompletedCommitTimeline() {
|
||||
return metaClient.getCommitTimeline().filterCompletedInstants();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the inflights (no-completed) commit timeline
|
||||
* Get only the inflights (no-completed) commit timeline.
|
||||
*/
|
||||
public HoodieTimeline getInflightCommitTimeline() {
|
||||
return metaClient.getCommitsTimeline().filterInflightsExcludingCompaction();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the completed (no-inflights) clean timeline
|
||||
* Get only the completed (no-inflights) clean timeline.
|
||||
*/
|
||||
public HoodieTimeline getCompletedCleanTimeline() {
|
||||
return getActiveTimeline().getCleanerTimeline().filterCompletedInstants();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get clean timeline
|
||||
* Get clean timeline.
|
||||
*/
|
||||
public HoodieTimeline getCleanTimeline() {
|
||||
return getActiveTimeline().getCleanerTimeline();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the completed (no-inflights) savepoint timeline
|
||||
* Get only the completed (no-inflights) savepoint timeline.
|
||||
*/
|
||||
public HoodieTimeline getCompletedSavepointTimeline() {
|
||||
return getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of savepoints in this table
|
||||
* Get the list of savepoints in this table.
|
||||
*/
|
||||
public List<String> getSavepoints() {
|
||||
return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of data file names savepointed
|
||||
* Get the list of data file names savepointed.
|
||||
*/
|
||||
public Stream<String> getSavepointedDataFiles(String savepointTime) {
|
||||
if (!getSavepoints().contains(savepointTime)) {
|
||||
@@ -237,26 +237,26 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the index
|
||||
* Return the index.
|
||||
*/
|
||||
public HoodieIndex<T> getIndex() {
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the ultimate IO for a given upserted (RDD) partition
|
||||
* Perform the ultimate IO for a given upserted (RDD) partition.
|
||||
*/
|
||||
public abstract Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime, Integer partition,
|
||||
Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
|
||||
|
||||
/**
|
||||
* Perform the ultimate IO for a given inserted (RDD) partition
|
||||
* Perform the ultimate IO for a given inserted (RDD) partition.
|
||||
*/
|
||||
public abstract Iterator<List<WriteStatus>> handleInsertPartition(String commitTime, Integer partition,
|
||||
Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
|
||||
|
||||
/**
|
||||
* Schedule compaction for the instant time
|
||||
* Schedule compaction for the instant time.
|
||||
*
|
||||
* @param jsc Spark Context
|
||||
* @param instantTime Instant Time for scheduling compaction
|
||||
@@ -265,7 +265,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
public abstract HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String instantTime);
|
||||
|
||||
/**
|
||||
* Run Compaction on the table. Compaction arranges the data so that it is optimized for data access
|
||||
* Run Compaction on the table. Compaction arranges the data so that it is optimized for data access.
|
||||
*
|
||||
* @param jsc Spark Context
|
||||
* @param compactionInstantTime Instant Time
|
||||
@@ -275,7 +275,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
HoodieCompactionPlan compactionPlan);
|
||||
|
||||
/**
|
||||
* Generates list of files that are eligible for cleaning
|
||||
* Generates list of files that are eligible for cleaning.
|
||||
*
|
||||
* @param jsc Java Spark Context
|
||||
* @return Cleaner Plan containing list of files to be deleted.
|
||||
@@ -283,7 +283,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
public abstract HoodieCleanerPlan scheduleClean(JavaSparkContext jsc);
|
||||
|
||||
/**
|
||||
* Cleans the files listed in the cleaner plan associated with clean instant
|
||||
* Cleans the files listed in the cleaner plan associated with clean instant.
|
||||
*
|
||||
* @param jsc Java Spark Context
|
||||
* @param cleanInstant Clean Instant
|
||||
@@ -300,7 +300,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
* Finalize the written data onto storage. Perform any final cleanups
|
||||
* Finalize the written data onto storage. Perform any final cleanups.
|
||||
*
|
||||
* @param jsc Spark Context
|
||||
* @param stats List of HoodieWriteStats
|
||||
@@ -312,7 +312,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete Marker directory corresponding to an instant
|
||||
* Delete Marker directory corresponding to an instant.
|
||||
*
|
||||
* @param instantTs Instant Time
|
||||
*/
|
||||
@@ -409,7 +409,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures all files passed either appear or disappear
|
||||
* Ensures all files passed either appear or disappear.
|
||||
*
|
||||
* @param jsc JavaSparkContext
|
||||
* @param groupByPartition Files grouped by partition
|
||||
|
||||
@@ -51,7 +51,7 @@ import java.util.Map;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Performs Rollback of Hoodie Tables
|
||||
* Performs Rollback of Hoodie Tables.
|
||||
*/
|
||||
public class RollbackExecutor implements Serializable {
|
||||
|
||||
@@ -143,7 +143,7 @@ public class RollbackExecutor implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to merge 2 rollback-stats for a given partition
|
||||
* Helper to merge 2 rollback-stats for a given partition.
|
||||
*
|
||||
* @param stat1 HoodieRollbackStat
|
||||
* @param stat2 HoodieRollbackStat
|
||||
@@ -177,7 +177,7 @@ public class RollbackExecutor implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Common method used for cleaning out parquet files under a partition path during rollback of a set of commits
|
||||
* Common method used for cleaning out parquet files under a partition path during rollback of a set of commits.
|
||||
*/
|
||||
private Map<FileStatus, Boolean> deleteCleanedFiles(HoodieTableMetaClient metaClient, HoodieWriteConfig config,
|
||||
Map<FileStatus, Boolean> results, String partitionPath, PathFilter filter) throws IOException {
|
||||
@@ -193,7 +193,7 @@ public class RollbackExecutor implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Common method used for cleaning out parquet files under a partition path during rollback of a set of commits
|
||||
* Common method used for cleaning out parquet files under a partition path during rollback of a set of commits.
|
||||
*/
|
||||
private Map<FileStatus, Boolean> deleteCleanedFiles(HoodieTableMetaClient metaClient, HoodieWriteConfig config,
|
||||
Map<FileStatus, Boolean> results, String commit, String partitionPath) throws IOException {
|
||||
|
||||
@@ -22,39 +22,39 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
/**
|
||||
* Request for performing one rollback action
|
||||
* Request for performing one rollback action.
|
||||
*/
|
||||
public class RollbackRequest {
|
||||
|
||||
/**
|
||||
* Rollback Action Types
|
||||
* Rollback Action Types.
|
||||
*/
|
||||
public enum RollbackAction {
|
||||
DELETE_DATA_FILES_ONLY, DELETE_DATA_AND_LOG_FILES, APPEND_ROLLBACK_BLOCK
|
||||
}
|
||||
|
||||
/**
|
||||
* Partition path that needs to be rolled-back
|
||||
* Partition path that needs to be rolled-back.
|
||||
*/
|
||||
private final String partitionPath;
|
||||
|
||||
/**
|
||||
* Rollback Instant
|
||||
* Rollback Instant.
|
||||
*/
|
||||
private final HoodieInstant rollbackInstant;
|
||||
|
||||
/**
|
||||
* FileId in case of appending rollback block
|
||||
* FileId in case of appending rollback block.
|
||||
*/
|
||||
private final Option<String> fileId;
|
||||
|
||||
/**
|
||||
* Latest base instant needed for appending rollback block instant
|
||||
* Latest base instant needed for appending rollback block instant.
|
||||
*/
|
||||
private final Option<String> latestBaseInstant;
|
||||
|
||||
/**
|
||||
* Rollback Action
|
||||
* Rollback Action.
|
||||
*/
|
||||
private final RollbackAction rollbackAction;
|
||||
|
||||
|
||||
@@ -33,19 +33,19 @@ import java.util.Set;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Information about incoming records for upsert/insert obtained either via sampling or introspecting the data fully
|
||||
* Information about incoming records for upsert/insert obtained either via sampling or introspecting the data fully.
|
||||
* <p>
|
||||
* TODO(vc): Think about obtaining this directly from index.tagLocation
|
||||
*/
|
||||
public class WorkloadProfile<T extends HoodieRecordPayload> implements Serializable {
|
||||
|
||||
/**
|
||||
* Input workload
|
||||
* Input workload.
|
||||
*/
|
||||
private final JavaRDD<HoodieRecord<T>> taggedRecords;
|
||||
|
||||
/**
|
||||
* Computed workload profile
|
||||
* Computed workload profile.
|
||||
*/
|
||||
private final HashMap<String, WorkloadStat> partitionPathStatMap;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user