[MINOR] clean up and add comments to flink client (#2261)
@@ -18,14 +18,6 @@
 package org.apache.hudi.table.action.commit;
 
-import java.io.ByteArrayOutputStream;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.io.BinaryDecoder;
-import org.apache.avro.io.BinaryEncoder;
-import org.apache.avro.io.DecoderFactory;
-import org.apache.avro.io.EncoderFactory;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.avro.HoodieAvroUtils;
 import org.apache.hudi.client.utils.MergingIterator;
 import org.apache.hudi.common.model.HoodieBaseFile;
@@ -37,15 +29,23 @@ import org.apache.hudi.io.storage.HoodieFileReader;
 import org.apache.hudi.io.storage.HoodieFileReaderFactory;
 import org.apache.hudi.table.HoodieTable;
 
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.EncoderFactory;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.Iterator;
 
 /**
- * Helper to read records from previous version of parquet and run Merge.
+ * Helper to read records from previous version of base file and run Merge.
  */
 public abstract class AbstractMergeHelper<T extends HoodieRecordPayload, I, K, O> {
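The Avro codec imports travel with this class because the merge path re-encodes each record read from the old base file when the table schema has evolved. A minimal standalone sketch of that binary round trip, assuming a `writerSchema`/`readerSchema` pair; this is an illustration of the technique, not the helper's exact code:

```java
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

public final class SchemaRoundTrip {
  // Re-encode a record written with writerSchema so it conforms to readerSchema.
  public static GenericRecord rewrite(GenericRecord record, Schema writerSchema, Schema readerSchema)
      throws IOException {
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(writerSchema);
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(writerSchema, readerSchema);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null);
    writer.write(record, encoder); // serialize with the old (writer) schema
    encoder.flush();
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(baos.toByteArray(), null);
    return reader.read(null, decoder); // deserialize, resolving to the new schema
  }
}
```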
@@ -96,6 +96,11 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
     return recordsWithLocation.stream().filter(v1 -> !v1.isCurrentLocationKnown()).collect(Collectors.toList());
   }
 
+  @Override
+  public void bootstrap(Option<Map<String, String>> extraMetadata) {
+    throw new HoodieNotSupportedException("Bootstrap operation is not supported yet");
+  }
+
   @Override
   public List<WriteStatus> upsert(List<HoodieRecord<T>> records, String instantTime) {
     HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
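The new `bootstrap` override makes the client fail fast instead of silently doing nothing. A hedged usage sketch of the client surface these methods belong to; `context`, `writeConfig`, and `records` are assumed inputs, and the commit call is as exposed by the common write-client base class:

```java
import org.apache.hudi.client.HoodieFlinkWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieNotSupportedException;

import java.util.List;

public final class FlinkClientSketch {
  // The Flink client works on plain java.util.List batches rather than RDDs.
  static <T extends HoodieRecordPayload> void writeBatch(
      HoodieFlinkEngineContext context, HoodieWriteConfig writeConfig, List<HoodieRecord<T>> records) {
    HoodieFlinkWriteClient<T> client = new HoodieFlinkWriteClient<>(context, writeConfig);
    String instantTime = client.startCommit();
    List<WriteStatus> statuses = client.upsert(records, instantTime);
    client.commit(instantTime, statuses);

    try {
      client.bootstrap(Option.empty()); // rejected until bootstrap is implemented
    } catch (HoodieNotSupportedException e) {
      // expected: "Bootstrap operation is not supported yet"
    }
  }
}
```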
@@ -18,21 +18,23 @@
 package org.apache.hudi.client.common;
 
-import org.apache.flink.api.common.functions.RuntimeContext;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hudi.client.FlinkTaskContextSupplier;
 import org.apache.hudi.client.common.function.SerializableConsumer;
 import org.apache.hudi.client.common.function.SerializableFunction;
 import org.apache.hudi.client.common.function.SerializablePairFunction;
 import org.apache.hudi.common.config.SerializableConfiguration;
 import org.apache.hudi.common.util.Option;
-import scala.Tuple2;
+
+import org.apache.flink.api.common.functions.RuntimeContext;
+import org.apache.hadoop.conf.Configuration;
 
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import scala.Tuple2;
+
 import static org.apache.hudi.client.common.function.FunctionWrapper.throwingFlatMapWrapper;
 import static org.apache.hudi.client.common.function.FunctionWrapper.throwingForeachWrapper;
 import static org.apache.hudi.client.common.function.FunctionWrapper.throwingMapToPairWrapper;
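The `SerializableFunction`/`SerializableConsumer` imports hint at what the engine context is for: running throwing lambdas over plain collections in place of Spark transformations. A hedged sketch of that usage, with the `map`/`foreach` signatures assumed from the imports rather than taken from this diff:

```java
import org.apache.hudi.client.common.HoodieFlinkEngineContext;

import java.util.Arrays;
import java.util.List;

public final class EngineContextSketch {
  static void demo(HoodieFlinkEngineContext context) {
    // Run a (possibly throwing) lambda over a list with a given parallelism hint.
    List<Integer> lengths = context.map(Arrays.asList("a", "bb", "ccc"), String::length, 2);
    context.foreach(lengths, len -> System.out.println("length=" + len), 2);
  }
}
```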
@@ -19,7 +19,6 @@
 package org.apache.hudi.index;
 
 import org.apache.hudi.ApiMaturityLevel;
-import org.apache.hudi.PublicAPIMethod;
 import org.apache.hudi.client.WriteStatus;
 import org.apache.hudi.client.common.HoodieEngineContext;
 import org.apache.hudi.client.common.HoodieFlinkEngineContext;
@@ -31,6 +30,7 @@ import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieIndexException;
 import org.apache.hudi.index.state.FlinkInMemoryStateIndex;
+import org.apache.hudi.PublicAPIMethod;
 import org.apache.hudi.table.HoodieTable;
 
 import java.util.List;
@@ -46,6 +46,14 @@ import org.apache.hudi.table.action.rollback.FlinkCopyOnWriteRollbackActionExecutor;
 import java.util.List;
 import java.util.Map;
 
+/**
+ * Implementation of a very heavily read-optimized Hoodie Table where, all data is stored in base files, with
+ * zero read amplification.
+ * <p>
+ * INSERTS - Produce new files, block aligned to desired size (or) Merge with the smallest existing file, to expand it
+ * <p>
+ * UPDATES - Produce a new version of the file, just replacing the updated records with new values
+ */
 public class HoodieFlinkCopyOnWriteTable<T extends HoodieRecordPayload> extends HoodieFlinkTable<T> {
 
   protected HoodieFlinkCopyOnWriteTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
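The new javadoc describes the copy-on-write storage model this table implements. A hedged sketch of a minimal write config one might point at such a table; the path, table name, and schema string are illustrative values, not from this commit:

```java
import org.apache.hudi.config.HoodieWriteConfig;

public final class CowConfigSketch {
  static HoodieWriteConfig demoConfig(String avroSchemaString) {
    return HoodieWriteConfig.newBuilder()
        .withPath("/tmp/hoodie/demo_table") // illustrative base path
        .withSchema(avroSchemaString)
        .forTable("demo_table")
        .withParallelism(2, 2) // insert / upsert shuffle parallelism
        .build();
  }
}
```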
@@ -35,7 +35,7 @@ import org.apache.hudi.index.HoodieIndex;
 
 import java.util.List;
 
-public abstract class HoodieFlinkTable<T extends HoodieRecordPayload>
+public abstract class HoodieFlinkTable<T extends HoodieRecordPayload>
     extends HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> {
   protected HoodieFlinkTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
     super(config, context, metaClient);
@@ -18,7 +18,6 @@
 package org.apache.hudi.table.action.commit;
 
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.client.WriteStatus;
 import org.apache.hudi.client.common.HoodieEngineContext;
 import org.apache.hudi.common.model.HoodieBaseFile;
@@ -47,6 +46,7 @@ import org.apache.hudi.table.WorkloadProfile;
 import org.apache.hudi.table.WorkloadStat;
 import org.apache.hudi.table.action.HoodieWriteMetadata;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
@@ -94,7 +94,7 @@ public class FlinkDeleteHelper<R> extends
     List<HoodieRecord<EmptyHoodieRecordPayload>> dedupedRecords =
         dedupedKeys.stream().map(key -> new HoodieRecord<>(key, new EmptyHoodieRecordPayload())).collect(Collectors.toList());
     Instant beginTag = Instant.now();
-    // perform index loop up to get existing location of records
+    // perform index look up to get existing location of records
     List<HoodieRecord<EmptyHoodieRecordPayload>> taggedRecords =
         table.getIndex().tagLocation(dedupedRecords, context, table);
     Duration tagLocationDuration = Duration.between(beginTag, Instant.now());
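The hunk above wraps the index lookup in an `Instant`/`Duration` pair to time it. The same pattern, shown standalone; `doIndexLookup` is a hypothetical stand-in for the `tagLocation` call:

```java
import java.time.Duration;
import java.time.Instant;

public final class TimingSketch {
  static void timed(Runnable doIndexLookup) {
    Instant begin = Instant.now();
    doIndexLookup.run(); // stand-in for table.getIndex().tagLocation(...)
    Duration elapsed = Duration.between(begin, Instant.now());
    System.out.println("index lookup took " + elapsed.toMillis() + " ms");
  }
}
```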
@@ -49,6 +49,9 @@ import java.util.stream.Collectors;
 
 import scala.Tuple2;
 
+/**
+ * Packs incoming records to be upserted, into buckets.
+ */
 public class UpsertPartitioner<T extends HoodieRecordPayload<T>> implements Partitioner {
 
   private static final Logger LOG = LogManager.getLogger(UpsertPartitioner.class);
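To make the new javadoc concrete: bucketing means mapping each record key to one of N write buckets. A generic, hedged illustration of bucket assignment; it is not UpsertPartitioner's actual logic, which also packs inserts into existing small files:

```java
public final class BucketSketch {
  // Hash a record key into one of numBuckets buckets (non-negative result).
  static int bucketFor(String recordKey, int numBuckets) {
    return (recordKey.hashCode() & Integer.MAX_VALUE) % numBuckets;
  }
}
```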
@@ -30,11 +30,12 @@ import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieRollbackException;
 import org.apache.hudi.table.HoodieTable;
 import org.apache.hudi.table.MarkerFiles;
-import scala.Tuple2;
 
 import java.util.List;
 import java.util.stream.Collectors;
 
+import scala.Tuple2;
+
 @SuppressWarnings("checkstyle:LineLength")
 public class FlinkMarkerBasedRollbackStrategy<T extends HoodieRecordPayload> extends AbstractMarkerBasedRollbackStrategy<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> {
   public FlinkMarkerBasedRollbackStrategy(HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table, HoodieEngineContext context, HoodieWriteConfig config, String instantTime) {
@@ -50,6 +50,9 @@ import java.util.stream.Collectors;
 
 import scala.Tuple2;
 
+/**
+ * Performs Rollback of Hoodie Tables.
+ */
 public class ListingBasedRollbackHelper implements Serializable {
 
   private static final Logger LOG = LogManager.getLogger(ListingBasedRollbackHelper.class);
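Both the marker-based strategy and this listing-based helper sit behind the same client-level rollback entry point. A hedged usage sketch; `writeClient` is assumed to exist and the instant time is illustrative:

```java
import org.apache.hudi.client.HoodieFlinkWriteClient;

public final class RollbackSketch {
  static void rollbackFailedCommit(HoodieFlinkWriteClient<?> writeClient) {
    String failedInstant = "20201201120000"; // illustrative instant time
    // Deletes the data written for that instant; the strategy used underneath
    // (marker-based vs. listing-based) is an implementation detail.
    boolean rolledBack = writeClient.rollback(failedInstant);
    System.out.println("rolled back: " + rolledBack);
  }
}
```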