
[HUDI-366] Refactor some module codes based on new ImportOrder code style rule (#1055)

[HUDI-366] Refactor hudi-hadoop-mr / hudi-timeline-service / hudi-spark / hudi-integ-test / hudi-utilities based on new ImportOrder code style rule
谢磊
2019-11-27 21:32:43 +08:00
committed by vinoyang
parent b77fad39b5
commit f9139c0f61
94 changed files with 779 additions and 612 deletions
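The pattern is the same in every file below: org.apache.hudi imports move to the top, third-party and other org.apache imports follow, the java group drops to the bottom, and static imports go last. As a minimal sketch of a source file laid out under the new rule (the package, class, and the particular imports here are illustrative only, not taken from the patch; the exact group boundaries are defined by the checkstyle configuration introduced in #1055):

package org.apache.hudi.example;

// group 1: org.apache.hudi imports come first
import org.apache.hudi.common.util.Option;

// group 2: third-party imports (com.*, io.*, org.apache.hadoop, org.apache.log4j, ...)
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;

// group 3: the java group moves to the bottom
import java.io.IOException;
import java.util.List;

// group 4: static imports go last
import static java.util.concurrent.TimeUnit.SECONDS;

public class ExampleLayout {

  private static final Logger LOG = Logger.getLogger(ExampleLayout.class);

  static Option<List<String>> load(Configuration conf) throws IOException {
    LOG.info("reloading; timeout unit is " + SECONDS);
    return Option.empty();
  }
}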


@@ -18,12 +18,17 @@
 package org.apache.hudi.hadoop;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
+import org.apache.hudi.common.model.HoodieDataFile;
+import org.apache.hudi.common.model.HoodiePartitionMetadata;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
+import org.apache.hudi.exception.DatasetNotFoundException;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.exception.InvalidDatasetException;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -37,19 +42,16 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapreduce.Job;
-import org.apache.hudi.common.model.HoodieDataFile;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
-import org.apache.hudi.common.table.timeline.HoodieInstant;
-import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
-import org.apache.hudi.exception.DatasetNotFoundException;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.exception.InvalidDatasetException;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
 /**
  * HoodieInputFormat which understands the Hoodie File Structure and filters files based on the Hoodie Mode. If paths
  * that does not correspond to a hoodie dataset then they are passed in as is (as what FileInputFormat.listStatus()


@@ -18,24 +18,26 @@
 package org.apache.hudi.hadoop;
-import java.io.Serializable;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
 import org.apache.hudi.common.model.HoodieDataFile;
 import org.apache.hudi.common.model.HoodiePartitionMetadata;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
 import org.apache.hudi.exception.DatasetNotFoundException;
 import org.apache.hudi.exception.HoodieException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.Collectors;
 /**
  * Given a path is a part of - Hoodie dataset = accepts ONLY the latest version of each path - Non-Hoodie dataset = then
  * always accept


@@ -18,13 +18,15 @@
 package org.apache.hudi.hadoop;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
 /**
  * Provides Iterator Interface to iterate value entries read from record reader


@@ -18,12 +18,13 @@
 package org.apache.hudi.hadoop;
-import java.io.IOException;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.RecordReader;
+import java.io.IOException;
 /**
  * Record Reader for parquet. Records read from this reader is safe to be buffered for concurrent processing.
  *


@@ -18,23 +18,10 @@
 package org.apache.hudi.hadoop.hive;
+import org.apache.hudi.hadoop.HoodieParquetInputFormat;
+import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
 import com.google.common.annotations.VisibleForTesting;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -70,11 +57,26 @@ import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
 import org.apache.hadoop.mapred.lib.CombineFileSplit;
 import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hudi.hadoop.HoodieParquetInputFormat;
-import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 /**
  * This is just a copy of the org.apache.hadoop.hive.ql.io.CombineHiveInputFormat from Hive 2.x Search for **MOD** to
  * see minor modifications to support custom inputformat in CombineHiveInputFormat. See


@@ -18,15 +18,14 @@
 package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.stream.Collectors;
+import org.apache.hudi.common.model.HoodieAvroPayload;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.LogReaderUtils;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.exception.HoodieIOException;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.generic.GenericArray;
@@ -46,19 +45,22 @@ import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hudi.common.model.HoodieAvroPayload;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.LogReaderUtils;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.exception.HoodieIOException;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.parquet.avro.AvroSchemaConverter;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.schema.MessageType;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
 /**
  * Record Reader implementation to merge fresh avro data with base parquet data, to support real time queries.
  */


@@ -18,29 +18,6 @@
 package org.apache.hudi.hadoop.realtime;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Sets;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
 import org.apache.hudi.common.model.FileSlice;
 import org.apache.hudi.common.model.HoodieLogFile;
 import org.apache.hudi.common.model.HoodieRecord;
@@ -54,9 +31,34 @@ import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.hadoop.HoodieParquetInputFormat;
 import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 /**
  * Input Format, that provides a real-time view of data in a Hoodie dataset
  */


@@ -18,13 +18,14 @@
 package org.apache.hudi.hadoop.realtime;
+import org.apache.hadoop.mapred.FileSplit;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
-import org.apache.hadoop.mapred.FileSplit;
 /**
  * Filesplit that wraps the base split and a list of log files to merge deltas from.


@@ -18,15 +18,17 @@
 package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
+import org.apache.hudi.exception.HoodieException;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.exception.HoodieException;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
 /**
  * Realtime Record Reader which can do compacted (merge-on-read) record reading or unmerged reading (parquet and log
  * files read in parallel) based on job configuration.


@@ -18,23 +18,25 @@
 package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
-import java.util.Map;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
 import org.apache.hudi.common.util.FSUtils;
 import org.apache.hudi.common.util.HoodieAvroUtils;
 import org.apache.hudi.common.util.Option;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.Map;
 class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader
     implements RecordReader<NullWritable, ArrayWritable> {


@@ -18,15 +18,6 @@
 package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner;
 import org.apache.hudi.common.util.DefaultSizeEstimator;
 import org.apache.hudi.common.util.FSUtils;
@@ -38,6 +29,17 @@ import org.apache.hudi.common.util.queue.IteratorBasedQueueProducer;
 import org.apache.hudi.hadoop.RecordReaderValueIterator;
 import org.apache.hudi.hadoop.SafeParquetRecordReaderWrapper;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
 class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader
     implements RecordReader<NullWritable, ArrayWritable> {


@@ -18,6 +18,21 @@
 package org.apache.hudi.hadoop;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.SchemaTestUtil;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.parquet.avro.AvroParquetWriter;
+import org.junit.rules.TemporaryFolder;
 import java.io.File;
 import java.io.FilenameFilter;
 import java.io.IOException;
@@ -26,19 +41,6 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.UUID;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieTestUtils;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.util.FSUtils;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.SchemaTestUtil;
-import org.apache.parquet.avro.AvroParquetWriter;
-import org.junit.rules.TemporaryFolder;
 public class InputFormatTestUtil {


@@ -18,10 +18,11 @@
 package org.apache.hudi.hadoop;
-import static org.junit.Assert.assertTrue;
+import org.junit.Test;
 import java.lang.annotation.Annotation;
-import org.junit.Test;
+import static org.junit.Assert.assertTrue;
 public class TestAnnotation {


@@ -18,10 +18,8 @@
 package org.apache.hudi.hadoop;
-import static org.junit.Assert.assertEquals;
-import java.io.File;
-import java.io.IOException;
+import org.apache.hudi.common.util.FSUtils;
 import org.apache.avro.Schema;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.io.ArrayWritable;
@@ -30,12 +28,16 @@ import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.common.util.FSUtils;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.IOException;
+import static org.junit.Assert.assertEquals;
 public class TestHoodieInputFormat {
   private HoodieParquetInputFormat inputFormat;


@@ -18,19 +18,21 @@
 package org.apache.hudi.hadoop;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import org.apache.hudi.common.HoodieCommonTestHarness;
+import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hadoop.fs.Path;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
-import org.apache.hadoop.fs.Path;
-import org.apache.hudi.common.HoodieCommonTestHarness;
-import org.apache.hudi.common.model.HoodieTestUtils;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 /**
  *


@@ -18,16 +18,18 @@
 package org.apache.hudi.hadoop;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.RecordReader;
+import org.junit.Assert;
+import org.junit.Test;
 import java.io.IOException;
 import java.util.List;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.common.util.collection.Pair;
-import org.junit.Assert;
-import org.junit.Test;
 public class TestRecordReaderValueIterator {


@@ -18,18 +18,26 @@
 package org.apache.hudi.hadoop.realtime;
-import static org.junit.Assert.assertTrue;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.log.HoodieLogFormat;
+import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
+import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
+import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
+import org.apache.hudi.common.table.log.block.HoodieCommandBlock.HoodieCommandBlockTypeEnum;
+import org.apache.hudi.common.table.log.block.HoodieLogBlock;
+import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
+import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.SchemaTestUtil;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.hadoop.InputFormatTestUtil;
 import com.google.common.collect.Maps;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.generic.IndexedRecord;
@@ -51,30 +59,25 @@ import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.common.model.FileSlice;
-import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.model.HoodieTestUtils;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.log.HoodieLogFormat;
-import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
-import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
-import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
-import org.apache.hudi.common.table.log.block.HoodieCommandBlock.HoodieCommandBlockTypeEnum;
-import org.apache.hudi.common.table.log.block.HoodieLogBlock;
-import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
-import org.apache.hudi.common.util.FSUtils;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.SchemaTestUtil;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.hadoop.InputFormatTestUtil;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import static org.junit.Assert.assertTrue;
 public class TestHoodieRealtimeRecordReader {
   private static final String PARTITION_COLUMN = "datestr";


@@ -18,8 +18,8 @@
 package org.apache.hudi.integ;
-import static java.util.concurrent.TimeUnit.SECONDS;
-import static org.awaitility.Awaitility.await;
+import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.common.util.collection.Pair;
 import com.github.dockerjava.api.DockerClient;
 import com.github.dockerjava.api.command.DockerCmdExecFactory;
@@ -31,6 +31,11 @@ import com.github.dockerjava.core.DockerClientBuilder;
 import com.github.dockerjava.core.DockerClientConfig;
 import com.github.dockerjava.core.command.ExecStartResultCallback;
 import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Before;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -38,12 +43,9 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
-import org.apache.hudi.common.util.FileIOUtils;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
-import org.junit.Assert;
-import org.junit.Before;
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.awaitility.Awaitility.await;
 public abstract class ITTestBase {


@@ -18,11 +18,13 @@
 package org.apache.hudi.integ;
-import com.google.common.collect.ImmutableList;
-import java.util.List;
 import org.apache.hudi.common.util.collection.Pair;
+import com.google.common.collect.ImmutableList;
 import org.junit.Test;
+import java.util.List;
 /**
  * Goes through steps described in https://hudi.incubator.apache.org/docker_demo.html
  *


@@ -20,6 +20,7 @@ package org.apache.hudi.integ;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.junit.Assert;
 import org.junit.Test;


@@ -18,13 +18,15 @@
 package org.apache.hudi;
-import java.io.IOException;
-import java.io.Serializable;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.util.HoodieAvroUtils;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieIOException;
+import org.apache.avro.generic.GenericRecord;
+import java.io.IOException;
+import java.io.Serializable;
 /**
  * Base class for all AVRO record based payloads, that can be ordered based on a field
  */


@@ -18,13 +18,15 @@
 package org.apache.hudi;
-import java.util.Arrays;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+import java.util.Arrays;
+import java.util.List;
 /**
  * Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
  */


@@ -18,14 +18,6 @@
 package org.apache.hudi;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema.Field;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.client.embedded.EmbeddedTimelineService;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
@@ -43,9 +35,19 @@ import org.apache.hudi.hive.HiveSyncConfig;
 import org.apache.hudi.hive.PartitionValueExtractor;
 import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
 import org.apache.hudi.index.HoodieIndex;
+import org.apache.avro.Schema.Field;
+import org.apache.avro.generic.GenericRecord;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
 /**
  * Utilities used throughout the data source
  */


@@ -18,16 +18,18 @@
 package org.apache.hudi;
-import com.google.common.collect.Sets;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.fs.FileSystem;
+import java.util.List;
+import java.util.stream.Collectors;
 /**
  * List of helpers to aid, construction of instanttime for read and write operations using datasource
  */


@@ -18,11 +18,13 @@
 package org.apache.hudi;
-import java.io.Serializable;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.util.TypedProperties;
+import org.apache.avro.generic.GenericRecord;
+import java.io.Serializable;
 /**
  * Abstract class to extend for plugging in extraction of {@link HoodieKey} from an Avro record
  */


@@ -18,11 +18,12 @@
 package org.apache.hudi;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
 /**
  * Simple Key generator for unpartitioned Hive Tables
  */


@@ -18,14 +18,16 @@
 package org.apache.hudi;
-import java.io.IOException;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.util.HoodieAvroUtils;
 import org.apache.hudi.common.util.Option;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import java.io.IOException;
 /**
  * Default payload used for delta streamer.
  * <p>


@@ -18,6 +18,16 @@
 package org.apache.hudi;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -29,14 +39,6 @@ import java.util.UUID;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.exception.HoodieIOException;
 /**
  * Class to be used in quickstart guide for generating inserts and updates against a corpus. Test data uses a toy Uber


@@ -18,11 +18,12 @@
 package org.apache.hudi;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
 /**
  * Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
  */


@@ -16,14 +16,15 @@
  * limitations under the License.
  */
-import java.io.IOException;
-import java.util.List;
-import java.util.stream.Collectors;
 import org.apache.hudi.common.TestRawTripPayload;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.util.Option;
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
 /**
  * Test utils for data source tests.
  */


@@ -16,11 +16,6 @@
  * limitations under the License.
  */
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hudi.DataSourceReadOptions;
 import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.hudi.HoodieDataSourceHelpers;
@@ -33,6 +28,10 @@ import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.hive.MultiPartKeysValueExtractor;
 import org.apache.hudi.hive.NonPartitionedExtractor;
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -42,6 +41,9 @@ import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
+import java.util.ArrayList;
+import java.util.List;
 /**
  * Sample program that writes & reads hoodie datasets via the Spark datasource
  */


@@ -16,14 +16,6 @@
  * limitations under the License.
  */
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.DataSourceReadOptions;
 import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.hudi.HoodieDataSourceHelpers;
@@ -31,6 +23,11 @@ import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.hive.MultiPartKeysValueExtractor;
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -42,6 +39,11 @@ import org.apache.spark.sql.streaming.DataStreamWriter;
 import org.apache.spark.sql.streaming.OutputMode;
 import org.apache.spark.sql.streaming.ProcessingTime;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 /**
  * Sample program that writes & reads hoodie datasets via the Spark datasource streaming
  */


@@ -18,17 +18,6 @@
 package org.apache.hudi.timeline.service;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Preconditions;
-import io.javalin.Context;
-import io.javalin.Handler;
-import io.javalin.Javalin;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hudi.common.table.HoodieTimeline;
 import org.apache.hudi.common.table.SyncableFileSystemView;
 import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
@@ -42,10 +31,23 @@ import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
 import org.apache.hudi.timeline.service.handlers.DataFileHandler;
 import org.apache.hudi.timeline.service.handlers.FileSliceHandler;
 import org.apache.hudi.timeline.service.handlers.TimelineHandler;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Preconditions;
+import io.javalin.Context;
+import io.javalin.Handler;
+import io.javalin.Javalin;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.jetbrains.annotations.NotNull;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
 /**
  * Main REST Handler class that handles local view staleness and delegates calls to slice/data-file/timeline handlers
  */


@@ -18,21 +18,23 @@
 package org.apache.hudi.timeline.service;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import io.javalin.Javalin;
-import java.io.IOException;
-import java.io.Serializable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hudi.common.SerializableConfiguration;
 import org.apache.hudi.common.table.view.FileSystemViewManager;
 import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
 import org.apache.hudi.common.table.view.FileSystemViewStorageType;
 import org.apache.hudi.common.util.FSUtils;
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import io.javalin.Javalin;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.io.Serializable;
 /**
  * A stand alone timeline service exposing File-System View interfaces to clients
  */


@@ -18,14 +18,16 @@
 package org.apache.hudi.timeline.service.handlers;
+import org.apache.hudi.common.table.timeline.dto.DataFileDTO;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+import org.apache.hadoop.conf.Configuration;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hudi.common.table.timeline.dto.DataFileDTO;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
 /**
  * REST Handler servicing data-file requests


@@ -18,16 +18,18 @@
 package org.apache.hudi.timeline.service.handlers;
+import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
+import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
+import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+import org.apache.hadoop.conf.Configuration;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
-import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
-import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
 /**
  * REST Handler servicing file-slice requests


@@ -18,10 +18,12 @@
 package org.apache.hudi.timeline.service.handlers;
-import java.io.IOException;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
+import java.io.IOException;
 public abstract class Handler {


@@ -18,14 +18,16 @@
 package org.apache.hudi.timeline.service.handlers;
+import org.apache.hudi.common.table.timeline.dto.InstantDTO;
+import org.apache.hudi.common.table.timeline.dto.TimelineDTO;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+import org.apache.hadoop.conf.Configuration;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hudi.common.table.timeline.dto.InstantDTO;
-import org.apache.hudi.common.table.timeline.dto.TimelineDTO;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
 /**
  * REST Handler servicing timeline requests


@@ -27,6 +27,7 @@ import org.apache.hudi.common.table.view.FileSystemViewStorageType;
 import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
 import org.apache.hudi.common.table.view.TestHoodieTableFileSystemView;
 import org.apache.hudi.timeline.service.TimelineService;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;


@@ -18,26 +18,6 @@
 package org.apache.hudi.utilities;
-import com.beust.jcommander.IValueValidator;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.ParameterException;
-import com.google.common.annotations.VisibleForTesting;
-import java.io.IOException;
-import java.io.Serializable;
-import java.time.Instant;
-import java.time.ZoneId;
-import java.time.format.DateTimeFormatter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Properties;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hudi.HoodieWriteClient;
 import org.apache.hudi.WriteStatus;
 import org.apache.hudi.common.HoodieJsonPayload;
@@ -50,12 +30,35 @@ import org.apache.hudi.common.util.FSUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.exception.HoodieIOException;
+import com.beust.jcommander.IValueValidator;
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.parquet.avro.AvroReadSupport;
 import org.apache.parquet.hadoop.ParquetInputFormat;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import java.io.IOException;
+import java.io.Serializable;
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
 import scala.Tuple2;
 /**


@@ -18,8 +18,25 @@
package org.apache.hudi.utilities; package org.apache.hudi.utilities;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.utilities.exception.HoodieIncrementalPullException;
import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException;
import com.beust.jcommander.JCommander; import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter; import com.beust.jcommander.Parameter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.stringtemplate.v4.ST;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
@@ -32,21 +49,6 @@ import java.sql.Statement;
import java.util.List; import java.util.List;
import java.util.Scanner; import java.util.Scanner;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.utilities.exception.HoodieIncrementalPullException;
import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.stringtemplate.v4.ST;
/**
* Utility to pull data after a given commit, based on the supplied HiveQL and save the delta as another hive temporary

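HiveIncrementalPuller, like most of the command-line tools touched in this commit (HoodieCleaner, HoodieCompactor, HoodieCompactionAdminTool, and others below), drives a nested Config class with JCommander, which is why the com.beust.jcommander imports recur in nearly every hunk. A hedged sketch of that shared pattern; the flag names below are hypothetical, not any tool's actual options:

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;

public class ToolConfigSketch {

  public static class Config {
    // Hypothetical flag for illustration; see each tool's own Config for real options.
    @Parameter(names = {"--base-path"}, description = "Base path of the dataset", required = true)
    public String basePath;

    @Parameter(names = {"--help", "-h"}, help = true)
    public Boolean help = false;
  }

  public static void main(String[] args) {
    Config cfg = new Config();
    JCommander cmd = new JCommander(cfg);
    cmd.parse(args);
    if (cfg.help) {
      cmd.usage();
      return;
    }
    // ... hand cfg to the tool's run logic ...
  }
}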
View File

@@ -18,22 +18,24 @@
package org.apache.hudi.utilities;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.config.HoodieWriteConfig;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
public class HoodieCleaner {
private static volatile Logger log = LogManager.getLogger(HoodieCleaner.class);

View File

@@ -18,22 +18,24 @@
package org.apache.hudi.utilities;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.List;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.CompactionAdminClient;
import org.apache.hudi.CompactionAdminClient.RenameOpResult;
import org.apache.hudi.CompactionAdminClient.ValidationOpResult;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.FSUtils;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.List;
public class HoodieCompactionAdminTool {
private final Config cfg;

View File

@@ -18,23 +18,25 @@
package org.apache.hudi.utilities;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.WriteStatus;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
public class HoodieCompactor {
private static volatile Logger logger = LogManager.getLogger(HoodieCompactor.class);

View File

@@ -18,17 +18,6 @@
package org.apache.hudi.utilities;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.SerializableConfiguration;
import org.apache.hudi.common.model.HoodieDataFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
@@ -40,10 +29,24 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
import scala.Tuple2;
/**

View File

@@ -22,6 +22,12 @@ import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.google.common.base.Preconditions;
import io.javalin.Javalin;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -31,11 +37,6 @@ import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.IntStream;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.spark.api.java.JavaSparkContext;
public class HoodieWithTimelineServer implements Serializable {

View File

@@ -18,18 +18,6 @@
package org.apache.hudi.utilities;
import com.google.common.base.Preconditions;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.WriteStatus;
import org.apache.hudi.common.util.DFSPropertiesConfiguration;
@@ -44,6 +32,11 @@ import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.Source;
import org.apache.hudi.utilities.transform.Transformer;
import com.google.common.base.Preconditions;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.Accumulator;
@@ -52,6 +45,15 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Bunch of helper methods
*/

View File

@@ -18,8 +18,19 @@
package org.apache.hudi.utilities.adhoc;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.config.HoodieCompactionConfig;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
@@ -27,15 +38,6 @@ import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* This is a one-time use class meant for migrating the configuration for "hoodie.compaction.payload.class" in

View File

@@ -18,6 +18,11 @@
package org.apache.hudi.utilities.deltastreamer;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.Serializable;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
@@ -25,9 +30,6 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Base Class for running delta-sync/compaction in separate thread and controlling their life-cycle

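The imports above (CompletableFuture, ExecutorService, Pair) outline the life-cycle contract: a subclass starts its loop on an executor and hands both back so the base class can monitor and shut them down. A sketch of that shape, assuming Pair exposes getLeft()/getRight() in the commons-lang style:

import org.apache.hudi.common.util.collection.Pair;

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;

public abstract class AsyncServiceSketch {

  private transient ExecutorService executor;
  private transient CompletableFuture<Boolean> future;

  // Subclasses start their loop and return the future plus the executor running it.
  protected abstract Pair<CompletableFuture<Boolean>, ExecutorService> startService();

  public void start() {
    Pair<CompletableFuture<Boolean>, ExecutorService> res = startService();
    this.future = res.getLeft();
    this.executor = res.getRight();
  }

  public void shutdown() {
    if (executor != null) {
      executor.shutdown();
      try {
        executor.awaitTermination(60, TimeUnit.SECONDS);
      } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
      }
    }
  }
}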
View File

@@ -18,18 +18,20 @@
package org.apache.hudi.utilities.deltastreamer;
import java.io.IOException;
import java.io.Serializable;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.WriteStatus;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException;
import java.io.Serializable;
/**
* Run one round of compaction
*/

View File

@@ -18,25 +18,6 @@
package org.apache.hudi.utilities.deltastreamer;
import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE;
import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME;
import com.codahale.metrics.Timer;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.HoodieWriteClient;
@@ -66,6 +47,15 @@ import org.apache.hudi.utilities.schema.RowBasedSchemaProvider;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.InputBatch;
import org.apache.hudi.utilities.transform.Transformer;
import com.codahale.metrics.Timer;
import com.google.common.base.Preconditions;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
@@ -73,8 +63,22 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;
import scala.collection.JavaConversions;
import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE;
import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME;
/**
* Syncs one batch of data to the hoodie dataset
*/

View File

@@ -18,30 +18,6 @@
package org.apache.hudi.utilities.deltastreamer;
import com.beust.jcommander.IStringConverter;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.model.HoodieTableType;
@@ -60,11 +36,37 @@ import org.apache.hudi.utilities.HiveIncrementalPuller;
import org.apache.hudi.utilities.UtilHelpers;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.JsonDFSSource;
import com.beust.jcommander.IStringConverter;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
* A utility which can incrementally take the output from {@link HiveIncrementalPuller} and apply it to the target
* dataset. Does not maintain any state; queries at runtime to see how far behind the target dataset is from the source

View File

@@ -18,10 +18,11 @@
package org.apache.hudi.utilities.deltastreamer;
import com.codahale.metrics.Timer;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.metrics.Metrics;
import com.codahale.metrics.Timer;
public class HoodieDeltaStreamerMetrics {
private HoodieWriteConfig config = null;

View File

@@ -18,6 +18,13 @@
package org.apache.hudi.utilities.deltastreamer;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.util.Option;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
@@ -25,11 +32,6 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.util.Option;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
/**
* Utility Class to generate Spark Scheduling allocation file. This kicks in only when user sets

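The BufferedWriter/FileWriter/UUID imports above are what the generator needs: write a fair-scheduler allocation XML to a local file, then hand its path to Spark. A sketch under stated assumptions: the pool names and weights are placeholders, not the values Hudi actually emits, and only the spark.scheduler.allocation.file property name is taken as given:

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.UUID;

public class AllocationFileSketch {

  // Writes a minimal fair-scheduler allocation file and returns its path.
  public static String writeAllocationFile() throws IOException {
    File file = new File(System.getProperty("java.io.tmpdir"),
        "spark_allocation_" + UUID.randomUUID() + ".xml");
    try (BufferedWriter out = new BufferedWriter(new FileWriter(file))) {
      out.write("<?xml version=\"1.0\"?>\n"
          + "<allocations>\n"
          + "  <pool name=\"ingest\"><weight>3</weight></pool>\n"
          + "  <pool name=\"compact\"><weight>1</weight></pool>\n"
          + "</allocations>\n");
    }
    return file.getAbsolutePath();
  }
}

The returned path would then be set on the SparkConf, e.g. conf.set("spark.scheduler.allocation.file", path).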
View File

@@ -18,11 +18,6 @@
package org.apache.hudi.utilities.deltastreamer;
import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE;
import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.utilities.sources.AvroSource;
@@ -32,11 +27,17 @@ import org.apache.hudi.utilities.sources.ParquetSource;
import org.apache.hudi.utilities.sources.RowSource;
import org.apache.hudi.utilities.sources.Source;
import org.apache.hudi.utilities.sources.helpers.AvroConvertor;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructType;
import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE;
import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME;
/**
* Adapts data-format provided by the source to the data-format required by the client (DeltaStreamer)
*/

View File

@@ -18,9 +18,10 @@
package org.apache.hudi.utilities.exception;
import java.sql.SQLException;
import org.apache.hudi.exception.HoodieException;
import java.sql.SQLException;
public class HoodieIncrementalPullException extends HoodieException {
public HoodieIncrementalPullException(String msg, SQLException e) {

View File

@@ -18,14 +18,6 @@
package org.apache.hudi.utilities.keygen;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.TimeZone;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.SimpleKeyGenerator;
import org.apache.hudi.common.model.HoodieKey;
@@ -34,6 +26,16 @@ import org.apache.hudi.exception.HoodieKeyException;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException;
import org.apache.avro.generic.GenericRecord;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.TimeZone;
/**
* Key generator that relies on timestamps for the partitioning field. Still picks the record key by name.
*/

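The SimpleDateFormat/TimeZone/GenericRecord imports above carry the core idea: read a timestamp field off the record and format it into a partition path. A hedged sketch; the field handling and output pattern are illustrative, not the generator's actual configuration keys:

import org.apache.avro.generic.GenericRecord;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public class TimestampPartitionSketch {

  // Formats an epoch-seconds field into a slash-separated partition path.
  public static String partitionPath(GenericRecord record, String timeField) {
    long epochSeconds = (Long) record.get(timeField);
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy/MM/dd");
    fmt.setTimeZone(TimeZone.getTimeZone("GMT"));
    return fmt.format(new Date(epochSeconds * 1000L));
  }
}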
View File

@@ -18,11 +18,30 @@
package org.apache.hudi.utilities.perf;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.SyncableFileSystemView;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.timeline.service.TimelineService;
import org.apache.hudi.utilities.UtilHelpers;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
@@ -36,23 +55,6 @@ import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.SyncableFileSystemView;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.timeline.service.TimelineService;
import org.apache.hudi.utilities.UtilHelpers;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
public class TimelineServerPerf implements Serializable {

View File

@@ -18,17 +18,19 @@
package org.apache.hudi.utilities.schema;
import java.io.IOException;
import java.util.Collections;
import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException;
import java.util.Collections;
/**
* A simple schema provider that reads off files on DFS
*/

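Reading a schema "off files on DFS" reduces to opening a Path through the Hadoop FileSystem API and feeding the stream to Avro's parser. A minimal sketch, assuming the schema file is a plain .avsc:

import org.apache.avro.Schema;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.io.InputStream;

public class DfsSchemaSketch {

  // Opens the schema file on whatever file system the path resolves to.
  public static Schema readSchema(String schemaFile) throws IOException {
    Path path = new Path(schemaFile);
    FileSystem fs = path.getFileSystem(new Configuration());
    try (InputStream in = fs.open(path)) {
      return new Schema.Parser().parse(in);
    }
  }
}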
View File

@@ -18,8 +18,9 @@
package org.apache.hudi.utilities.schema;
import org.apache.avro.Schema;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.avro.Schema;
import org.apache.spark.api.java.JavaSparkContext;
/**

View File

@@ -18,8 +18,9 @@
package org.apache.hudi.utilities.schema;
import org.apache.avro.Schema;
import org.apache.hudi.AvroConversionUtils;
import org.apache.avro.Schema;
import org.apache.spark.sql.types.StructType;
public class RowBasedSchemaProvider extends SchemaProvider {

View File

@@ -18,11 +18,13 @@
package org.apache.hudi.utilities.schema;
import java.io.Serializable;
import org.apache.avro.Schema;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.avro.Schema;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.Serializable;
/**
* Class to provide schema for reading data and also writing into a Hoodie table
*/

View File

@@ -18,17 +18,19 @@
package org.apache.hudi.utilities.schema;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import org.apache.avro.Schema;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieIOException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.avro.Schema;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
/**
* Obtains latest schema from the Confluent/Kafka schema-registry
*

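The ObjectMapper/URL/Schema imports above describe the whole flow: hit the registry endpoint, pull the schema string out of the JSON response, and parse it with Avro. A hedged sketch; the Confluent-style URL shape and the "schema" response field are assumptions based on the registry's documented REST API:

import org.apache.avro.Schema;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.IOException;
import java.net.URL;

public class RegistrySchemaSketch {

  // registryUrl is expected to look like
  // http://host:port/subjects/<subject>/versions/latest
  public static Schema fetchLatest(String registryUrl) throws IOException {
    JsonNode response = new ObjectMapper().readTree(new URL(registryUrl).openStream());
    return new Schema.Parser().parse(response.get("schema").asText());
  }
}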
View File

@@ -18,15 +18,16 @@
package org.apache.hudi.utilities.sources;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.DFSPathSelector;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

View File

@@ -18,14 +18,15 @@
package org.apache.hudi.utilities.sources;
import io.confluent.kafka.serializers.KafkaAvroDecoder;
import kafka.serializer.StringDecoder;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
import io.confluent.kafka.serializers.KafkaAvroDecoder;
import kafka.serializer.StringDecoder;
import org.apache.avro.generic.GenericRecord;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;

View File

@@ -18,9 +18,10 @@
package org.apache.hudi.utilities.sources;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

View File

@@ -18,19 +18,6 @@
package org.apache.hudi.utilities.sources;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
@@ -38,6 +25,14 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.utilities.HiveIncrementalPuller;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaPairRDD;
@@ -45,6 +40,13 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
* Source to read deltas produced by {@link HiveIncrementalPuller}, commit by commit and apply to the target table
* <p>

View File

@@ -18,7 +18,6 @@
package org.apache.hudi.utilities.sources;
import java.util.Arrays;
import org.apache.hudi.DataSourceReadOptions;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.model.HoodieRecord;
@@ -28,12 +27,15 @@ import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import java.util.Arrays;
public class HoodieIncrSource extends RowSource {
protected static class Config {

View File

@@ -23,6 +23,7 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.DFSPathSelector;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

View File

@@ -18,12 +18,13 @@
package org.apache.hudi.utilities.sources;
import kafka.serializer.StringDecoder;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
import kafka.serializer.StringDecoder;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;

View File

@@ -20,6 +20,7 @@ package org.apache.hudi.utilities.sources;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

View File

@@ -18,12 +18,13 @@
package org.apache.hudi.utilities.sources;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.DFSPathSelector;
import org.apache.avro.generic.GenericRecord;
import org.apache.parquet.avro.AvroParquetInputFormat;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;

View File

@@ -18,9 +18,10 @@
package org.apache.hudi.utilities.sources;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

View File

@@ -23,6 +23,7 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.utilities.schema.RowBasedSchemaProvider;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

View File

@@ -18,15 +18,17 @@
package org.apache.hudi.utilities.sources;
import java.io.Serializable;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import java.io.Serializable;
/**
* Represents a source from which we can tail data. Assumes a constructor that takes properties.
*/

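"Assumes a constructor that takes properties" is the contract that lets sources be chosen by class name at runtime. A hedged sketch of that reflective hand-off, with a simplified single-argument constructor standing in for the real signature:

import java.lang.reflect.Constructor;
import java.util.Properties;

public class ReflectiveSourceSketch {

  // Instantiates a source by class name via its properties-taking constructor.
  public static Object create(String className, Properties props) throws Exception {
    Class<?> clazz = Class.forName(className);
    Constructor<?> ctor = clazz.getConstructor(Properties.class);
    return ctor.newInstance(props);
  }
}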
View File

@@ -18,13 +18,15 @@
package org.apache.hudi.utilities.sources.helpers;
import org.apache.hudi.avro.MercifulJsonConverter;
import com.twitter.bijection.Injection;
import com.twitter.bijection.avro.GenericAvroCodecs;
import java.io.IOException;
import java.io.Serializable;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.avro.MercifulJsonConverter;
import java.io.IOException;
import java.io.Serializable;
/**
* Convert a variety of datum into Avro GenericRecords. Has a bunch of lazy fields to circumvent issues around

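The lazy fields mentioned above are the usual Spark trick: keep only a String form of the schema in the serialized object and re-parse it on first use on each executor. A minimal sketch of that pattern:

import org.apache.avro.Schema;

import java.io.Serializable;

public class LazyFieldSketch implements Serializable {

  private final String schemaStr;   // String form survives Java serialization
  private transient Schema schema;  // parsed lazily, never shipped over the wire

  public LazyFieldSketch(String schemaStr) {
    this.schemaStr = schemaStr;
  }

  public Schema getSchema() {
    if (schema == null) {
      schema = new Schema.Parser().parse(schemaStr);
    }
    return schema;
  }
}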
View File

@@ -18,18 +18,6 @@
package org.apache.hudi.utilities.sources.helpers;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
@@ -38,6 +26,20 @@ import org.apache.hudi.common.util.collection.ImmutablePair;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
public class DFSPathSelector {
/**

View File

@@ -18,12 +18,13 @@
package org.apache.hudi.utilities.sources.helpers;
import com.google.common.base.Preconditions;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import com.google.common.base.Preconditions;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Row;

View File

@@ -18,23 +18,26 @@
package org.apache.hudi.utilities.sources.helpers;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException;
import kafka.common.TopicAndPartition;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.streaming.kafka.KafkaCluster;
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset;
import org.apache.spark.streaming.kafka.OffsetRange;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.stream.Collectors;
import kafka.common.TopicAndPartition;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.streaming.kafka.KafkaCluster;
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset;
import org.apache.spark.streaming.kafka.OffsetRange;
import scala.Predef;
import scala.collection.JavaConverters;
import scala.collection.immutable.Map;

View File

@@ -18,8 +18,8 @@
package org.apache.hudi.utilities.transform;
import java.util.UUID;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
@@ -29,6 +29,8 @@ import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import java.util.UUID;
/**
* Transformer that can flatten nested objects. It currently doesn't unnest arrays.
*/

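Flattening without unnesting arrays comes down to a recursive walk over the StructType that aliases each leaf column by its dotted path. A hedged sketch of that walk; the column naming is illustrative:

import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

import java.util.ArrayList;
import java.util.List;

public class FlattenSketch {

  // Collects "a.b.c as a_b_c" select expressions; arrays and other
  // non-struct types are emitted as-is, matching the comment above.
  public static List<String> flattenedColumns(StructType schema, String prefix) {
    List<String> cols = new ArrayList<>();
    for (StructField field : schema.fields()) {
      String path = prefix.isEmpty() ? field.name() : prefix + "." + field.name();
      if (field.dataType() instanceof StructType) {
        cols.addAll(flattenedColumns((StructType) field.dataType(), path));
      } else {
        cols.add(path + " as " + path.replace(".", "_"));
      }
    }
    return cols;
  }
}

Calling String.join(",", flattenedColumns(df.schema(), "")) then yields a select expression for the flattened frame.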
View File

@@ -19,6 +19,7 @@
package org.apache.hudi.utilities.transform;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

View File

@@ -18,8 +18,8 @@
package org.apache.hudi.utilities.transform;
import java.util.UUID;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
@@ -27,6 +27,8 @@ import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import java.util.UUID;
/**
* A transformer that allows a SQL query template to be used to transform the source before writing to Hudi data-set.
*

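The UUID import above is the tell: the incoming frame is registered under a collision-free temp view name, that name is substituted into the user's query template, and the result is returned. A sketch with an assumed <SRC> placeholder token:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.UUID;

public class SqlTemplateSketch {

  // "<SRC>" is a hypothetical placeholder; the real transformer defines its own token.
  public static Dataset<Row> apply(SparkSession spark, Dataset<Row> rows, String queryTemplate) {
    String tmpView = "src_" + UUID.randomUUID().toString().replace("-", "_");
    rows.createOrReplaceTempView(tmpView);
    return spark.sql(queryTemplate.replace("<SRC>", tmpView));
  }
}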
View File

@@ -19,6 +19,7 @@
package org.apache.hudi.utilities.transform;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

View File

@@ -18,15 +18,16 @@
package org.apache.hudi.utilities;
import static org.junit.Assert.assertEquals;
import org.apache.hudi.utilities.transform.FlatteningTransformer;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class TestFlatteningTransformer {
@Test

View File

@@ -18,8 +18,29 @@
package org.apache.hudi.utilities;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.apache.hudi.HoodieReadClient;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.minicluster.HdfsTestService;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.io.Serializable;
@@ -31,28 +52,9 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.LocatedFileStatus;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hudi.HoodieReadClient;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.minicluster.HdfsTestService;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestHDFSParquetImporter implements Serializable { public class TestHDFSParquetImporter implements Serializable {


@@ -18,23 +18,6 @@
 package org.apache.hudi.utilities;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.hudi.SimpleKeyGenerator;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
@@ -62,6 +45,11 @@ import org.apache.hudi.utilities.sources.TestDataSource;
 import org.apache.hudi.utilities.sources.config.TestSourceConfig;
 import org.apache.hudi.utilities.transform.SqlQueryBasedTransformer;
 import org.apache.hudi.utilities.transform.Transformer;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaRDD;
@@ -80,6 +68,20 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 /**
  * Basic tests against {@link HoodieDeltaStreamer}, by issuing bulk_inserts, upserts, inserts. Check counts at the end.
  */


@@ -18,25 +18,27 @@
 package org.apache.hudi.utilities;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import java.io.File;
-import java.io.IOException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.common.HoodieCommonTestHarness;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.model.HoodieTestUtils;
 import org.apache.hudi.common.util.FSUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import java.io.File;
+import java.io.IOException;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 public class TestHoodieSnapshotCopier extends HoodieCommonTestHarness {
   private static String TEST_WRITE_TOKEN = "1-0-1";


@@ -18,15 +18,17 @@
 package org.apache.hudi.utilities;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import java.util.Map;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer;
 import org.apache.hudi.utilities.deltastreamer.SchedulerConfGenerator;
 import org.junit.Test;
+import java.util.Map;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 public class TestSchedulerConfGenerator {
   @Test


@@ -18,21 +18,6 @@
 package org.apache.hudi.utilities;
-import com.google.common.collect.ImmutableList;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hive.service.server.HiveServer2;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.TestRawTripPayload;
 import org.apache.hudi.common.minicluster.HdfsTestService;
@@ -46,6 +31,16 @@ import org.apache.hudi.hive.HiveSyncConfig;
 import org.apache.hudi.hive.HoodieHiveClient;
 import org.apache.hudi.hive.util.HiveTestService;
 import org.apache.hudi.utilities.sources.TestDataSource;
+import com.google.common.collect.ImmutableList;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hive.service.server.HiveServer2;
 import org.apache.parquet.avro.AvroParquetWriter;
 import org.apache.parquet.hadoop.ParquetWriter;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -56,6 +51,13 @@ import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
 /**
  * Abstract test that provides a dfs & spark contexts.
  *


@@ -18,13 +18,6 @@
 package org.apache.hudi.utilities.sources;
-import java.io.File;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.stream.Stream;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.util.Option;
@@ -33,9 +26,18 @@ import org.apache.hudi.common.util.collection.RocksDBBasedMap;
 import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.utilities.schema.SchemaProvider;
 import org.apache.hudi.utilities.sources.config.TestSourceConfig;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Stream;
 public abstract class AbstractBaseTestSource extends AvroSource {
   static final int DEFAULT_PARTITION_NUM = 0;


@@ -18,18 +18,20 @@
 package org.apache.hudi.utilities.sources;
-import java.util.Iterator;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.utilities.schema.SchemaProvider;
 import org.apache.hudi.utilities.sources.config.TestSourceConfig;
+import org.apache.avro.generic.GenericRecord;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
+import java.util.Iterator;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 /**
  * A Test DataSource which scales test-data generation by using spark parallelism.
  */


@@ -18,16 +18,6 @@
 package org.apache.hudi.utilities.sources;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import java.io.IOException;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hudi.AvroConversionUtils;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.util.Option;
@@ -35,6 +25,12 @@ import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.utilities.UtilitiesTestBase;
 import org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter;
 import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
@@ -45,6 +41,12 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import java.io.IOException;
+import java.util.List;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 /**
  * Basic tests against all subclasses of {@link JsonDFSSource} and {@link ParquetDFSSource}
  */


@@ -18,18 +18,20 @@
 package org.apache.hudi.utilities.sources;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.utilities.schema.SchemaProvider;
+import org.apache.avro.generic.GenericRecord;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
+import java.util.List;
+import java.util.stream.Collectors;
 /**
  * An implementation of {@link Source}, that emits test upserts.
  */


@@ -18,12 +18,6 @@
 package org.apache.hudi.utilities.sources;
-import static org.junit.Assert.assertEquals;
-import java.io.IOException;
-import java.util.HashMap;
-import kafka.common.TopicAndPartition;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.AvroConversionUtils;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.util.Option;
@@ -33,6 +27,9 @@ import org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter;
 import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
 import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
 import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.Config;
+import kafka.common.TopicAndPartition;
+import org.apache.avro.generic.GenericRecord;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
@@ -45,6 +42,11 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import java.io.IOException;
+import java.util.HashMap;
+import static org.junit.Assert.assertEquals;
 /**
  * Tests against {@link AvroKafkaSource}
  */