diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml
index 3b66ff2f2..562adbbc9 100644
--- a/hudi-cli/pom.xml
+++ b/hudi-cli/pom.xml
@@ -166,6 +166,11 @@
      <artifactId>log4j</artifactId>
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-avro</artifactId>
+    </dependency>
+
      <groupId>org.apache.spark</groupId>
@@ -188,12 +193,6 @@
      <version>${spring.shell.version}</version>
-    <dependency>
-      <groupId>de.vandermeer</groupId>
-      <artifactId>asciitable</artifactId>
-      <version>0.2.5</version>
-    </dependency>
-
      <groupId>com.jakewharton.fliptables</groupId>
      <artifactId>fliptables</artifactId>
@@ -214,12 +213,5 @@
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
-
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit-dep</artifactId>
-      <version>${junit-dep.version}</version>
-      <scope>test</scope>
-    </dependency>
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java
index 9e82cd745..cc822353e 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java
@@ -18,8 +18,8 @@
package org.apache.hudi.cli.commands;
-import com.beust.jcommander.internal.Maps;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Maps;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -50,12 +50,12 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieMemoryConfig;
import org.apache.hudi.hive.util.SchemaUtil;
+import org.apache.parquet.avro.AvroSchemaConverter;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;
-import parquet.avro.AvroSchemaConverter;
import scala.Tuple2;
import scala.Tuple3;
diff --git a/hudi-client/pom.xml b/hudi-client/pom.xml
index 5e1512d8d..c6ed6ffb5 100644
--- a/hudi-client/pom.xml
+++ b/hudi-client/pom.xml
@@ -91,10 +91,6 @@
      <groupId>org.apache.parquet</groupId>
      <artifactId>parquet-avro</artifactId>
-    <dependency>
-      <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-hadoop</artifactId>
-    </dependency>
@@ -110,6 +106,12 @@
      <groupId>io.dropwizard.metrics</groupId>
      <artifactId>metrics-graphite</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>com.rabbitmq</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
      <groupId>io.dropwizard.metrics</groupId>
@@ -125,12 +127,7 @@
      <groupId>com.beust</groupId>
      <artifactId>jcommander</artifactId>
      <version>1.48</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.htrace</groupId>
-      <artifactId>htrace-core</artifactId>
-      <version>3.0.4</version>
+      <scope>test</scope>
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java b/hudi-client/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
index 6dbcb8b19..34178e618 100644
--- a/hudi-client/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
+++ b/hudi-client/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
@@ -18,7 +18,7 @@
package org.apache.hudi.io;
-import com.beust.jcommander.internal.Maps;
+import com.google.common.collect.Maps;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
diff --git a/hudi-client/src/test/java/org/apache/hudi/io/strategy/TestHoodieCompactionStrategy.java b/hudi-client/src/test/java/org/apache/hudi/io/strategy/TestHoodieCompactionStrategy.java
index c3e123c94..f075edffe 100644
--- a/hudi-client/src/test/java/org/apache/hudi/io/strategy/TestHoodieCompactionStrategy.java
+++ b/hudi-client/src/test/java/org/apache/hudi/io/strategy/TestHoodieCompactionStrategy.java
@@ -21,8 +21,8 @@ package org.apache.hudi.io.strategy;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
-import com.beust.jcommander.internal.Lists;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@@ -248,7 +248,7 @@ public class TestHoodieCompactionStrategy {
  private List<HoodieCompactionOperation> createCompactionOperations(HoodieWriteConfig config,
      Map<Long, List<Long>> sizesMap, Map<Long, String> keyToPartitionMap) {
-    List<HoodieCompactionOperation> operations = Lists.newArrayList(sizesMap.size());
+    List<HoodieCompactionOperation> operations = new ArrayList<>(sizesMap.size());
sizesMap.forEach((k, v) -> {
HoodieDataFile df = TestHoodieDataFile.newDataFile(k);
diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml
index f418ab64d..db3ffe98d 100644
--- a/hudi-common/pom.xml
+++ b/hudi-common/pom.xml
@@ -111,13 +111,6 @@
      <artifactId>parquet-avro</artifactId>
-
-    <dependency>
-      <groupId>com.twitter.common</groupId>
-      <artifactId>objectsize</artifactId>
-      <version>0.0.12</version>
-    </dependency>
-
      <groupId>commons-codec</groupId>
@@ -154,7 +147,12 @@
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <classifier>tests</classifier>
+      <scope>test</scope>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+    </dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
@@ -173,11 +171,6 @@
      <scope>test</scope>
-    <dependency>
-      <groupId>com.esotericsoftware</groupId>
-      <artifactId>kryo</artifactId>
-      <scope>test</scope>
-    </dependency>
      <groupId>com.esotericsoftware</groupId>
      <artifactId>kryo-shaded</artifactId>
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/DefaultSizeEstimator.java b/hudi-common/src/main/java/org/apache/hudi/common/util/DefaultSizeEstimator.java
index 4c8320416..883c4cfa5 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/DefaultSizeEstimator.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/DefaultSizeEstimator.java
@@ -18,8 +18,6 @@
package org.apache.hudi.common.util;
-import com.twitter.common.objectsize.ObjectSizeCalculator;
-
/**
* Default implementation of size-estimator that uses Twitter's ObjectSizeCalculator
 * @param <T>
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/HoodieRecordSizeEstimator.java b/hudi-common/src/main/java/org/apache/hudi/common/util/HoodieRecordSizeEstimator.java
index 3a29b9aa6..0cdd243db 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/HoodieRecordSizeEstimator.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/HoodieRecordSizeEstimator.java
@@ -18,7 +18,6 @@
package org.apache.hudi.common.util;
-import com.twitter.common.objectsize.ObjectSizeCalculator;
import org.apache.avro.Schema;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ObjectSizeCalculator.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ObjectSizeCalculator.java
new file mode 100644
index 000000000..8fdf73703
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ObjectSizeCalculator.java
@@ -0,0 +1,478 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// COPIED FROM https://github.com/twitter/commons/blob/master/src/java/com/twitter/common/objectsize/
+// ObjectSizeCalculator.java
+// =================================================================================================
+// Copyright 2011 Twitter, Inc.
+// -------------------------------------------------------------------------------------------------
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this work except in compliance with the License.
+// You may obtain a copy of the License in the LICENSE file, or at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =================================================================================================
+
+package org.apache.hudi.common.util;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.collect.Sets;
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryPoolMXBean;
+import java.lang.reflect.Array;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.ArrayDeque;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Contains utility methods for calculating the memory usage of objects. It
+ * only works on the HotSpot JVM, and infers the actual memory layout (32 bit
+ * vs. 64 bit word size, compressed object pointers vs. uncompressed) from
+ * best available indicators. It can reliably detect a 32 bit vs. 64 bit JVM.
+ * It can only make an educated guess at whether compressed OOPs are used,
+ * though; specifically, it knows what the JVM's default choice of OOP
+ * compression would be based on HotSpot version and maximum heap sizes, but if
+ * the choice is explicitly overridden with the -XX:{+|-}UseCompressedOops command line
+ * switch, it can not detect
+ * this fact and will report incorrect sizes, as it will presume the default JVM
+ * behavior.
+ *
+ * @author Attila Szegedi
+ */
+public class ObjectSizeCalculator {
+
+ /**
+ * Describes constant memory overheads for various constructs in a JVM implementation.
+ */
+ public interface MemoryLayoutSpecification {
+
+ /**
+ * Returns the fixed overhead of an array of any type or length in this JVM.
+ *
+ * @return the fixed overhead of an array.
+ */
+ int getArrayHeaderSize();
+
+ /**
+   * Returns the fixed overhead for any {@link Object} subclass in this JVM.
+ *
+ * @return the fixed overhead of any object.
+ */
+ int getObjectHeaderSize();
+
+ /**
+ * Returns the quantum field size for a field owned by an object in this JVM.
+ *
+ * @return the quantum field size for an object.
+ */
+ int getObjectPadding();
+
+ /**
+ * Returns the fixed size of an object reference in this JVM.
+ *
+ * @return the size of all object references.
+ */
+ int getReferenceSize();
+
+ /**
+ * Returns the quantum field size for a field owned by one of an object's ancestor superclasses
+ * in this JVM.
+ *
+ * @return the quantum field size for a superclass field.
+ */
+ int getSuperclassFieldPadding();
+ }
+
+ private static class CurrentLayout {
+
+ private static final MemoryLayoutSpecification SPEC =
+ getEffectiveMemoryLayoutSpecification();
+ }
+
+ /**
+ * Given an object, returns the total allocated size, in bytes, of the object
+   * and all other objects reachable from it. Attempts to detect the current JVM memory layout,
+   * but may fail with {@link UnsupportedOperationException}.
+ *
+ * @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do
+ * anything special, it measures the size of all objects
+ * reachable through it (which will include its class loader, and by
+ * extension, all other Class objects loaded by
+ * the same loader, and all the parent class loaders). It doesn't provide the
+ * size of the static fields in the JVM class that the Class object
+ * represents.
+ * @return the total allocated size of the object and all other objects it
+ * retains.
+ * @throws UnsupportedOperationException if the current vm memory layout cannot be detected.
+ */
+ public static long getObjectSize(Object obj) throws UnsupportedOperationException {
+ return obj == null ? 0 : new ObjectSizeCalculator(CurrentLayout.SPEC).calculateObjectSize(obj);
+ }
+
+ // Fixed object header size for arrays.
+ private final int arrayHeaderSize;
+ // Fixed object header size for non-array objects.
+ private final int objectHeaderSize;
+ // Padding for the object size - if the object size is not an exact multiple
+ // of this, it is padded to the next multiple.
+ private final int objectPadding;
+ // Size of reference (pointer) fields.
+ private final int referenceSize;
+ // Padding for the fields of superclass before fields of subclasses are
+ // added.
+ private final int superclassFieldPadding;
+
+  private final LoadingCache<Class<?>, ClassSizeInfo> classSizeInfos =
+      CacheBuilder.newBuilder().build(new CacheLoader<Class<?>, ClassSizeInfo>() {
+        public ClassSizeInfo load(Class<?> clazz) {
+          return new ClassSizeInfo(clazz);
+        }
+      });
+
+
+ private final Set
-
-    <dependency>
-      <groupId>com.twitter</groupId>
-      <artifactId>parquet-avro</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.twitter</groupId>
-      <artifactId>parquet-hadoop-bundle</artifactId>
-    </dependency>
-
      <groupId>commons-logging</groupId>
      <artifactId>commons-logging</artifactId>
-    <dependency>
-      <groupId>com.twitter.common</groupId>
-      <artifactId>objectsize</artifactId>
-      <version>0.0.12</version>
-    </dependency>
-
      <groupId>org.apache.hadoop</groupId>
@@ -113,12 +97,6 @@
      <scope>test</scope>
-    <dependency>
-      <groupId>com.esotericsoftware</groupId>
-      <artifactId>kryo</artifactId>
-      <scope>test</scope>
-    </dependency>
-
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java
index d6cd4e1a5..a82b0bc3d 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java
@@ -53,9 +53,9 @@ import org.apache.hudi.common.util.LogReaderUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
-import parquet.avro.AvroSchemaConverter;
-import parquet.hadoop.ParquetFileReader;
-import parquet.schema.MessageType;
+import org.apache.parquet.avro.AvroSchemaConverter;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.schema.MessageType;
/**
* Record Reader implementation to merge fresh avro data with base parquet data, to support real
diff --git a/hudi-hive/pom.xml b/hudi-hive/pom.xml
index 758860d34..8fdcf5869 100644
--- a/hudi-hive/pom.xml
+++ b/hudi-hive/pom.xml
@@ -46,17 +46,12 @@
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-log4j12</artifactId>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
-
-      <groupId>com.twitter</groupId>
+      <groupId>org.apache.parquet</groupId>
      <artifactId>parquet-avro</artifactId>
@@ -65,12 +60,6 @@
      <artifactId>guava</artifactId>
-    <dependency>
-      <groupId>org.apache.thrift</groupId>
-      <artifactId>libthrift</artifactId>
-      <version>${thrift.version}</version>
-    </dependency>
-
      <groupId>joda-time</groupId>
      <artifactId>joda-time</artifactId>
@@ -95,16 +84,6 @@
      <artifactId>jcommander</artifactId>
-
-    <dependency>
-      <groupId>org.apache.httpcomponents</groupId>
-      <artifactId>httpcore</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.httpcomponents</groupId>
-      <artifactId>httpclient</artifactId>
-    </dependency>
-
      <groupId>org.apache.hadoop</groupId>
@@ -175,15 +154,15 @@
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libthrift</artifactId>
+      <version>${thrift.version}</version>
      <scope>test</scope>
-      <groupId>com.esotericsoftware.kryo</groupId>
-      <artifactId>kryo</artifactId>
-      <version>2.21</version>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
      <scope>test</scope>
diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
index a70106419..6e8e82a23 100644
--- a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
+++ b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
@@ -36,9 +36,9 @@ import org.apache.hudi.hadoop.realtime.HoodieRealtimeInputFormat;
import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent;
import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
import org.apache.hudi.hive.util.SchemaUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import parquet.schema.MessageType;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.parquet.schema.MessageType;
/**
@@ -52,7 +52,7 @@ import parquet.schema.MessageType;
@SuppressWarnings("WeakerAccess")
public class HiveSyncTool {
- private static final Logger LOG = LoggerFactory.getLogger(HiveSyncTool.class);
+ private static Logger LOG = LogManager.getLogger(HiveSyncTool.class);
private final HoodieHiveClient hoodieHiveClient;
public static final String SUFFIX_REALTIME_TABLE = "_rt";
private final HiveSyncConfig cfg;
diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java b/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java
index 90dded031..4a478016e 100644
--- a/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java
+++ b/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java
@@ -57,13 +57,13 @@ import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.InvalidDatasetException;
import org.apache.hudi.hive.util.SchemaUtil;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.MessageType;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import parquet.format.converter.ParquetMetadataConverter;
-import parquet.hadoop.ParquetFileReader;
-import parquet.hadoop.metadata.ParquetMetadata;
-import parquet.schema.MessageType;
@SuppressWarnings("ConstantConditions")
public class HoodieHiveClient {
diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java b/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java
index 8a7cee95d..c1f2291b3 100644
--- a/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java
+++ b/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java
@@ -25,7 +25,7 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
-import parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageType;
/**
* Represents the schema difference between the storage schema and hive table schema
diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java b/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java
index 69ea8d7f5..9e2784ee3 100644
--- a/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java
+++ b/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java
@@ -36,14 +36,15 @@ import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.hive.HiveSyncConfig;
import org.apache.hudi.hive.HoodieHiveSyncException;
import org.apache.hudi.hive.SchemaDifference;
+import org.apache.parquet.avro.AvroSchemaConverter;
+import org.apache.parquet.schema.DecimalMetadata;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import parquet.schema.DecimalMetadata;
-import parquet.schema.GroupType;
-import parquet.schema.MessageType;
-import parquet.schema.OriginalType;
-import parquet.schema.PrimitiveType;
-import parquet.schema.Type;
/**
* Schema Utilities
@@ -439,6 +440,7 @@ public class SchemaUtil {
/**
* Read the schema from the log file on path
+   * @return the schema of the last data block in the log file, or null if no data block is present
*/
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
public static MessageType readSchemaFromLogFile(FileSystem fs, Path path) throws IOException {
@@ -452,7 +454,7 @@ public class SchemaUtil {
}
reader.close();
if (lastBlock != null) {
- return new parquet.avro.AvroSchemaConverter().convert(lastBlock.getSchema());
+ return new AvroSchemaConverter().convert(lastBlock.getSchema());
}
return null;
}
diff --git a/hudi-hive/src/test/java/org/apache/hudi/hive/HiveSyncToolTest.java b/hudi-hive/src/test/java/org/apache/hudi/hive/HiveSyncToolTest.java
index fe6e196be..2947d8bb7 100644
--- a/hudi-hive/src/test/java/org/apache/hudi/hive/HiveSyncToolTest.java
+++ b/hudi-hive/src/test/java/org/apache/hudi/hive/HiveSyncToolTest.java
@@ -32,12 +32,13 @@ import org.apache.hudi.common.util.SchemaTestUtil;
import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent;
import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
import org.apache.hudi.hive.util.SchemaUtil;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Types;
import org.joda.time.DateTime;
import org.junit.Before;
import org.junit.Test;
-import parquet.schema.MessageType;
-import parquet.schema.OriginalType;
-import parquet.schema.PrimitiveType;
@SuppressWarnings("ConstantConditions")
public class HiveSyncToolTest {
@@ -59,8 +60,8 @@ public class HiveSyncToolTest {
@Test
public void testSchemaConvertArray() throws IOException {
// Testing the 3-level annotation structure
- MessageType schema = parquet.schema.Types.buildMessage().optionalGroup()
- .as(parquet.schema.OriginalType.LIST).repeatedGroup()
+ MessageType schema = Types.buildMessage().optionalGroup()
+ .as(OriginalType.LIST).repeatedGroup()
.optional(PrimitiveType.PrimitiveTypeName.INT32).named("element")
.named("list").named("int_list").named("ArrayOfInts");
@@ -68,8 +69,8 @@ public class HiveSyncToolTest {
assertEquals("`int_list` ARRAY< int>", schemaString);
// A array of arrays
- schema = parquet.schema.Types.buildMessage().optionalGroup()
- .as(parquet.schema.OriginalType.LIST).repeatedGroup().requiredGroup()
+ schema = Types.buildMessage().optionalGroup()
+ .as(OriginalType.LIST).repeatedGroup().requiredGroup()
.as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("list")
.named("element").named("list").named("int_list_list").named("ArrayOfArrayOfInts");
@@ -78,8 +79,8 @@ public class HiveSyncToolTest {
assertEquals("`int_list_list` ARRAY< ARRAY< int>>", schemaString);
// A list of integers
- schema = parquet.schema.Types.buildMessage().optionalGroup()
- .as(parquet.schema.OriginalType.LIST)
+ schema = Types.buildMessage().optionalGroup()
+ .as(OriginalType.LIST)
.repeated(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("int_list")
.named("ArrayOfInts");
@@ -87,8 +88,8 @@ public class HiveSyncToolTest {
assertEquals("`int_list` ARRAY< int>", schemaString);
// A list of structs with two fields
- schema = parquet.schema.Types.buildMessage().optionalGroup()
- .as(parquet.schema.OriginalType.LIST).repeatedGroup()
+ schema = Types.buildMessage().optionalGroup()
+ .as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str")
.required(PrimitiveType.PrimitiveTypeName.INT32).named("num").named("element")
.named("tuple_list").named("ArrayOfTuples");
@@ -99,8 +100,8 @@ public class HiveSyncToolTest {
// A list of structs with a single field
// For this case, since the inner group name is "array", we treat the
// element type as a one-element struct.
- schema = parquet.schema.Types.buildMessage().optionalGroup()
- .as(parquet.schema.OriginalType.LIST).repeatedGroup()
+ schema = Types.buildMessage().optionalGroup()
+ .as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str").named("array")
.named("one_tuple_list").named("ArrayOfOneTuples");
@@ -110,8 +111,8 @@ public class HiveSyncToolTest {
// A list of structs with a single field
// For this case, since the inner group name ends with "_tuple", we also treat the
// element type as a one-element struct.
- schema = parquet.schema.Types.buildMessage().optionalGroup()
- .as(parquet.schema.OriginalType.LIST).repeatedGroup()
+ schema = Types.buildMessage().optionalGroup()
+ .as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str")
.named("one_tuple_list_tuple").named("one_tuple_list").named("ArrayOfOneTuples2");
@@ -121,8 +122,8 @@ public class HiveSyncToolTest {
// A list of structs with a single field
// Unlike the above two cases, for this the element type is the type of the
// only field in the struct.
- schema = parquet.schema.Types.buildMessage().optionalGroup()
- .as(parquet.schema.OriginalType.LIST).repeatedGroup()
+ schema = Types.buildMessage().optionalGroup()
+ .as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str")
.named("one_tuple_list").named("one_tuple_list").named("ArrayOfOneTuples3");
@@ -130,8 +131,8 @@ public class HiveSyncToolTest {
assertEquals("`one_tuple_list` ARRAY< binary>", schemaString);
// A list of maps
- schema = parquet.schema.Types.buildMessage().optionalGroup()
- .as(parquet.schema.OriginalType.LIST).repeatedGroup().as(OriginalType.MAP)
+ schema = Types.buildMessage().optionalGroup()
+ .as(OriginalType.LIST).repeatedGroup().as(OriginalType.MAP)
.repeatedGroup().as(OriginalType.MAP_KEY_VALUE)
.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
.named("string_key").required(PrimitiveType.PrimitiveTypeName.INT32)
diff --git a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java
index 74e8d79d0..487200e49 100644
--- a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java
+++ b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java
@@ -41,6 +41,8 @@ import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.thrift.TUGIContainingTransport;
import org.apache.hive.service.server.HiveServer2;
import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
import org.apache.thrift.TProcessor;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.server.TServer;
@@ -52,12 +54,10 @@ import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportException;
import org.apache.thrift.transport.TTransportFactory;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
public class HiveTestService {
- private static final Logger LOG = LoggerFactory.getLogger(HiveTestService.class);
+ private static Logger LOG = LogManager.getLogger(HiveTestService.class);
private static final int CONNECTION_TIMEOUT = 30000;
diff --git a/hudi-spark/pom.xml b/hudi-spark/pom.xml
index 60686f65a..b39508087 100644
--- a/hudi-spark/pom.xml
+++ b/hudi-spark/pom.xml
@@ -203,6 +203,12 @@
      <artifactId>avro</artifactId>
+
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-avro</artifactId>
+    </dependency>
+
      <groupId>org.apache.spark</groupId>
@@ -218,12 +224,7 @@
      <groupId>com.databricks</groupId>
      <artifactId>spark-avro_2.11</artifactId>
      <version>4.0.0</version>
-    </dependency>
-
-
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-configuration2</artifactId>
+      <scope>provided</scope>
@@ -293,8 +294,7 @@
      <groupId>junit</groupId>
-      <artifactId>junit-dep</artifactId>
-      <version>${junit-dep.version}</version>
+      <artifactId>junit</artifactId>
      <scope>test</scope>
diff --git a/hudi-timeline-service/pom.xml b/hudi-timeline-service/pom.xml
index 353a40f6f..af5e42161 100644
--- a/hudi-timeline-service/pom.xml
+++ b/hudi-timeline-service/pom.xml
@@ -55,59 +55,6 @@
        <groupId>org.apache.rat</groupId>
        <artifactId>apache-rat-plugin</artifactId>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>copy-dependencies</id>
-            <phase>prepare-package</phase>
-            <goals>
-              <goal>copy-dependencies</goal>
-            </goals>
-            <configuration>
-              <outputDirectory>${project.build.directory}/lib</outputDirectory>
-              <overWriteReleases>true</overWriteReleases>
-              <overWriteSnapshots>true</overWriteSnapshots>
-              <overWriteIfNewer>true</overWriteIfNewer>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
-        <version>2.4</version>
-        <configuration>
-          <createDependencyReducedPom>true</createDependencyReducedPom>
-          <filters>
-            <filter>
-              <artifact>*:*</artifact>
-              <excludes>
-                <exclude>META-INF/*.SF</exclude>
-                <exclude>META-INF/*.DSA</exclude>
-                <exclude>META-INF/*.RSA</exclude>
-              </excludes>
-            </filter>
-          </filters>
-        </configuration>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-            <configuration>
-              <transformers>
-                <transformer
-                    implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
-                  <mainClass>org.apache.hudi.timeline.service.TimelineService</mainClass>
-                </transformer>
-              </transformers>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
@@ -244,11 +191,5 @@
      <scope>test</scope>
-    <dependency>
-      <groupId>com.esotericsoftware</groupId>
-      <artifactId>kryo</artifactId>
-      <scope>test</scope>
-    </dependency>
-
diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml
index 612afdb63..ce8327687 100644
--- a/hudi-utilities/pom.xml
+++ b/hudi-utilities/pom.xml
@@ -76,18 +76,20 @@
+
      <groupId>org.eclipse.jetty.aggregate</groupId>
      <artifactId>jetty-all</artifactId>
-      <version>7.6.0.v20120127</version>
+      <scope>test</scope>
      <groupId>org.eclipse.jetty</groupId>
      <artifactId>jetty-server</artifactId>
-      <version>7.6.0.v20120127</version>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
@@ -129,10 +131,6 @@
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-    </dependency>
@@ -151,10 +149,6 @@
      <groupId>org.apache.parquet</groupId>
      <artifactId>parquet-avro</artifactId>
-    <dependency>
-      <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-hadoop</artifactId>
-    </dependency>
@@ -167,6 +161,7 @@
+
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.11</artifactId>
@@ -177,6 +172,13 @@
+
+    <dependency>
+      <groupId>com.databricks</groupId>
+      <artifactId>spark-avro_2.11</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.11</artifactId>
@@ -194,24 +196,6 @@
      <artifactId>metrics-core</artifactId>
-    <dependency>
-      <groupId>io.javalin</groupId>
-      <artifactId>javalin</artifactId>
-      <version>2.4.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>org.eclipse.jetty</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.yammer.metrics</groupId>
-      <artifactId>metrics-core</artifactId>
-      <version>2.2.0</version>
-    </dependency>
-
      <groupId>org.antlr</groupId>
@@ -252,24 +236,6 @@
      <version>3.0.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>commons-codec</groupId>
-          <artifactId>commons-codec</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>commons-dbcp</groupId>
-          <artifactId>commons-dbcp</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>commons-lang</groupId>
-          <artifactId>commons-lang</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>commons-pool</groupId>
-          <artifactId>commons-pool</artifactId>
-        </exclusion>
-      </exclusions>
      <groupId>org.apache.httpcomponents</groupId>
@@ -295,11 +261,13 @@
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <classifier>tests</classifier>
+      <scope>test</scope>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <classifier>tests</classifier>
+      <scope>test</scope>
      <groupId>org.mortbay.jetty</groupId>
@@ -330,6 +298,10 @@
          <groupId>javax.servlet</groupId>
          <artifactId>servlet-api</artifactId>
+        <exclusion>
+          <groupId>org.eclipse.jetty.orbit</groupId>
+          <artifactId>javax.servlet</artifactId>
+        </exclusion>
diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml
index 1b68296b7..81b5e3d5a 100644
--- a/packaging/hudi-hadoop-mr-bundle/pom.xml
+++ b/packaging/hudi-hadoop-mr-bundle/pom.xml
@@ -25,6 +25,7 @@
  <modelVersion>4.0.0</modelVersion>
  <artifactId>hudi-hadoop-mr-bundle</artifactId>
+  <packaging>jar</packaging>
@@ -58,16 +59,6 @@
      <artifactId>parquet-avro</artifactId>
-
-    <dependency>
-      <groupId>com.twitter</groupId>
-      <artifactId>parquet-avro</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.twitter</groupId>
-      <artifactId>parquet-hadoop-bundle</artifactId>
-    </dependency>
-
      <groupId>commons-logging</groupId>
@@ -82,12 +73,6 @@
      <artifactId>commons-codec</artifactId>
-    <dependency>
-      <groupId>com.twitter.common</groupId>
-      <artifactId>objectsize</artifactId>
-      <version>0.0.12</version>
-    </dependency>
-
      <groupId>org.apache.hadoop</groupId>
@@ -239,15 +224,16 @@
                  <include>org.apache.hudi:hudi-common</include>
                  <include>org.apache.hudi:hudi-hadoop-mr</include>
-                  <include>com.twitter:parquet-avro</include>
-                  <include>com.twitter:parquet-hadoop-bundle</include>
-                  <include>com.twitter.common:objectsize</include>
                  <include>commons-logging:commons-logging</include>
                  <include>commons-io:commons-io</include>
+                  <include>commons-lang:commons-lang</include>
+                  <include>commons-pool:commons-pool</include>
+                  <include>commons-codec:commons-codec</include>
                  <include>com.esotericsoftware:kryo-shaded</include>
                  <include>org.objenesis:objenesis</include>
                  <include>com.esotericsoftware:minlog</include>
                  <include>commons-codec:commons-codec</include>
+                  <include>org.apache.parquet:parquet-avro</include>
diff --git a/packaging/hudi-hive-bundle/pom.xml b/packaging/hudi-hive-bundle/pom.xml
index 29594f4fe..72f00dba7 100644
--- a/packaging/hudi-hive-bundle/pom.xml
+++ b/packaging/hudi-hive-bundle/pom.xml
@@ -62,12 +62,6 @@
      <artifactId>slf4j-log4j12</artifactId>
-
-    <dependency>
-      <groupId>com.twitter</groupId>
-      <artifactId>parquet-avro</artifactId>
-    </dependency>
-
      <groupId>org.apache.thrift</groupId>
diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml
index 54b966d32..fbaba749c 100644
--- a/packaging/hudi-presto-bundle/pom.xml
+++ b/packaging/hudi-presto-bundle/pom.xml
@@ -50,12 +50,6 @@
      <artifactId>slf4j-log4j12</artifactId>
-
-    <dependency>
-      <groupId>com.twitter</groupId>
-      <artifactId>parquet-avro</artifactId>
-    </dependency>
-
      <groupId>org.apache.thrift</groupId>
diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml
index f46b94b17..bcd5beaff 100644
--- a/packaging/hudi-spark-bundle/pom.xml
+++ b/packaging/hudi-spark-bundle/pom.xml
@@ -296,10 +296,6 @@
      <groupId>commons-dbcp</groupId>
      <artifactId>commons-dbcp</artifactId>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-configuration2</artifactId>
-    </dependency>
diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml
index 530b12fba..e002c11ab 100644
--- a/packaging/hudi-utilities-bundle/pom.xml
+++ b/packaging/hudi-utilities-bundle/pom.xml
@@ -75,11 +75,22 @@
                  <include>org.apache.hudi:hudi-hadoop-mr</include>
                  <include>org.apache.hudi:hudi-timeline-service</include>
                  <include>com.beust:jcommander</include>
+
+                  <include>io.javalin:javalin</include>
+                  <include>org.jetbrains.kotlin:*</include>
+                  <include>org.eclipse.jetty:*</include>
+                  <include>org.eclipse.jetty.websocket:*</include>
+                  <include>org.rocksdb:rocksdbjni</include>
+                  <include>org.apache.httpcomponents:httpclient</include>
+                  <include>org.apache.httpcomponents:fluent-hc</include>
+                  <include>org.antlr:stringtemplate</include>
+                  <include>commons-io:commons-io</include>
+                  <include>commons-logging:commons-logging</include>
+                  <include>org.apache.parquet:parquet-avro</include>
+
                  <include>com.twitter:bijection-avro_2.11</include>
                  <include>com.twitter:bijection-core_2.11</include>
                  <include>org.apache.parquet:parquet-avro</include>
-                  <include>com.twitter:parquet-avro</include>
-                  <include>com.twitter.common:objectsize</include>
                  <include>io.confluent:kafka-avro-serializer</include>
                  <include>io.confluent:common-config</include>
                  <include>io.confluent:common-utils</include>
@@ -263,10 +274,6 @@
      <groupId>org.apache.parquet</groupId>
      <artifactId>parquet-avro</artifactId>
-    <dependency>
-      <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-hadoop</artifactId>
-    </dependency>
diff --git a/pom.xml b/pom.xml
index 45b10b600..097cb87bf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -147,6 +147,7 @@
3.0.1
file://${project.basedir}/src/test/resources/log4j-surefire.properties
0.12.0
+    <jetty.version>7.6.0.v20120127</jetty.version>
1.2.3
1.9.13
${project.basedir}
@@ -524,7 +525,7 @@
        <groupId>org.eclipse.jetty.aggregate</groupId>
        <artifactId>jetty-all</artifactId>
        <scope>test</scope>
-        <version>7.6.0.v20120127</version>
+        <version>${jetty.version}</version>
@@ -545,34 +546,6 @@
        <artifactId>parquet-avro</artifactId>
        <version>${parquet.version}</version>
-      <dependency>
-        <groupId>org.apache.parquet</groupId>
-        <artifactId>parquet-hadoop</artifactId>
-        <version>${parquet.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.parquet</groupId>
-        <artifactId>parquet-hive-bundle</artifactId>
-        <version>${parquet.version}</version>
-      </dependency>
-
-
-
-      <dependency>
-        <groupId>com.twitter</groupId>
-        <artifactId>parquet-hadoop-bundle</artifactId>
-        <version>1.6.0</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twitter</groupId>
-        <artifactId>parquet-hive-bundle</artifactId>
-        <version>1.6.0</version>
-      </dependency>
-      <dependency>
-        <groupId>com.twitter</groupId>
-        <artifactId>parquet-avro</artifactId>
-        <version>1.6.0</version>
-      </dependency>
@@ -675,11 +648,6 @@
        <artifactId>commons-pool</artifactId>
        <version>1.4</version>
-      <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-configuration2</artifactId>
-        <version>2.1.1</version>
-      </dependency>