1
0

[HUDI-159]: Pom cleanup and removal of com.twitter.parquet

- Redo all classes based on org.parquet only
 - remove unused dependencies like parquet-hadoop, common-configuration2
 - timeline-service does not build a fat jar anymore
 - Fix utilities and hadoop-mr bundles based on above
This commit is contained in:
vinoth chandar
2019-08-25 05:34:51 -07:00
committed by Balaji Varadarajan
parent 6edf0b9def
commit cd090871a1
29 changed files with 600 additions and 326 deletions

View File

@@ -32,12 +32,13 @@ import org.apache.hudi.common.util.SchemaTestUtil;
import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent;
import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
import org.apache.hudi.hive.util.SchemaUtil;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;
import org.joda.time.DateTime;
import org.junit.Before;
import org.junit.Test;
import parquet.schema.MessageType;
import parquet.schema.OriginalType;
import parquet.schema.PrimitiveType;
@SuppressWarnings("ConstantConditions")
public class HiveSyncToolTest {
@@ -59,8 +60,8 @@ public class HiveSyncToolTest {
@Test
public void testSchemaConvertArray() throws IOException {
// Testing the 3-level annotation structure
MessageType schema = parquet.schema.Types.buildMessage().optionalGroup()
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
MessageType schema = Types.buildMessage().optionalGroup()
.as(OriginalType.LIST).repeatedGroup()
.optional(PrimitiveType.PrimitiveTypeName.INT32).named("element")
.named("list").named("int_list").named("ArrayOfInts");
@@ -68,8 +69,8 @@ public class HiveSyncToolTest {
assertEquals("`int_list` ARRAY< int>", schemaString);
// A array of arrays
schema = parquet.schema.Types.buildMessage().optionalGroup()
.as(parquet.schema.OriginalType.LIST).repeatedGroup().requiredGroup()
schema = Types.buildMessage().optionalGroup()
.as(OriginalType.LIST).repeatedGroup().requiredGroup()
.as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("list")
.named("element").named("list").named("int_list_list").named("ArrayOfArrayOfInts");
@@ -78,8 +79,8 @@ public class HiveSyncToolTest {
assertEquals("`int_list_list` ARRAY< ARRAY< int>>", schemaString);
// A list of integers
schema = parquet.schema.Types.buildMessage().optionalGroup()
.as(parquet.schema.OriginalType.LIST)
schema = Types.buildMessage().optionalGroup()
.as(OriginalType.LIST)
.repeated(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("int_list")
.named("ArrayOfInts");
@@ -87,8 +88,8 @@ public class HiveSyncToolTest {
assertEquals("`int_list` ARRAY< int>", schemaString);
// A list of structs with two fields
schema = parquet.schema.Types.buildMessage().optionalGroup()
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
schema = Types.buildMessage().optionalGroup()
.as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str")
.required(PrimitiveType.PrimitiveTypeName.INT32).named("num").named("element")
.named("tuple_list").named("ArrayOfTuples");
@@ -99,8 +100,8 @@ public class HiveSyncToolTest {
// A list of structs with a single field
// For this case, since the inner group name is "array", we treat the
// element type as a one-element struct.
schema = parquet.schema.Types.buildMessage().optionalGroup()
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
schema = Types.buildMessage().optionalGroup()
.as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str").named("array")
.named("one_tuple_list").named("ArrayOfOneTuples");
@@ -110,8 +111,8 @@ public class HiveSyncToolTest {
// A list of structs with a single field
// For this case, since the inner group name ends with "_tuple", we also treat the
// element type as a one-element struct.
schema = parquet.schema.Types.buildMessage().optionalGroup()
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
schema = Types.buildMessage().optionalGroup()
.as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str")
.named("one_tuple_list_tuple").named("one_tuple_list").named("ArrayOfOneTuples2");
@@ -121,8 +122,8 @@ public class HiveSyncToolTest {
// A list of structs with a single field
// Unlike the above two cases, for this the element type is the type of the
// only field in the struct.
schema = parquet.schema.Types.buildMessage().optionalGroup()
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
schema = Types.buildMessage().optionalGroup()
.as(OriginalType.LIST).repeatedGroup()
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str")
.named("one_tuple_list").named("one_tuple_list").named("ArrayOfOneTuples3");
@@ -130,8 +131,8 @@ public class HiveSyncToolTest {
assertEquals("`one_tuple_list` ARRAY< binary>", schemaString);
// A list of maps
schema = parquet.schema.Types.buildMessage().optionalGroup()
.as(parquet.schema.OriginalType.LIST).repeatedGroup().as(OriginalType.MAP)
schema = Types.buildMessage().optionalGroup()
.as(OriginalType.LIST).repeatedGroup().as(OriginalType.MAP)
.repeatedGroup().as(OriginalType.MAP_KEY_VALUE)
.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
.named("string_key").required(PrimitiveType.PrimitiveTypeName.INT32)

View File

@@ -41,6 +41,8 @@ import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.thrift.TUGIContainingTransport;
import org.apache.hive.service.server.HiveServer2;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.thrift.TProcessor;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.server.TServer;
@@ -52,12 +54,10 @@ import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportException;
import org.apache.thrift.transport.TTransportFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class HiveTestService {
private static final Logger LOG = LoggerFactory.getLogger(HiveTestService.class);
private static Logger LOG = LogManager.getLogger(HiveTestService.class);
private static final int CONNECTION_TIMEOUT = 30000;