[HUDI-159]: Pom cleanup and removal of com.twitter.parquet
- Redo all classes based on org.parquet only - remove unuused dependencies like parquet-hadoop, common-configuration2 - timeline-service does not build a fat jar anymore - Fix utilities and hadoop-mr bundles based on above
This commit is contained in:
committed by
Balaji Varadarajan
parent
6edf0b9def
commit
cd090871a1
@@ -32,12 +32,13 @@ import org.apache.hudi.common.util.SchemaTestUtil;
|
||||
import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent;
|
||||
import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
|
||||
import org.apache.hudi.hive.util.SchemaUtil;
|
||||
import org.apache.parquet.schema.MessageType;
|
||||
import org.apache.parquet.schema.OriginalType;
|
||||
import org.apache.parquet.schema.PrimitiveType;
|
||||
import org.apache.parquet.schema.Types;
|
||||
import org.joda.time.DateTime;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import parquet.schema.MessageType;
|
||||
import parquet.schema.OriginalType;
|
||||
import parquet.schema.PrimitiveType;
|
||||
|
||||
@SuppressWarnings("ConstantConditions")
|
||||
public class HiveSyncToolTest {
|
||||
@@ -59,8 +60,8 @@ public class HiveSyncToolTest {
|
||||
@Test
|
||||
public void testSchemaConvertArray() throws IOException {
|
||||
// Testing the 3-level annotation structure
|
||||
MessageType schema = parquet.schema.Types.buildMessage().optionalGroup()
|
||||
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
|
||||
MessageType schema = Types.buildMessage().optionalGroup()
|
||||
.as(OriginalType.LIST).repeatedGroup()
|
||||
.optional(PrimitiveType.PrimitiveTypeName.INT32).named("element")
|
||||
.named("list").named("int_list").named("ArrayOfInts");
|
||||
|
||||
@@ -68,8 +69,8 @@ public class HiveSyncToolTest {
|
||||
assertEquals("`int_list` ARRAY< int>", schemaString);
|
||||
|
||||
// A array of arrays
|
||||
schema = parquet.schema.Types.buildMessage().optionalGroup()
|
||||
.as(parquet.schema.OriginalType.LIST).repeatedGroup().requiredGroup()
|
||||
schema = Types.buildMessage().optionalGroup()
|
||||
.as(OriginalType.LIST).repeatedGroup().requiredGroup()
|
||||
.as(OriginalType.LIST).repeatedGroup()
|
||||
.required(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("list")
|
||||
.named("element").named("list").named("int_list_list").named("ArrayOfArrayOfInts");
|
||||
@@ -78,8 +79,8 @@ public class HiveSyncToolTest {
|
||||
assertEquals("`int_list_list` ARRAY< ARRAY< int>>", schemaString);
|
||||
|
||||
// A list of integers
|
||||
schema = parquet.schema.Types.buildMessage().optionalGroup()
|
||||
.as(parquet.schema.OriginalType.LIST)
|
||||
schema = Types.buildMessage().optionalGroup()
|
||||
.as(OriginalType.LIST)
|
||||
.repeated(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("int_list")
|
||||
.named("ArrayOfInts");
|
||||
|
||||
@@ -87,8 +88,8 @@ public class HiveSyncToolTest {
|
||||
assertEquals("`int_list` ARRAY< int>", schemaString);
|
||||
|
||||
// A list of structs with two fields
|
||||
schema = parquet.schema.Types.buildMessage().optionalGroup()
|
||||
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
|
||||
schema = Types.buildMessage().optionalGroup()
|
||||
.as(OriginalType.LIST).repeatedGroup()
|
||||
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str")
|
||||
.required(PrimitiveType.PrimitiveTypeName.INT32).named("num").named("element")
|
||||
.named("tuple_list").named("ArrayOfTuples");
|
||||
@@ -99,8 +100,8 @@ public class HiveSyncToolTest {
|
||||
// A list of structs with a single field
|
||||
// For this case, since the inner group name is "array", we treat the
|
||||
// element type as a one-element struct.
|
||||
schema = parquet.schema.Types.buildMessage().optionalGroup()
|
||||
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
|
||||
schema = Types.buildMessage().optionalGroup()
|
||||
.as(OriginalType.LIST).repeatedGroup()
|
||||
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str").named("array")
|
||||
.named("one_tuple_list").named("ArrayOfOneTuples");
|
||||
|
||||
@@ -110,8 +111,8 @@ public class HiveSyncToolTest {
|
||||
// A list of structs with a single field
|
||||
// For this case, since the inner group name ends with "_tuple", we also treat the
|
||||
// element type as a one-element struct.
|
||||
schema = parquet.schema.Types.buildMessage().optionalGroup()
|
||||
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
|
||||
schema = Types.buildMessage().optionalGroup()
|
||||
.as(OriginalType.LIST).repeatedGroup()
|
||||
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str")
|
||||
.named("one_tuple_list_tuple").named("one_tuple_list").named("ArrayOfOneTuples2");
|
||||
|
||||
@@ -121,8 +122,8 @@ public class HiveSyncToolTest {
|
||||
// A list of structs with a single field
|
||||
// Unlike the above two cases, for this the element type is the type of the
|
||||
// only field in the struct.
|
||||
schema = parquet.schema.Types.buildMessage().optionalGroup()
|
||||
.as(parquet.schema.OriginalType.LIST).repeatedGroup()
|
||||
schema = Types.buildMessage().optionalGroup()
|
||||
.as(OriginalType.LIST).repeatedGroup()
|
||||
.required(PrimitiveType.PrimitiveTypeName.BINARY).named("str")
|
||||
.named("one_tuple_list").named("one_tuple_list").named("ArrayOfOneTuples3");
|
||||
|
||||
@@ -130,8 +131,8 @@ public class HiveSyncToolTest {
|
||||
assertEquals("`one_tuple_list` ARRAY< binary>", schemaString);
|
||||
|
||||
// A list of maps
|
||||
schema = parquet.schema.Types.buildMessage().optionalGroup()
|
||||
.as(parquet.schema.OriginalType.LIST).repeatedGroup().as(OriginalType.MAP)
|
||||
schema = Types.buildMessage().optionalGroup()
|
||||
.as(OriginalType.LIST).repeatedGroup().as(OriginalType.MAP)
|
||||
.repeatedGroup().as(OriginalType.MAP_KEY_VALUE)
|
||||
.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
|
||||
.named("string_key").required(PrimitiveType.PrimitiveTypeName.INT32)
|
||||
|
||||
@@ -41,6 +41,8 @@ import org.apache.hadoop.hive.metastore.api.MetaException;
|
||||
import org.apache.hadoop.hive.thrift.TUGIContainingTransport;
|
||||
import org.apache.hive.service.server.HiveServer2;
|
||||
import org.apache.hudi.common.model.HoodieTestUtils;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.thrift.TProcessor;
|
||||
import org.apache.thrift.protocol.TBinaryProtocol;
|
||||
import org.apache.thrift.server.TServer;
|
||||
@@ -52,12 +54,10 @@ import org.apache.thrift.transport.TSocket;
|
||||
import org.apache.thrift.transport.TTransport;
|
||||
import org.apache.thrift.transport.TTransportException;
|
||||
import org.apache.thrift.transport.TTransportFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class HiveTestService {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(HiveTestService.class);
|
||||
private static Logger LOG = LogManager.getLogger(HiveTestService.class);
|
||||
|
||||
private static final int CONNECTION_TIMEOUT = 30000;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user