1
0

Reformatting code per Google Code Style all over

This commit is contained in:
Vinoth Chandar
2017-11-12 22:54:56 -08:00
committed by vinoth chandar
parent 5a62480a92
commit e45679f5e2
254 changed files with 21580 additions and 21108 deletions

View File

@@ -15,8 +15,8 @@
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
<id>jar-with-dependencies</id>
<formats>
<format>jar</format>

View File

@@ -19,7 +19,6 @@
package com.uber.hoodie.hive;
import com.beust.jcommander.Parameter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

View File

@@ -26,6 +26,10 @@ import com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat;
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent;
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
import com.uber.hoodie.hive.util.SchemaUtil;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.Partition;
@@ -35,20 +39,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.schema.MessageType;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
/**
* Tool to sync a hoodie HDFS dataset with a hive metastore table.
* Either use it as a api HiveSyncTool.syncHoodieTable(HiveSyncConfig)
* or as a command line java -cp hoodie-hive.jar HiveSyncTool [args]
* Tool to sync a hoodie HDFS dataset with a hive metastore table. Either use it as a api
* HiveSyncTool.syncHoodieTable(HiveSyncConfig) or as a command line java -cp hoodie-hive.jar
* HiveSyncTool [args]
*
* This utility will get the schema from the latest commit and will sync hive table schema
* Also this will sync the partitions incrementally
* (all the partitions modified since the last commit)
* This utility will get the schema from the latest commit and will sync hive table schema Also this
* will sync the partitions incrementally (all the partitions modified since the last commit)
*/
@SuppressWarnings("WeakerAccess")
public class HiveSyncTool {
@@ -64,7 +62,7 @@ public class HiveSyncTool {
}
public void syncHoodieTable() {
switch(hoodieHiveClient.getTableType()) {
switch (hoodieHiveClient.getTableType()) {
case COPY_ON_WRITE:
syncHoodieTable(false);
break;
@@ -125,15 +123,15 @@ public class HiveSyncTool {
// Check and sync schema
if (!tableExists) {
LOG.info("Table " + cfg.tableName + " is not found. Creating it");
if(!isRealTime) {
if (!isRealTime) {
// TODO - RO Table for MOR only after major compaction (UnboundedCompaction is default for now)
hoodieHiveClient.createTable(schema, HoodieInputFormat.class.getName(),
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
} else {
// Custom serde will not work with ALTER TABLE REPLACE COLUMNS
// https://github.com/apache/hive/blob/release-1.1.0/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java#L3488
hoodieHiveClient.createTable(schema, HoodieRealtimeInputFormat.class.getName(),
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
// Custom serde will not work with ALTER TABLE REPLACE COLUMNS
// https://github.com/apache/hive/blob/release-1.1.0/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java#L3488
hoodieHiveClient.createTable(schema, HoodieRealtimeInputFormat.class.getName(),
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
}
} else {
// Check if the dataset schema has evolved

View File

@@ -198,8 +198,8 @@ public class HoodieHiveClient {
}
/**
* Iterate over the storage partitions and find if there are any new partitions that need
* to be added or updated. Generate a list of PartitionEvent based on the changes required.
* Iterate over the storage partitions and find if there are any new partitions that need to be
* added or updated. Generate a list of PartitionEvent based on the changes required.
*/
List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions,
List<String> partitionStoragePartitions) {
@@ -297,9 +297,9 @@ public class HoodieHiveClient {
}
/**
* Gets the schema for a hoodie dataset.
* Depending on the type of table, read from any file written in the latest commit.
* We will assume that the schema has not changed within a single atomic write.
* Gets the schema for a hoodie dataset. Depending on the type of table, read from any file
* written in the latest commit. We will assume that the schema has not changed within a single
* atomic write.
*
* @return Parquet schema for this dataset
*/
@@ -313,7 +313,8 @@ public class HoodieHiveClient {
.orElseThrow(() -> new InvalidDatasetException(syncConfig.basePath));
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
.fromBytes(activeTimeline.getInstantDetails(lastCommit).get());
String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny()
String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
.stream().findAny()
.orElseThrow(() -> new IllegalArgumentException(
"Could not find any data file written for commit " + lastCommit
+ ", could not get schema for dataset " + metaClient.getBasePath()));
@@ -330,7 +331,8 @@ public class HoodieHiveClient {
lastDeltaCommitAfterCompaction = metaClient.getActiveTimeline()
.getDeltaCommitTimeline()
.filterCompletedInstants()
.findInstantsAfter(lastCompactionCommit.get().getTimestamp(), Integer.MAX_VALUE).lastInstant();
.findInstantsAfter(lastCompactionCommit.get().getTimestamp(), Integer.MAX_VALUE)
.lastInstant();
}
LOG.info("Found the last delta commit after last compaction as "
+ lastDeltaCommitAfterCompaction);
@@ -340,8 +342,9 @@ public class HoodieHiveClient {
// read from the log file wrote
commitMetadata = HoodieCommitMetadata
.fromBytes(activeTimeline.getInstantDetails(lastDeltaCommit).get());
filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().filter(s -> s.contains(
HoodieLogFile.DELTA_EXTENSION)).findAny()
filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
.stream().filter(s -> s.contains(
HoodieLogFile.DELTA_EXTENSION)).findAny()
.orElseThrow(() -> new IllegalArgumentException(
"Could not find any data file written for commit " + lastDeltaCommit
+ ", could not get schema for dataset " + metaClient.getBasePath()));
@@ -361,10 +364,6 @@ public class HoodieHiveClient {
/**
* Read schema from a data file from the last compaction commit done.
*
* @param lastCompactionCommitOpt
* @return
* @throws IOException
*/
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private MessageType readSchemaFromLastCompaction(Optional<HoodieInstant> lastCompactionCommitOpt)
@@ -377,7 +376,8 @@ public class HoodieHiveClient {
// Read from the compacted file wrote
HoodieCompactionMetadata compactionMetadata = HoodieCompactionMetadata
.fromBytes(activeTimeline.getInstantDetails(lastCompactionCommit).get());
String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny()
String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
.stream().findAny()
.orElseThrow(() -> new IllegalArgumentException(
"Could not find any data file written for compaction " + lastCompactionCommit
+ ", could not get schema for dataset " + metaClient.getBasePath()));
@@ -386,11 +386,6 @@ public class HoodieHiveClient {
/**
* Read the schema from the log file on path
*
* @param lastCompactionCommitOpt
* @param path
* @return
* @throws IOException
*/
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private MessageType readSchemaFromLogFile(Optional<HoodieInstant> lastCompactionCommitOpt,
@@ -422,7 +417,8 @@ public class HoodieHiveClient {
+ ". File does not exist.");
}
ParquetMetadata fileFooter =
ParquetFileReader.readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER);
ParquetFileReader
.readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER);
return fileFooter.getFileMetaData().getSchema();
}
@@ -530,7 +526,7 @@ public class HoodieHiveClient {
if (connection != null) {
connection.close();
}
if(client != null) {
if (client != null) {
client.close();
}
} catch (SQLException e) {

View File

@@ -18,23 +18,23 @@ package com.uber.hoodie.hive;
public class HoodieHiveSyncException extends RuntimeException {
public HoodieHiveSyncException() {
super();
}
public HoodieHiveSyncException() {
super();
}
public HoodieHiveSyncException(String message) {
super(message);
}
public HoodieHiveSyncException(String message) {
super(message);
}
public HoodieHiveSyncException(String message, Throwable t) {
super(message, t);
}
public HoodieHiveSyncException(String message, Throwable t) {
super(message, t);
}
public HoodieHiveSyncException(Throwable t) {
super(t);
}
public HoodieHiveSyncException(Throwable t) {
super(t);
}
protected static String format(String message, Object... args) {
return String.format(String.valueOf(message), (Object[]) args);
}
protected static String format(String message, Object... args) {
return String.format(String.valueOf(message), (Object[]) args);
}
}

View File

@@ -21,11 +21,13 @@ package com.uber.hoodie.hive;
import java.util.List;
/**
* HDFS Path contain hive partition values for the keys it is partitioned on.
* This mapping is not straight forward and requires a pluggable implementation to extract the partition value from HDFS path.
* HDFS Path contain hive partition values for the keys it is partitioned on. This mapping is not
* straight forward and requires a pluggable implementation to extract the partition value from HDFS
* path.
*
* e.g. Hive table partitioned by datestr=yyyy-mm-dd and hdfs path /app/hoodie/dataset1/YYYY=[yyyy]/MM=[mm]/DD=[dd]
*/
public interface PartitionValueExtractor {
List<String> extractPartitionValuesInPath(String partitionPath);
}

View File

@@ -21,88 +21,92 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import parquet.schema.MessageType;
import java.util.List;
import java.util.Map;
import parquet.schema.MessageType;
/**
* Represents the schema difference between the storage schema and hive table schema
*/
public class SchemaDifference {
private final MessageType storageSchema;
private final Map<String, String> tableSchema;
private final List<String> deleteColumns;
private final Map<String, String> updateColumnTypes;
private final Map<String, String> addColumnTypes;
private SchemaDifference(MessageType storageSchema, Map<String, String> tableSchema,
List<String> deleteColumns, Map<String, String> updateColumnTypes,
Map<String, String> addColumnTypes) {
this.storageSchema = storageSchema;
this.tableSchema = tableSchema;
this.deleteColumns = ImmutableList.copyOf(deleteColumns);
this.updateColumnTypes = ImmutableMap.copyOf(updateColumnTypes);
this.addColumnTypes = ImmutableMap.copyOf(addColumnTypes);
}
public List<String> getDeleteColumns() {
return deleteColumns;
}
public Map<String, String> getUpdateColumnTypes() {
return updateColumnTypes;
}
public Map<String, String> getAddColumnTypes() {
return addColumnTypes;
}
@Override
public String toString() {
return Objects.toStringHelper(this).add("deleteColumns", deleteColumns)
.add("updateColumnTypes", updateColumnTypes).add("addColumnTypes", addColumnTypes)
.toString();
}
public static Builder newBuilder(MessageType storageSchema, Map<String, String> tableSchema) {
return new Builder(storageSchema, tableSchema);
}
public boolean isEmpty() {
return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty();
}
public static class Builder {
private final MessageType storageSchema;
private final Map<String, String> tableSchema;
private final List<String> deleteColumns;
private final Map<String, String> updateColumnTypes;
private final Map<String, String> addColumnTypes;
private List<String> deleteColumns;
private Map<String, String> updateColumnTypes;
private Map<String, String> addColumnTypes;
private SchemaDifference(MessageType storageSchema, Map<String, String> tableSchema,
List<String> deleteColumns, Map<String, String> updateColumnTypes, Map<String, String> addColumnTypes) {
this.storageSchema = storageSchema;
this.tableSchema = tableSchema;
this.deleteColumns = ImmutableList.copyOf(deleteColumns);
this.updateColumnTypes = ImmutableMap.copyOf(updateColumnTypes);
this.addColumnTypes = ImmutableMap.copyOf(addColumnTypes);
public Builder(MessageType storageSchema, Map<String, String> tableSchema) {
this.storageSchema = storageSchema;
this.tableSchema = tableSchema;
deleteColumns = Lists.newArrayList();
updateColumnTypes = Maps.newHashMap();
addColumnTypes = Maps.newHashMap();
}
public List<String> getDeleteColumns() {
return deleteColumns;
public Builder deleteTableColumn(String column) {
deleteColumns.add(column);
return this;
}
public Map<String, String> getUpdateColumnTypes() {
return updateColumnTypes;
public Builder updateTableColumn(String column, String storageColumnType) {
updateColumnTypes.put(column, storageColumnType);
return this;
}
public Map<String, String> getAddColumnTypes() {
return addColumnTypes;
public Builder addTableColumn(String name, String type) {
addColumnTypes.put(name, type);
return this;
}
@Override public String toString() {
return Objects.toStringHelper(this).add("deleteColumns", deleteColumns)
.add("updateColumnTypes", updateColumnTypes).add("addColumnTypes", addColumnTypes)
.toString();
}
public static Builder newBuilder(MessageType storageSchema, Map<String, String> tableSchema) {
return new Builder(storageSchema, tableSchema);
}
public boolean isEmpty() {
return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty();
}
public static class Builder {
private final MessageType storageSchema;
private final Map<String, String> tableSchema;
private List<String> deleteColumns;
private Map<String, String> updateColumnTypes;
private Map<String, String> addColumnTypes;
public Builder(MessageType storageSchema, Map<String, String> tableSchema) {
this.storageSchema = storageSchema;
this.tableSchema = tableSchema;
deleteColumns = Lists.newArrayList();
updateColumnTypes = Maps.newHashMap();
addColumnTypes = Maps.newHashMap();
}
public Builder deleteTableColumn(String column) {
deleteColumns.add(column);
return this;
}
public Builder updateTableColumn(String column, String storageColumnType) {
updateColumnTypes.put(column, storageColumnType);
return this;
}
public Builder addTableColumn(String name, String type) {
addColumnTypes.put(name, type);
return this;
}
public SchemaDifference build() {
return new SchemaDifference(storageSchema, tableSchema, deleteColumns, updateColumnTypes, addColumnTypes);
}
public SchemaDifference build() {
return new SchemaDifference(storageSchema, tableSchema, deleteColumns, updateColumnTypes,
addColumnTypes);
}
}
}

View File

@@ -23,9 +23,11 @@ import java.util.List;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
/**
* HDFS Path contain hive partition values for the keys it is partitioned on.
* This mapping is not straight forward and requires a pluggable implementation to extract the partition value from HDFS path.
* HDFS Path contain hive partition values for the keys it is partitioned on. This mapping is not
* straight forward and requires a pluggable implementation to extract the partition value from HDFS
* path.
*
* This implementation extracts datestr=yyyy-mm-dd from path of type /yyyy/mm/dd
*/

View File

@@ -17,32 +17,32 @@
package com.uber.hoodie.hive.util;
import com.google.common.collect.Maps;
import java.util.Iterator;
import java.util.Map;
public class ColumnNameXLator {
private static Map<String, String> xformMap = Maps.newHashMap();
public static String translateNestedColumn(String colName) {
Map.Entry entry;
for (Iterator i$ = xformMap.entrySet().iterator(); i$.hasNext();
colName = colName.replaceAll((String) entry.getKey(), (String) entry.getValue())) {
entry = (Map.Entry) i$.next();
}
private static Map<String, String> xformMap = Maps.newHashMap();
return colName;
public static String translateNestedColumn(String colName) {
Map.Entry entry;
for (Iterator i$ = xformMap.entrySet().iterator(); i$.hasNext();
colName = colName.replaceAll((String) entry.getKey(), (String) entry.getValue())) {
entry = (Map.Entry) i$.next();
}
public static String translateColumn(String colName) {
return colName;
}
return colName;
}
public static String translate(String colName, boolean nestedColumn) {
return !nestedColumn ? translateColumn(colName) : translateNestedColumn(colName);
}
public static String translateColumn(String colName) {
return colName;
}
static {
xformMap.put("\\$", "_dollar_");
}
public static String translate(String colName, boolean nestedColumn) {
return !nestedColumn ? translateColumn(colName) : translateNestedColumn(colName);
}
static {
xformMap.put("\\$", "_dollar_");
}
}

View File

@@ -21,6 +21,10 @@ import com.google.common.collect.Sets;
import com.uber.hoodie.hive.HiveSyncConfig;
import com.uber.hoodie.hive.HoodieHiveSyncException;
import com.uber.hoodie.hive.SchemaDifference;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.schema.DecimalMetadata;
@@ -30,404 +34,386 @@ import parquet.schema.OriginalType;
import parquet.schema.PrimitiveType;
import parquet.schema.Type;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Schema Utilities
*/
public class SchemaUtil {
private static Logger LOG = LoggerFactory.getLogger(SchemaUtil.class);
/**
* Get the schema difference between the storage schema and hive table schema
*
* @param storageSchema
* @param tableSchema
* @param partitionKeys
* @return
*/
public static SchemaDifference getSchemaDifference(MessageType storageSchema,
Map<String, String> tableSchema, List<String> partitionKeys) {
Map<String, String> newTableSchema;
try {
newTableSchema = convertParquetSchemaToHiveSchema(storageSchema);
} catch (IOException e) {
throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema",
e);
private static Logger LOG = LoggerFactory.getLogger(SchemaUtil.class);
/**
* Get the schema difference between the storage schema and hive table schema
*/
public static SchemaDifference getSchemaDifference(MessageType storageSchema,
Map<String, String> tableSchema, List<String> partitionKeys) {
Map<String, String> newTableSchema;
try {
newTableSchema = convertParquetSchemaToHiveSchema(storageSchema);
} catch (IOException e) {
throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema",
e);
}
LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
SchemaDifference.Builder schemaDiffBuilder =
SchemaDifference.newBuilder(storageSchema, tableSchema);
Set<String> tableColumns = Sets.newHashSet();
for (Map.Entry<String, String> field : tableSchema.entrySet()) {
String fieldName = field.getKey().toLowerCase();
String tickSurroundedFieldName = tickSurround(fieldName);
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !partitionKeys
.contains(fieldName)) {
schemaDiffBuilder.deleteTableColumn(fieldName);
} else {
// check type
String tableColumnType = field.getValue();
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName)) {
if (partitionKeys.contains(fieldName)) {
// Partition key does not have to be part of the storage schema
continue;
}
// We will log this and continue. Hive schema is a superset of all parquet schemas
LOG.warn("Ignoring table column " + fieldName
+ " as its not present in the parquet schema");
continue;
}
LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
SchemaDifference.Builder schemaDiffBuilder =
SchemaDifference.newBuilder(storageSchema, tableSchema);
Set<String> tableColumns = Sets.newHashSet();
tableColumnType = tableColumnType.replaceAll("\\s+", "");
for (Map.Entry<String, String> field : tableSchema.entrySet()) {
String fieldName = field.getKey().toLowerCase();
String tickSurroundedFieldName = tickSurround(fieldName);
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !partitionKeys.contains(fieldName)) {
schemaDiffBuilder.deleteTableColumn(fieldName);
} else {
// check type
String tableColumnType = field.getValue();
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName)) {
if (partitionKeys.contains(fieldName)) {
// Partition key does not have to be part of the storage schema
continue;
}
// We will log this and continue. Hive schema is a superset of all parquet schemas
LOG.warn("Ignoring table column " + fieldName
+ " as its not present in the parquet schema");
continue;
}
tableColumnType = tableColumnType.replaceAll("\\s+", "");
String expectedType = getExpectedType(newTableSchema, tickSurroundedFieldName);
expectedType = expectedType.replaceAll("\\s+", "");
expectedType = expectedType.replaceAll("`", "");
String expectedType = getExpectedType(newTableSchema, tickSurroundedFieldName);
expectedType = expectedType.replaceAll("\\s+", "");
expectedType = expectedType.replaceAll("`", "");
if (!tableColumnType.equalsIgnoreCase(expectedType)) {
// check for incremental datasets, the schema type change is allowed as per evolution rules
if (!isSchemaTypeUpdateAllowed(tableColumnType, expectedType)) {
throw new HoodieHiveSyncException(
"Could not convert field Type from " + tableColumnType + " to "
+ expectedType + " for field " + fieldName);
}
schemaDiffBuilder.updateTableColumn(fieldName,
getExpectedType(newTableSchema, tickSurroundedFieldName));
}
}
tableColumns.add(tickSurroundedFieldName);
if (!tableColumnType.equalsIgnoreCase(expectedType)) {
// check for incremental datasets, the schema type change is allowed as per evolution rules
if (!isSchemaTypeUpdateAllowed(tableColumnType, expectedType)) {
throw new HoodieHiveSyncException(
"Could not convert field Type from " + tableColumnType + " to "
+ expectedType + " for field " + fieldName);
}
schemaDiffBuilder.updateTableColumn(fieldName,
getExpectedType(newTableSchema, tickSurroundedFieldName));
}
for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
if (!tableColumns.contains(entry.getKey().toLowerCase())) {
schemaDiffBuilder.addTableColumn(entry.getKey(), entry.getValue());
}
}
LOG.info("Difference between schemas: " + schemaDiffBuilder.build().toString());
return schemaDiffBuilder.build();
}
tableColumns.add(tickSurroundedFieldName);
}
private static String getExpectedType(Map<String, String> newTableSchema, String fieldName) {
for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
if (entry.getKey().toLowerCase().equals(fieldName)) {
return entry.getValue();
}
}
return null;
for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
if (!tableColumns.contains(entry.getKey().toLowerCase())) {
schemaDiffBuilder.addTableColumn(entry.getKey(), entry.getValue());
}
}
LOG.info("Difference between schemas: " + schemaDiffBuilder.build().toString());
private static boolean isFieldExistsInSchema(Map<String, String> newTableSchema,
String fieldName) {
for (String entry : newTableSchema.keySet()) {
if (entry.toLowerCase().equals(fieldName)) {
return true;
}
}
return false;
return schemaDiffBuilder.build();
}
private static String getExpectedType(Map<String, String> newTableSchema, String fieldName) {
for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
if (entry.getKey().toLowerCase().equals(fieldName)) {
return entry.getValue();
}
}
return null;
}
private static boolean isFieldExistsInSchema(Map<String, String> newTableSchema,
String fieldName) {
for (String entry : newTableSchema.keySet()) {
if (entry.toLowerCase().equals(fieldName)) {
return true;
}
}
return false;
}
/**
* Returns equivalent Hive table schema read from a parquet file
*
* @param messageType : Parquet Schema
* @return : Hive Table schema read from parquet file MAP[String,String]
* @throws IOException
*/
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType)
throws IOException {
Map<String, String> schema = Maps.newLinkedHashMap();
List<Type> parquetFields = messageType.getFields();
for (Type parquetType : parquetFields) {
StringBuilder result = new StringBuilder();
String key = parquetType.getName();
if (parquetType.isRepetition(Type.Repetition.REPEATED)) {
result.append(createHiveArray(parquetType, ""));
} else {
result.append(convertField(parquetType));
/**
* Returns equivalent Hive table schema read from a parquet file
*
* @param messageType : Parquet Schema
* @return : Hive Table schema read from parquet file MAP[String,String]
*/
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType)
throws IOException {
Map<String, String> schema = Maps.newLinkedHashMap();
List<Type> parquetFields = messageType.getFields();
for (Type parquetType : parquetFields) {
StringBuilder result = new StringBuilder();
String key = parquetType.getName();
if (parquetType.isRepetition(Type.Repetition.REPEATED)) {
result.append(createHiveArray(parquetType, ""));
} else {
result.append(convertField(parquetType));
}
schema.put(hiveCompatibleFieldName(key, false), result.toString());
}
return schema;
}
/**
* Convert one field data type of parquet schema into an equivalent Hive schema
*
* @param parquetType : Single paruet field
* @return : Equivalent sHive schema
*/
private static String convertField(final Type parquetType) {
StringBuilder field = new StringBuilder();
if (parquetType.isPrimitive()) {
final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName =
parquetType.asPrimitiveType().getPrimitiveTypeName();
final OriginalType originalType = parquetType.getOriginalType();
if (originalType == OriginalType.DECIMAL) {
final DecimalMetadata decimalMetadata =
parquetType.asPrimitiveType().getDecimalMetadata();
return field.append("DECIMAL(").append(decimalMetadata.getPrecision()).
append(" , ").append(decimalMetadata.getScale()).append(")").toString();
}
// TODO - fix the method naming here
return parquetPrimitiveTypeName
.convert(new PrimitiveType.PrimitiveTypeNameConverter<String, RuntimeException>() {
@Override
public String convertBOOLEAN(
PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "boolean";
}
schema.put(hiveCompatibleFieldName(key, false), result.toString());
}
return schema;
}
/**
* Convert one field data type of parquet schema into an equivalent Hive
* schema
*
* @param parquetType : Single paruet field
* @return : Equivalent sHive schema
*/
private static String convertField(final Type parquetType) {
StringBuilder field = new StringBuilder();
if (parquetType.isPrimitive()) {
final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName =
parquetType.asPrimitiveType().getPrimitiveTypeName();
final OriginalType originalType = parquetType.getOriginalType();
if (originalType == OriginalType.DECIMAL) {
final DecimalMetadata decimalMetadata =
parquetType.asPrimitiveType().getDecimalMetadata();
return field.append("DECIMAL(").append(decimalMetadata.getPrecision()).
append(" , ").append(decimalMetadata.getScale()).append(")").toString();
@Override
public String convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "int";
}
// TODO - fix the method naming here
return parquetPrimitiveTypeName
.convert(new PrimitiveType.PrimitiveTypeNameConverter<String, RuntimeException>() {
@Override
public String convertBOOLEAN(
PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "boolean";
}
@Override
public String convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "int";
}
@Override
public String convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "bigint";
}
@Override
public String convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "timestamp-millis";
}
@Override
public String convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "float";
}
@Override
public String convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "double";
}
@Override
public String convertFIXED_LEN_BYTE_ARRAY(
PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "binary";
}
@Override
public String convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
if (originalType == OriginalType.UTF8
|| originalType == OriginalType.ENUM) {
return "string";
} else {
return "binary";
}
}
});
} else {
GroupType parquetGroupType = parquetType.asGroupType();
OriginalType originalType = parquetGroupType.getOriginalType();
if (originalType != null) {
switch (originalType) {
case LIST:
if (parquetGroupType.getFieldCount() != 1) {
throw new UnsupportedOperationException(
"Invalid list type " + parquetGroupType);
}
Type elementType = parquetGroupType.getType(0);
if (!elementType.isRepetition(Type.Repetition.REPEATED)) {
throw new UnsupportedOperationException(
"Invalid list type " + parquetGroupType);
}
return createHiveArray(elementType, parquetGroupType.getName());
case MAP:
if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0)
.isPrimitive()) {
throw new UnsupportedOperationException(
"Invalid map type " + parquetGroupType);
}
GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType();
if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) ||
!mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) ||
mapKeyValType.getFieldCount() != 2) {
throw new UnsupportedOperationException(
"Invalid map type " + parquetGroupType);
}
Type keyType = mapKeyValType.getType(0);
if (!keyType.isPrimitive() ||
!keyType.asPrimitiveType().getPrimitiveTypeName()
.equals(PrimitiveType.PrimitiveTypeName.BINARY) ||
!keyType.getOriginalType().equals(OriginalType.UTF8)) {
throw new UnsupportedOperationException(
"Map key type must be binary (UTF8): " + keyType);
}
Type valueType = mapKeyValType.getType(1);
return createHiveMap(convertField(keyType), convertField(valueType));
case ENUM:
case UTF8:
return "string";
case MAP_KEY_VALUE:
// MAP_KEY_VALUE was supposed to be used to annotate key and
// value group levels in a
// MAP. However, that is always implied by the structure of
// MAP. Hence, PARQUET-113
// dropped the requirement for having MAP_KEY_VALUE.
default:
throw new UnsupportedOperationException(
"Cannot convert Parquet type " + parquetType);
}
} else {
// if no original type then it's a record
return createHiveStruct(parquetGroupType.getFields());
@Override
public String convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "bigint";
}
}
}
/**
* Return a 'struct' Hive schema from a list of Parquet fields
*
* @param parquetFields : list of parquet fields
* @return : Equivalent 'struct' Hive schema
*/
private static String createHiveStruct(List<Type> parquetFields) {
StringBuilder struct = new StringBuilder();
struct.append("STRUCT< ");
for (Type field : parquetFields) {
//TODO: struct field name is only translated to support special char($)
//We will need to extend it to other collection type
struct.append(hiveCompatibleFieldName(field.getName(), true)).append(" : ");
struct.append(convertField(field)).append(", ");
}
struct.delete(struct.length() - 2, struct.length()); // Remove the last
// ", "
struct.append(">");
String finalStr = struct.toString();
// Struct cannot have - in them. userstore_udr_entities has uuid in struct. This breaks the schema.
// HDrone sync should not fail because of this.
finalStr = finalStr.replaceAll("-", "_");
return finalStr;
}
private static String hiveCompatibleFieldName(String fieldName, boolean isNested) {
String result = fieldName;
if (isNested) {
result = ColumnNameXLator.translateNestedColumn(fieldName);
}
return tickSurround(result);
}
private static String tickSurround(String result) {
if (!result.startsWith("`")) {
result = "`" + result;
}
if (!result.endsWith("`")) {
result = result + "`";
}
return result;
}
/**
* Create a 'Map' schema from Parquet map field
*
* @param keyType
* @param valueType
* @return
*/
private static String createHiveMap(String keyType, String valueType) {
return "MAP< " + keyType + ", " + valueType + ">";
}
/**
* Create an Array Hive schema from equivalent parquet list type
*
* @param elementType
* @param elementName
* @return
*/
private static String createHiveArray(Type elementType, String elementName) {
StringBuilder array = new StringBuilder();
array.append("ARRAY< ");
if (elementType.isPrimitive()) {
array.append(convertField(elementType));
} else {
final GroupType groupType = elementType.asGroupType();
final List<Type> groupFields = groupType.getFields();
if (groupFields.size() > 1 || (groupFields.size() == 1 && (
elementType.getName().equals("array") || elementType.getName()
.equals(elementName + "_tuple")))) {
array.append(convertField(elementType));
} else {
array.append(convertField(groupType.getFields().get(0)));
@Override
public String convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "timestamp-millis";
}
@Override
public String convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "float";
}
@Override
public String convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "double";
}
@Override
public String convertFIXED_LEN_BYTE_ARRAY(
PrimitiveType.PrimitiveTypeName primitiveTypeName) {
return "binary";
}
@Override
public String convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
if (originalType == OriginalType.UTF8
|| originalType == OriginalType.ENUM) {
return "string";
} else {
return "binary";
}
}
});
} else {
GroupType parquetGroupType = parquetType.asGroupType();
OriginalType originalType = parquetGroupType.getOriginalType();
if (originalType != null) {
switch (originalType) {
case LIST:
if (parquetGroupType.getFieldCount() != 1) {
throw new UnsupportedOperationException(
"Invalid list type " + parquetGroupType);
}
Type elementType = parquetGroupType.getType(0);
if (!elementType.isRepetition(Type.Repetition.REPEATED)) {
throw new UnsupportedOperationException(
"Invalid list type " + parquetGroupType);
}
return createHiveArray(elementType, parquetGroupType.getName());
case MAP:
if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0)
.isPrimitive()) {
throw new UnsupportedOperationException(
"Invalid map type " + parquetGroupType);
}
GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType();
if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) ||
!mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) ||
mapKeyValType.getFieldCount() != 2) {
throw new UnsupportedOperationException(
"Invalid map type " + parquetGroupType);
}
Type keyType = mapKeyValType.getType(0);
if (!keyType.isPrimitive() ||
!keyType.asPrimitiveType().getPrimitiveTypeName()
.equals(PrimitiveType.PrimitiveTypeName.BINARY) ||
!keyType.getOriginalType().equals(OriginalType.UTF8)) {
throw new UnsupportedOperationException(
"Map key type must be binary (UTF8): " + keyType);
}
Type valueType = mapKeyValType.getType(1);
return createHiveMap(convertField(keyType), convertField(valueType));
case ENUM:
case UTF8:
return "string";
case MAP_KEY_VALUE:
// MAP_KEY_VALUE was supposed to be used to annotate key and
// value group levels in a
// MAP. However, that is always implied by the structure of
// MAP. Hence, PARQUET-113
// dropped the requirement for having MAP_KEY_VALUE.
default:
throw new UnsupportedOperationException(
"Cannot convert Parquet type " + parquetType);
}
array.append(">");
return array.toString();
} else {
// if no original type then it's a record
return createHiveStruct(parquetGroupType.getFields());
}
}
}
/**
* Return a 'struct' Hive schema from a list of Parquet fields
*
* @param parquetFields : list of parquet fields
* @return : Equivalent 'struct' Hive schema
*/
private static String createHiveStruct(List<Type> parquetFields) {
StringBuilder struct = new StringBuilder();
struct.append("STRUCT< ");
for (Type field : parquetFields) {
//TODO: struct field name is only translated to support special char($)
//We will need to extend it to other collection type
struct.append(hiveCompatibleFieldName(field.getName(), true)).append(" : ");
struct.append(convertField(field)).append(", ");
}
struct.delete(struct.length() - 2, struct.length()); // Remove the last
// ", "
struct.append(">");
String finalStr = struct.toString();
// Struct cannot have - in them. userstore_udr_entities has uuid in struct. This breaks the schema.
// HDrone sync should not fail because of this.
finalStr = finalStr.replaceAll("-", "_");
return finalStr;
}
private static String hiveCompatibleFieldName(String fieldName, boolean isNested) {
String result = fieldName;
if (isNested) {
result = ColumnNameXLator.translateNestedColumn(fieldName);
}
return tickSurround(result);
}
private static String tickSurround(String result) {
if (!result.startsWith("`")) {
result = "`" + result;
}
if (!result.endsWith("`")) {
result = result + "`";
}
return result;
}
/**
* Create a 'Map' schema from Parquet map field
*/
private static String createHiveMap(String keyType, String valueType) {
return "MAP< " + keyType + ", " + valueType + ">";
}
/**
* Create an Array Hive schema from equivalent parquet list type
*/
private static String createHiveArray(Type elementType, String elementName) {
StringBuilder array = new StringBuilder();
array.append("ARRAY< ");
if (elementType.isPrimitive()) {
array.append(convertField(elementType));
} else {
final GroupType groupType = elementType.asGroupType();
final List<Type> groupFields = groupType.getFields();
if (groupFields.size() > 1 || (groupFields.size() == 1 && (
elementType.getName().equals("array") || elementType.getName()
.equals(elementName + "_tuple")))) {
array.append(convertField(elementType));
} else {
array.append(convertField(groupType.getFields().get(0)));
}
}
array.append(">");
return array.toString();
}
public static boolean isSchemaTypeUpdateAllowed(String prevType, String newType) {
if (prevType == null || prevType.trim().isEmpty() ||
newType == null || newType.trim().isEmpty()) {
return false;
}
prevType = prevType.toLowerCase();
newType = newType.toLowerCase();
if (prevType.equals(newType)) {
return true;
} else if (prevType.equalsIgnoreCase("int") && newType.equalsIgnoreCase("bigint")) {
return true;
} else if (prevType.equalsIgnoreCase("float") && newType.equalsIgnoreCase("double")) {
return true;
} else if (prevType.contains("struct") && newType.toLowerCase().contains("struct")) {
return true;
}
return false;
}
public static String generateSchemaString(MessageType storageSchema) throws IOException {
Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
StringBuilder columns = new StringBuilder();
for (Map.Entry<String, String> hiveSchemaEntry : hiveSchema.entrySet()) {
columns.append(hiveSchemaEntry.getKey()).append(" ");
columns.append(hiveSchemaEntry.getValue()).append(", ");
}
// Remove the last ", "
columns.delete(columns.length() - 2, columns.length());
return columns.toString();
}
public static String generateCreateDDL(MessageType storageSchema,
HiveSyncConfig config, String inputFormatClass,
String outputFormatClass, String serdeClass) throws IOException {
Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
String columns = generateSchemaString(storageSchema);
StringBuilder partitionFields = new StringBuilder();
for (String partitionKey : config.partitionFields) {
partitionFields.append(partitionKey).append(" ")
.append(getPartitionKeyType(hiveSchema, partitionKey));
}
public static boolean isSchemaTypeUpdateAllowed(String prevType, String newType) {
if (prevType == null || prevType.trim().isEmpty() ||
newType == null || newType.trim().isEmpty()) {
return false;
}
prevType = prevType.toLowerCase();
newType = newType.toLowerCase();
if (prevType.equals(newType)) {
return true;
} else if (prevType.equalsIgnoreCase("int") && newType.equalsIgnoreCase("bigint")) {
return true;
} else if (prevType.equalsIgnoreCase("float") && newType.equalsIgnoreCase("double")) {
return true;
} else if (prevType.contains("struct") && newType.toLowerCase().contains("struct")) {
return true;
}
return false;
StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS ");
sb = sb.append(config.databaseName).append(".").append(config.tableName);
sb = sb.append("( ").append(columns).append(")");
if (!config.partitionFields.isEmpty()) {
sb = sb.append(" PARTITIONED BY (").append(partitionFields).append(")");
}
sb = sb.append(" ROW FORMAT SERDE '").append(serdeClass).append("'");
sb = sb.append(" STORED AS INPUTFORMAT '").append(inputFormatClass).append("'");
sb = sb.append(" OUTPUTFORMAT '").append(outputFormatClass).append("' LOCATION '")
.append(config.basePath).append("'");
return sb.toString();
}
public static String generateSchemaString(MessageType storageSchema) throws IOException {
Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
StringBuilder columns = new StringBuilder();
for (Map.Entry<String, String> hiveSchemaEntry : hiveSchema.entrySet()) {
columns.append(hiveSchemaEntry.getKey()).append(" ");
columns.append(hiveSchemaEntry.getValue()).append(", ");
}
// Remove the last ", "
columns.delete(columns.length() - 2, columns.length());
return columns.toString();
}
public static String generateCreateDDL(MessageType storageSchema,
HiveSyncConfig config, String inputFormatClass,
String outputFormatClass, String serdeClass) throws IOException {
Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
String columns = generateSchemaString(storageSchema);
StringBuilder partitionFields = new StringBuilder();
for (String partitionKey : config.partitionFields) {
partitionFields.append(partitionKey).append(" ")
.append(getPartitionKeyType(hiveSchema, partitionKey));
}
StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS ");
sb = sb.append(config.databaseName).append(".").append(config.tableName);
sb = sb.append("( ").append(columns).append(")");
if (!config.partitionFields.isEmpty()) {
sb = sb.append(" PARTITIONED BY (").append(partitionFields).append(")");
}
sb = sb.append(" ROW FORMAT SERDE '").append(serdeClass).append("'");
sb = sb.append(" STORED AS INPUTFORMAT '").append(inputFormatClass).append("'");
sb = sb.append(" OUTPUTFORMAT '").append(outputFormatClass).append("' LOCATION '")
.append(config.basePath).append("'");
return sb.toString();
}
private static String getPartitionKeyType(Map<String, String> hiveSchema, String partitionKey) {
if (hiveSchema.containsKey(partitionKey)) {
return hiveSchema.get(partitionKey);
}
// Default the unknown partition fields to be String
// TODO - all partition fields should be part of the schema. datestr is treated as special. Dont do that
return "String";
private static String getPartitionKeyType(Map<String, String> hiveSchema, String partitionKey) {
if (hiveSchema.containsKey(partitionKey)) {
return hiveSchema.get(partitionKey);
}
// Default the unknown partition fields to be String
// TODO - all partition fields should be part of the schema. datestr is treated as special. Dont do that
return "String";
}
}

View File

@@ -18,7 +18,9 @@
package com.uber.hoodie.hive;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import com.uber.hoodie.common.util.SchemaTestUtil;
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent;
@@ -52,9 +54,8 @@ public class HiveSyncToolTest {
}
/**
* Testing converting array types to Hive field declaration strings,
* according to the Parquet-113 spec:
* https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
* Testing converting array types to Hive field declaration strings, according to the Parquet-113
* spec: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
*/
@Test
public void testSchemaConvertArray() throws IOException {
@@ -274,7 +275,8 @@ public class HiveSyncToolTest {
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + " should exist after sync completes",
hiveClient.doesTableExist());
assertEquals("Hive Schema should match the dataset schema + partition field",
hiveClient.getTableSchema().size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
hiveClient.getTableSchema().size(),
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
assertEquals("Table partitions should match the number of partitions we wrote", 5,
hiveClient.scanTablePartitions().size());
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES",
@@ -296,7 +298,8 @@ public class HiveSyncToolTest {
TestUtil.getHiveConf(), TestUtil.fileSystem);
assertEquals("Hive Schema should match the evolved dataset schema + partition field",
hiveClient.getTableSchema().size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
hiveClient.getTableSchema().size(),
SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
// Sync should add the one partition
assertEquals("The 2 partitions we wrote should be added to hive", 6,
hiveClient.scanTablePartitions().size());
@@ -307,33 +310,37 @@ public class HiveSyncToolTest {
@Test
public void testSyncMergeOnReadRT()
throws IOException, InitializationError, URISyntaxException, TException, InterruptedException {
throws IOException, InitializationError, URISyntaxException, TException, InterruptedException {
String commitTime = "100";
String deltaCommitTime = "101";
String roTablename = TestUtil.hiveSyncConfig.tableName;
TestUtil.hiveSyncConfig.tableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE;
TestUtil.hiveSyncConfig.tableName =
TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE;
TestUtil.createMORDataset(commitTime, deltaCommitTime, 5);
HoodieHiveClient hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig,
TestUtil.getHiveConf(), TestUtil.fileSystem);
TestUtil.getHiveConf(), TestUtil.fileSystem);
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE + " should not exist initially",
hiveClientRT.doesTableExist());
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE
+ " should not exist initially",
hiveClientRT.doesTableExist());
// Lets do the sync
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(),
TestUtil.fileSystem);
TestUtil.fileSystem);
tool.syncHoodieTable();
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE + " should exist after sync completes",
hiveClientRT.doesTableExist());
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE
+ " should exist after sync completes",
hiveClientRT.doesTableExist());
assertEquals("Hive Schema should match the dataset schema + partition field",
hiveClientRT.getTableSchema().size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
hiveClientRT.getTableSchema().size(),
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
assertEquals("Table partitions should match the number of partitions we wrote", 5,
hiveClientRT.scanTablePartitions().size());
hiveClientRT.scanTablePartitions().size());
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES",
deltaCommitTime,
hiveClientRT.getLastCommitTimeSynced().get());
deltaCommitTime,
hiveClientRT.getLastCommitTimeSynced().get());
// Now lets create more parititions and these are the only ones which needs to be synced
DateTime dateTime = DateTime.now().plusDays(6);
@@ -344,20 +351,21 @@ public class HiveSyncToolTest {
TestUtil.addMORPartitions(1, true, false, dateTime, commitTime2, deltaCommitTime2);
// Lets do the sync
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(),
TestUtil.fileSystem);
TestUtil.fileSystem);
tool.syncHoodieTable();
hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig,
TestUtil.getHiveConf(), TestUtil.fileSystem);
TestUtil.getHiveConf(), TestUtil.fileSystem);
assertEquals("Hive Schema should match the evolved dataset schema + partition field",
hiveClientRT.getTableSchema().size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
hiveClientRT.getTableSchema().size(),
SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
// Sync should add the one partition
assertEquals("The 2 partitions we wrote should be added to hive", 6,
hiveClientRT.scanTablePartitions().size());
hiveClientRT.scanTablePartitions().size());
assertEquals("The last commit that was sycned should be 103",
deltaCommitTime2,
hiveClientRT.getLastCommitTimeSynced().get());
deltaCommitTime2,
hiveClientRT.getLastCommitTimeSynced().get());
TestUtil.hiveSyncConfig.tableName = roTablename;
}
}
}

View File

@@ -16,6 +16,9 @@
package com.uber.hoodie.hive;
import static com.uber.hoodie.common.model.HoodieTestUtils.DEFAULT_TASK_PARTITIONID;
import static org.junit.Assert.fail;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
@@ -41,6 +44,15 @@ import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.common.util.SchemaTestUtil;
import com.uber.hoodie.hive.util.HiveTestService;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.commons.io.FileUtils;
@@ -60,19 +72,6 @@ import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.junit.runners.model.InitializationError;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import static com.uber.hoodie.common.model.HoodieTestUtils.DEFAULT_TASK_PARTITIONID;
import static org.junit.Assert.fail;
@SuppressWarnings("SameParameterValue")
public class TestUtil {
@@ -161,7 +160,8 @@ public class TestUtil {
boolean result = fileSystem.mkdirs(path);
checkResult(result);
DateTime dateTime = DateTime.now();
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime);
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime,
commitTime);
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
createCommitFile(commitMetadata, commitTime);
}
@@ -177,16 +177,19 @@ public class TestUtil {
boolean result = fileSystem.mkdirs(path);
checkResult(result);
DateTime dateTime = DateTime.now();
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime);
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime,
commitTime);
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE);
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName
+ HiveSyncTool.SUFFIX_REALTIME_TABLE);
HoodieCompactionMetadata compactionMetadata = new HoodieCompactionMetadata();
commitMetadata.getPartitionToWriteStats()
.forEach((key, value) -> value.stream().map(k -> new CompactionWriteStat(k, key, 0, 0, 0))
.forEach(l -> compactionMetadata.addWriteStat(key, l)));
createCompactionCommitFile(compactionMetadata, commitTime);
// Write a delta commit
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), true);
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(),
true);
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
}
@@ -206,18 +209,20 @@ public class TestUtil {
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions,
isParquetSchemaSimple, startFrom, commitTime);
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE);
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName
+ HiveSyncTool.SUFFIX_REALTIME_TABLE);
HoodieCompactionMetadata compactionMetadata = new HoodieCompactionMetadata();
commitMetadata.getPartitionToWriteStats()
.forEach((key, value) -> value.stream().map(k -> new CompactionWriteStat(k, key, 0, 0, 0))
.forEach(l -> compactionMetadata.addWriteStat(key, l)));
createCompactionCommitFile(compactionMetadata, commitTime);
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), isLogSchemaSimple);
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(),
isLogSchemaSimple);
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
}
private static HoodieCommitMetadata createLogFiles(
Map<String, List<HoodieWriteStat>> partitionWriteStats, boolean isLogSchemaSimple)
Map<String, List<HoodieWriteStat>> partitionWriteStats, boolean isLogSchemaSimple)
throws InterruptedException, IOException, URISyntaxException {
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
for (Entry<String, List<HoodieWriteStat>> wEntry : partitionWriteStats.entrySet()) {
@@ -246,7 +251,8 @@ public class TestUtil {
Path partPath = new Path(hiveSyncConfig.basePath + "/" + partitionPath);
fileSystem.makeQualified(partPath);
fileSystem.mkdirs(partPath);
List<HoodieWriteStat> writeStats = createTestData(partPath, isParquetSchemaSimple, commitTime);
List<HoodieWriteStat> writeStats = createTestData(partPath, isParquetSchemaSimple,
commitTime);
startFrom = startFrom.minusDays(1);
writeStats.forEach(s -> commitMetadata.addWriteStat(partitionPath, s));
}

View File

@@ -20,6 +20,13 @@ package com.uber.hoodie.hive.util;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.google.common.io.Files;
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.SocketException;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -46,277 +53,274 @@ import org.apache.thrift.transport.TTransportFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.SocketException;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class HiveTestService {
private static final Logger LOG = LoggerFactory.getLogger(HiveTestService.class);
private static final Logger LOG = LoggerFactory.getLogger(HiveTestService.class);
private static final int CONNECTION_TIMEOUT = 30000;
private static final int CONNECTION_TIMEOUT = 30000;
/**
* Configuration settings
*/
private Configuration hadoopConf;
private String workDir;
private String bindIP = "127.0.0.1";
private int metastorePort = 9083;
private int serverPort = 9999;
private boolean clean = true;
/**
* Configuration settings
*/
private Configuration hadoopConf;
private String workDir;
private String bindIP = "127.0.0.1";
private int metastorePort = 9083;
private int serverPort = 9999;
private boolean clean = true;
private Map<String, String> sysProps = Maps.newHashMap();
private ExecutorService executorService;
private TServer tServer;
private HiveServer2 hiveServer;
private Map<String, String> sysProps = Maps.newHashMap();
private ExecutorService executorService;
private TServer tServer;
private HiveServer2 hiveServer;
public HiveTestService(Configuration configuration) {
this.workDir = Files.createTempDir().getAbsolutePath();
public HiveTestService(Configuration configuration) {
this.workDir = Files.createTempDir().getAbsolutePath();
}
public Configuration getHadoopConf() {
return hadoopConf;
}
public HiveServer2 start() throws IOException {
Preconditions
.checkState(workDir != null, "The work dir must be set before starting cluster.");
if (hadoopConf == null) {
hadoopConf = new Configuration();
}
public Configuration getHadoopConf() {
return hadoopConf;
String localHiveLocation = getHiveLocation(workDir);
if (clean) {
LOG.info(
"Cleaning Hive cluster data at: " + localHiveLocation + " and starting fresh.");
File file = new File(localHiveLocation);
FileUtils.deleteDirectory(file);
}
public HiveServer2 start() throws IOException {
Preconditions
.checkState(workDir != null, "The work dir must be set before starting cluster.");
HiveConf serverConf = configureHive(hadoopConf, localHiveLocation);
if (hadoopConf == null) {
hadoopConf = new Configuration();
}
executorService = Executors.newSingleThreadExecutor();
tServer = startMetaStore(bindIP, metastorePort, serverConf);
String localHiveLocation = getHiveLocation(workDir);
if (clean) {
LOG.info(
"Cleaning Hive cluster data at: " + localHiveLocation + " and starting fresh.");
File file = new File(localHiveLocation);
FileUtils.deleteDirectory(file);
}
hiveServer = startHiveServer(serverConf);
HiveConf serverConf = configureHive(hadoopConf, localHiveLocation);
executorService = Executors.newSingleThreadExecutor();
tServer = startMetaStore(bindIP, metastorePort, serverConf);
hiveServer = startHiveServer(serverConf);
String serverHostname;
if (bindIP.equals("0.0.0.0")) {
serverHostname = "localhost";
} else {
serverHostname = bindIP;
}
if (!waitForServerUp(serverConf, serverHostname, metastorePort, CONNECTION_TIMEOUT)) {
throw new IOException("Waiting for startup of standalone server");
}
LOG.info("Hive Minicluster service started.");
return hiveServer;
String serverHostname;
if (bindIP.equals("0.0.0.0")) {
serverHostname = "localhost";
} else {
serverHostname = bindIP;
}
if (!waitForServerUp(serverConf, serverHostname, metastorePort, CONNECTION_TIMEOUT)) {
throw new IOException("Waiting for startup of standalone server");
}
public void stop() throws IOException {
resetSystemProperties();
if (tServer != null) {
tServer.stop();
}
if (hiveServer != null) {
hiveServer.stop();
}
LOG.info("Hive Minicluster service shut down.");
tServer = null;
hiveServer = null;
hadoopConf = null;
LOG.info("Hive Minicluster service started.");
return hiveServer;
}
public void stop() throws IOException {
resetSystemProperties();
if (tServer != null) {
tServer.stop();
}
if (hiveServer != null) {
hiveServer.stop();
}
LOG.info("Hive Minicluster service shut down.");
tServer = null;
hiveServer = null;
hadoopConf = null;
}
private HiveConf configureHive(Configuration conf, String localHiveLocation)
throws IOException {
conf.set("hive.metastore.local", "false");
conf.set(HiveConf.ConfVars.METASTOREURIS.varname,
"thrift://" + bindIP + ":" + metastorePort);
conf.set(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST.varname, bindIP);
conf.setInt(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_PORT.varname, serverPort);
// The following line to turn of SASL has no effect since HiveAuthFactory calls
// 'new HiveConf()'. This is fixed by https://issues.apache.org/jira/browse/HIVE-6657,
// in Hive 0.14.
// As a workaround, the property is set in hive-site.xml in this module.
//conf.set(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION.varname, "NOSASL");
File localHiveDir = new File(localHiveLocation);
localHiveDir.mkdirs();
File metastoreDbDir = new File(localHiveDir, "metastore_db");
conf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname,
"jdbc:derby:" + metastoreDbDir.getPath() + ";create=true");
File derbyLogFile = new File(localHiveDir, "derby.log");
derbyLogFile.createNewFile();
setSystemProperty("derby.stream.error.file", derbyLogFile.getPath());
conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname,
Files.createTempDir().getAbsolutePath());
return new HiveConf(conf, this.getClass());
}
private boolean waitForServerUp(HiveConf serverConf, String hostname, int port, int timeout) {
long start = System.currentTimeMillis();
while (true) {
try {
new HiveMetaStoreClient(serverConf);
return true;
} catch (MetaException e) {
// ignore as this is expected
LOG.info("server " + hostname + ":" + port + " not up " + e);
}
if (System.currentTimeMillis() > start + timeout) {
break;
}
try {
Thread.sleep(250);
} catch (InterruptedException e) {
// ignore
}
}
return false;
}
private void setSystemProperty(String name, String value) {
if (!sysProps.containsKey(name)) {
String currentValue = System.getProperty(name);
sysProps.put(name, currentValue);
}
if (value != null) {
System.setProperty(name, value);
} else {
System.getProperties().remove(name);
}
}
private void resetSystemProperties() {
for (Map.Entry<String, String> entry : sysProps.entrySet()) {
if (entry.getValue() != null) {
System.setProperty(entry.getKey(), entry.getValue());
} else {
System.getProperties().remove(entry.getKey());
}
}
sysProps.clear();
}
private static String getHiveLocation(String baseLocation) {
return baseLocation + Path.SEPARATOR + "hive";
}
private HiveServer2 startHiveServer(HiveConf serverConf) {
HiveServer2 hiveServer = new HiveServer2();
hiveServer.init(serverConf);
hiveServer.start();
return hiveServer;
}
// XXX: From org.apache.hadoop.hive.metastore.HiveMetaStore,
// with changes to support binding to a specified IP address (not only 0.0.0.0)
private static final class ChainedTTransportFactory extends TTransportFactory {
private final TTransportFactory parentTransFactory;
private final TTransportFactory childTransFactory;
private ChainedTTransportFactory(TTransportFactory parentTransFactory,
TTransportFactory childTransFactory) {
this.parentTransFactory = parentTransFactory;
this.childTransFactory = childTransFactory;
}
private HiveConf configureHive(Configuration conf, String localHiveLocation)
throws IOException {
conf.set("hive.metastore.local", "false");
conf.set(HiveConf.ConfVars.METASTOREURIS.varname,
"thrift://" + bindIP + ":" + metastorePort);
conf.set(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST.varname, bindIP);
conf.setInt(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_PORT.varname, serverPort);
// The following line to turn of SASL has no effect since HiveAuthFactory calls
// 'new HiveConf()'. This is fixed by https://issues.apache.org/jira/browse/HIVE-6657,
// in Hive 0.14.
// As a workaround, the property is set in hive-site.xml in this module.
//conf.set(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION.varname, "NOSASL");
File localHiveDir = new File(localHiveLocation);
localHiveDir.mkdirs();
File metastoreDbDir = new File(localHiveDir, "metastore_db");
conf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname,
"jdbc:derby:" + metastoreDbDir.getPath() + ";create=true");
File derbyLogFile = new File(localHiveDir, "derby.log");
derbyLogFile.createNewFile();
setSystemProperty("derby.stream.error.file", derbyLogFile.getPath());
conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname,
Files.createTempDir().getAbsolutePath());
@Override
public TTransport getTransport(TTransport trans) {
return childTransFactory.getTransport(parentTransFactory.getTransport(trans));
}
}
return new HiveConf(conf, this.getClass());
private static final class TServerSocketKeepAlive extends TServerSocket {
public TServerSocketKeepAlive(int port) throws TTransportException {
super(port, 0);
}
private boolean waitForServerUp(HiveConf serverConf, String hostname, int port, int timeout) {
long start = System.currentTimeMillis();
while (true) {
try {
new HiveMetaStoreClient(serverConf);
return true;
} catch (MetaException e) {
// ignore as this is expected
LOG.info("server " + hostname + ":" + port + " not up " + e);
}
if (System.currentTimeMillis() > start + timeout) {
break;
}
try {
Thread.sleep(250);
} catch (InterruptedException e) {
// ignore
}
}
return false;
public TServerSocketKeepAlive(InetSocketAddress address) throws TTransportException {
super(address, 0);
}
private void setSystemProperty(String name, String value) {
if (!sysProps.containsKey(name)) {
String currentValue = System.getProperty(name);
sysProps.put(name, currentValue);
}
if (value != null) {
System.setProperty(name, value);
} else {
System.getProperties().remove(name);
}
@Override
protected TSocket acceptImpl() throws TTransportException {
TSocket ts = super.acceptImpl();
try {
ts.getSocket().setKeepAlive(true);
} catch (SocketException e) {
throw new TTransportException(e);
}
return ts;
}
}
private void resetSystemProperties() {
for (Map.Entry<String, String> entry : sysProps.entrySet()) {
if (entry.getValue() != null) {
System.setProperty(entry.getKey(), entry.getValue());
} else {
System.getProperties().remove(entry.getKey());
}
}
sysProps.clear();
}
private static String getHiveLocation(String baseLocation) {
return baseLocation + Path.SEPARATOR + "hive";
}
private HiveServer2 startHiveServer(HiveConf serverConf) {
HiveServer2 hiveServer = new HiveServer2();
hiveServer.init(serverConf);
hiveServer.start();
return hiveServer;
}
// XXX: From org.apache.hadoop.hive.metastore.HiveMetaStore,
// with changes to support binding to a specified IP address (not only 0.0.0.0)
private static final class ChainedTTransportFactory extends TTransportFactory {
private final TTransportFactory parentTransFactory;
private final TTransportFactory childTransFactory;
private ChainedTTransportFactory(TTransportFactory parentTransFactory,
TTransportFactory childTransFactory) {
this.parentTransFactory = parentTransFactory;
this.childTransFactory = childTransFactory;
}
@Override public TTransport getTransport(TTransport trans) {
return childTransFactory.getTransport(parentTransFactory.getTransport(trans));
}
}
private static final class TServerSocketKeepAlive extends TServerSocket {
public TServerSocketKeepAlive(int port) throws TTransportException {
super(port, 0);
}
public TServerSocketKeepAlive(InetSocketAddress address) throws TTransportException {
super(address, 0);
}
@Override protected TSocket acceptImpl() throws TTransportException {
TSocket ts = super.acceptImpl();
try {
ts.getSocket().setKeepAlive(true);
} catch (SocketException e) {
throw new TTransportException(e);
}
return ts;
}
}
public TServer startMetaStore(String forceBindIP, int port, HiveConf conf) throws IOException {
try {
// Server will create new threads up to max as necessary. After an idle
// period, it will destory threads to keep the number of threads in the
// pool to min.
int minWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMINTHREADS);
int maxWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMAXTHREADS);
boolean tcpKeepAlive = conf.getBoolVar(HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE);
boolean useFramedTransport =
conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_FRAMED_TRANSPORT);
// don't support SASL yet
//boolean useSasl = conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL);
TServerTransport serverTransport;
if (forceBindIP != null) {
InetSocketAddress address = new InetSocketAddress(forceBindIP, port);
serverTransport =
tcpKeepAlive ? new TServerSocketKeepAlive(address) : new TServerSocket(address);
} else {
serverTransport =
tcpKeepAlive ? new TServerSocketKeepAlive(port) : new TServerSocket(port);
}
TProcessor processor;
TTransportFactory transFactory;
IHMSHandler handler = (IHMSHandler) HiveMetaStore
.newRetryingHMSHandler("new db based metaserver", conf, true);
if (conf.getBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI)) {
transFactory = useFramedTransport ?
new ChainedTTransportFactory(new TFramedTransport.Factory(),
new TUGIContainingTransport.Factory()) :
new TUGIContainingTransport.Factory();
processor = new TUGIBasedProcessor<IHMSHandler>(handler);
LOG.info("Starting DB backed MetaStore Server with SetUGI enabled");
} else {
transFactory =
useFramedTransport ? new TFramedTransport.Factory() : new TTransportFactory();
processor = new TSetIpAddressProcessor<IHMSHandler>(handler);
LOG.info("Starting DB backed MetaStore Server");
}
TThreadPoolServer.Args args =
new TThreadPoolServer.Args(serverTransport).processor(processor)
.transportFactory(transFactory).protocolFactory(new TBinaryProtocol.Factory())
.minWorkerThreads(minWorkerThreads).maxWorkerThreads(maxWorkerThreads);
final TServer tServer = new TThreadPoolServer(args);
executorService.submit(new Runnable() {
@Override public void run() {
tServer.serve();
}
});
return tServer;
} catch (Throwable x) {
throw new IOException(x);
public TServer startMetaStore(String forceBindIP, int port, HiveConf conf) throws IOException {
try {
// Server will create new threads up to max as necessary. After an idle
// period, it will destory threads to keep the number of threads in the
// pool to min.
int minWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMINTHREADS);
int maxWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMAXTHREADS);
boolean tcpKeepAlive = conf.getBoolVar(HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE);
boolean useFramedTransport =
conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_FRAMED_TRANSPORT);
// don't support SASL yet
//boolean useSasl = conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL);
TServerTransport serverTransport;
if (forceBindIP != null) {
InetSocketAddress address = new InetSocketAddress(forceBindIP, port);
serverTransport =
tcpKeepAlive ? new TServerSocketKeepAlive(address) : new TServerSocket(address);
} else {
serverTransport =
tcpKeepAlive ? new TServerSocketKeepAlive(port) : new TServerSocket(port);
}
TProcessor processor;
TTransportFactory transFactory;
IHMSHandler handler = (IHMSHandler) HiveMetaStore
.newRetryingHMSHandler("new db based metaserver", conf, true);
if (conf.getBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI)) {
transFactory = useFramedTransport ?
new ChainedTTransportFactory(new TFramedTransport.Factory(),
new TUGIContainingTransport.Factory()) :
new TUGIContainingTransport.Factory();
processor = new TUGIBasedProcessor<IHMSHandler>(handler);
LOG.info("Starting DB backed MetaStore Server with SetUGI enabled");
} else {
transFactory =
useFramedTransport ? new TFramedTransport.Factory() : new TTransportFactory();
processor = new TSetIpAddressProcessor<IHMSHandler>(handler);
LOG.info("Starting DB backed MetaStore Server");
}
TThreadPoolServer.Args args =
new TThreadPoolServer.Args(serverTransport).processor(processor)
.transportFactory(transFactory).protocolFactory(new TBinaryProtocol.Factory())
.minWorkerThreads(minWorkerThreads).maxWorkerThreads(maxWorkerThreads);
final TServer tServer = new TThreadPoolServer(args);
executorService.submit(new Runnable() {
@Override
public void run() {
tServer.serve();
}
});
return tServer;
} catch (Throwable x) {
throw new IOException(x);
}
}
}

View File

@@ -13,12 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
log4j.rootLogger=WARN, A1
log4j.category.com.uber=INFO
log4j.category.org.apache.parquet.hadoop=WARN
log4j.category.parquet.hadoop=WARN
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.