Reformatting code per Google Code Style all over
This commit is contained in:
committed by
vinoth chandar
parent
5a62480a92
commit
e45679f5e2
@@ -15,8 +15,8 @@
|
||||
-->
|
||||
|
||||
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
|
||||
<id>jar-with-dependencies</id>
|
||||
<formats>
|
||||
<format>jar</format>
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
package com.uber.hoodie.hive;
|
||||
|
||||
import com.beust.jcommander.Parameter;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@@ -26,6 +26,10 @@ import com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat;
|
||||
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent;
|
||||
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
|
||||
import com.uber.hoodie.hive.util.SchemaUtil;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.hive.conf.HiveConf;
|
||||
import org.apache.hadoop.hive.metastore.api.Partition;
|
||||
@@ -35,20 +39,14 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import parquet.schema.MessageType;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
/**
|
||||
* Tool to sync a hoodie HDFS dataset with a hive metastore table.
|
||||
* Either use it as a api HiveSyncTool.syncHoodieTable(HiveSyncConfig)
|
||||
* or as a command line java -cp hoodie-hive.jar HiveSyncTool [args]
|
||||
* Tool to sync a hoodie HDFS dataset with a hive metastore table. Either use it as a api
|
||||
* HiveSyncTool.syncHoodieTable(HiveSyncConfig) or as a command line java -cp hoodie-hive.jar
|
||||
* HiveSyncTool [args]
|
||||
*
|
||||
* This utility will get the schema from the latest commit and will sync hive table schema
|
||||
* Also this will sync the partitions incrementally
|
||||
* (all the partitions modified since the last commit)
|
||||
* This utility will get the schema from the latest commit and will sync hive table schema Also this
|
||||
* will sync the partitions incrementally (all the partitions modified since the last commit)
|
||||
*/
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public class HiveSyncTool {
|
||||
@@ -64,7 +62,7 @@ public class HiveSyncTool {
|
||||
}
|
||||
|
||||
public void syncHoodieTable() {
|
||||
switch(hoodieHiveClient.getTableType()) {
|
||||
switch (hoodieHiveClient.getTableType()) {
|
||||
case COPY_ON_WRITE:
|
||||
syncHoodieTable(false);
|
||||
break;
|
||||
@@ -125,15 +123,15 @@ public class HiveSyncTool {
|
||||
// Check and sync schema
|
||||
if (!tableExists) {
|
||||
LOG.info("Table " + cfg.tableName + " is not found. Creating it");
|
||||
if(!isRealTime) {
|
||||
if (!isRealTime) {
|
||||
// TODO - RO Table for MOR only after major compaction (UnboundedCompaction is default for now)
|
||||
hoodieHiveClient.createTable(schema, HoodieInputFormat.class.getName(),
|
||||
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
|
||||
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
|
||||
} else {
|
||||
// Custom serde will not work with ALTER TABLE REPLACE COLUMNS
|
||||
// https://github.com/apache/hive/blob/release-1.1.0/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java#L3488
|
||||
hoodieHiveClient.createTable(schema, HoodieRealtimeInputFormat.class.getName(),
|
||||
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
|
||||
// Custom serde will not work with ALTER TABLE REPLACE COLUMNS
|
||||
// https://github.com/apache/hive/blob/release-1.1.0/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java#L3488
|
||||
hoodieHiveClient.createTable(schema, HoodieRealtimeInputFormat.class.getName(),
|
||||
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
|
||||
}
|
||||
} else {
|
||||
// Check if the dataset schema has evolved
|
||||
|
||||
@@ -198,8 +198,8 @@ public class HoodieHiveClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate over the storage partitions and find if there are any new partitions that need
|
||||
* to be added or updated. Generate a list of PartitionEvent based on the changes required.
|
||||
* Iterate over the storage partitions and find if there are any new partitions that need to be
|
||||
* added or updated. Generate a list of PartitionEvent based on the changes required.
|
||||
*/
|
||||
List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions,
|
||||
List<String> partitionStoragePartitions) {
|
||||
@@ -297,9 +297,9 @@ public class HoodieHiveClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the schema for a hoodie dataset.
|
||||
* Depending on the type of table, read from any file written in the latest commit.
|
||||
* We will assume that the schema has not changed within a single atomic write.
|
||||
* Gets the schema for a hoodie dataset. Depending on the type of table, read from any file
|
||||
* written in the latest commit. We will assume that the schema has not changed within a single
|
||||
* atomic write.
|
||||
*
|
||||
* @return Parquet schema for this dataset
|
||||
*/
|
||||
@@ -313,7 +313,8 @@ public class HoodieHiveClient {
|
||||
.orElseThrow(() -> new InvalidDatasetException(syncConfig.basePath));
|
||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
|
||||
.fromBytes(activeTimeline.getInstantDetails(lastCommit).get());
|
||||
String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny()
|
||||
String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
|
||||
.stream().findAny()
|
||||
.orElseThrow(() -> new IllegalArgumentException(
|
||||
"Could not find any data file written for commit " + lastCommit
|
||||
+ ", could not get schema for dataset " + metaClient.getBasePath()));
|
||||
@@ -330,7 +331,8 @@ public class HoodieHiveClient {
|
||||
lastDeltaCommitAfterCompaction = metaClient.getActiveTimeline()
|
||||
.getDeltaCommitTimeline()
|
||||
.filterCompletedInstants()
|
||||
.findInstantsAfter(lastCompactionCommit.get().getTimestamp(), Integer.MAX_VALUE).lastInstant();
|
||||
.findInstantsAfter(lastCompactionCommit.get().getTimestamp(), Integer.MAX_VALUE)
|
||||
.lastInstant();
|
||||
}
|
||||
LOG.info("Found the last delta commit after last compaction as "
|
||||
+ lastDeltaCommitAfterCompaction);
|
||||
@@ -340,8 +342,9 @@ public class HoodieHiveClient {
|
||||
// read from the log file wrote
|
||||
commitMetadata = HoodieCommitMetadata
|
||||
.fromBytes(activeTimeline.getInstantDetails(lastDeltaCommit).get());
|
||||
filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().filter(s -> s.contains(
|
||||
HoodieLogFile.DELTA_EXTENSION)).findAny()
|
||||
filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
|
||||
.stream().filter(s -> s.contains(
|
||||
HoodieLogFile.DELTA_EXTENSION)).findAny()
|
||||
.orElseThrow(() -> new IllegalArgumentException(
|
||||
"Could not find any data file written for commit " + lastDeltaCommit
|
||||
+ ", could not get schema for dataset " + metaClient.getBasePath()));
|
||||
@@ -361,10 +364,6 @@ public class HoodieHiveClient {
|
||||
|
||||
/**
|
||||
* Read schema from a data file from the last compaction commit done.
|
||||
*
|
||||
* @param lastCompactionCommitOpt
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
|
||||
private MessageType readSchemaFromLastCompaction(Optional<HoodieInstant> lastCompactionCommitOpt)
|
||||
@@ -377,7 +376,8 @@ public class HoodieHiveClient {
|
||||
// Read from the compacted file wrote
|
||||
HoodieCompactionMetadata compactionMetadata = HoodieCompactionMetadata
|
||||
.fromBytes(activeTimeline.getInstantDetails(lastCompactionCommit).get());
|
||||
String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny()
|
||||
String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
|
||||
.stream().findAny()
|
||||
.orElseThrow(() -> new IllegalArgumentException(
|
||||
"Could not find any data file written for compaction " + lastCompactionCommit
|
||||
+ ", could not get schema for dataset " + metaClient.getBasePath()));
|
||||
@@ -386,11 +386,6 @@ public class HoodieHiveClient {
|
||||
|
||||
/**
|
||||
* Read the schema from the log file on path
|
||||
*
|
||||
* @param lastCompactionCommitOpt
|
||||
* @param path
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
|
||||
private MessageType readSchemaFromLogFile(Optional<HoodieInstant> lastCompactionCommitOpt,
|
||||
@@ -422,7 +417,8 @@ public class HoodieHiveClient {
|
||||
+ ". File does not exist.");
|
||||
}
|
||||
ParquetMetadata fileFooter =
|
||||
ParquetFileReader.readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER);
|
||||
ParquetFileReader
|
||||
.readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER);
|
||||
return fileFooter.getFileMetaData().getSchema();
|
||||
}
|
||||
|
||||
@@ -530,7 +526,7 @@ public class HoodieHiveClient {
|
||||
if (connection != null) {
|
||||
connection.close();
|
||||
}
|
||||
if(client != null) {
|
||||
if (client != null) {
|
||||
client.close();
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
|
||||
@@ -18,23 +18,23 @@ package com.uber.hoodie.hive;
|
||||
|
||||
public class HoodieHiveSyncException extends RuntimeException {
|
||||
|
||||
public HoodieHiveSyncException() {
|
||||
super();
|
||||
}
|
||||
public HoodieHiveSyncException() {
|
||||
super();
|
||||
}
|
||||
|
||||
public HoodieHiveSyncException(String message) {
|
||||
super(message);
|
||||
}
|
||||
public HoodieHiveSyncException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public HoodieHiveSyncException(String message, Throwable t) {
|
||||
super(message, t);
|
||||
}
|
||||
public HoodieHiveSyncException(String message, Throwable t) {
|
||||
super(message, t);
|
||||
}
|
||||
|
||||
public HoodieHiveSyncException(Throwable t) {
|
||||
super(t);
|
||||
}
|
||||
public HoodieHiveSyncException(Throwable t) {
|
||||
super(t);
|
||||
}
|
||||
|
||||
protected static String format(String message, Object... args) {
|
||||
return String.format(String.valueOf(message), (Object[]) args);
|
||||
}
|
||||
protected static String format(String message, Object... args) {
|
||||
return String.format(String.valueOf(message), (Object[]) args);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,11 +21,13 @@ package com.uber.hoodie.hive;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* HDFS Path contain hive partition values for the keys it is partitioned on.
|
||||
* This mapping is not straight forward and requires a pluggable implementation to extract the partition value from HDFS path.
|
||||
* HDFS Path contain hive partition values for the keys it is partitioned on. This mapping is not
|
||||
* straight forward and requires a pluggable implementation to extract the partition value from HDFS
|
||||
* path.
|
||||
*
|
||||
* e.g. Hive table partitioned by datestr=yyyy-mm-dd and hdfs path /app/hoodie/dataset1/YYYY=[yyyy]/MM=[mm]/DD=[dd]
|
||||
*/
|
||||
public interface PartitionValueExtractor {
|
||||
|
||||
List<String> extractPartitionValuesInPath(String partitionPath);
|
||||
}
|
||||
|
||||
@@ -21,88 +21,92 @@ import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import parquet.schema.MessageType;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import parquet.schema.MessageType;
|
||||
|
||||
/**
|
||||
* Represents the schema difference between the storage schema and hive table schema
|
||||
*/
|
||||
public class SchemaDifference {
|
||||
|
||||
private final MessageType storageSchema;
|
||||
private final Map<String, String> tableSchema;
|
||||
private final List<String> deleteColumns;
|
||||
private final Map<String, String> updateColumnTypes;
|
||||
private final Map<String, String> addColumnTypes;
|
||||
|
||||
private SchemaDifference(MessageType storageSchema, Map<String, String> tableSchema,
|
||||
List<String> deleteColumns, Map<String, String> updateColumnTypes,
|
||||
Map<String, String> addColumnTypes) {
|
||||
this.storageSchema = storageSchema;
|
||||
this.tableSchema = tableSchema;
|
||||
this.deleteColumns = ImmutableList.copyOf(deleteColumns);
|
||||
this.updateColumnTypes = ImmutableMap.copyOf(updateColumnTypes);
|
||||
this.addColumnTypes = ImmutableMap.copyOf(addColumnTypes);
|
||||
}
|
||||
|
||||
public List<String> getDeleteColumns() {
|
||||
return deleteColumns;
|
||||
}
|
||||
|
||||
public Map<String, String> getUpdateColumnTypes() {
|
||||
return updateColumnTypes;
|
||||
}
|
||||
|
||||
public Map<String, String> getAddColumnTypes() {
|
||||
return addColumnTypes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Objects.toStringHelper(this).add("deleteColumns", deleteColumns)
|
||||
.add("updateColumnTypes", updateColumnTypes).add("addColumnTypes", addColumnTypes)
|
||||
.toString();
|
||||
}
|
||||
|
||||
public static Builder newBuilder(MessageType storageSchema, Map<String, String> tableSchema) {
|
||||
return new Builder(storageSchema, tableSchema);
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private final MessageType storageSchema;
|
||||
private final Map<String, String> tableSchema;
|
||||
private final List<String> deleteColumns;
|
||||
private final Map<String, String> updateColumnTypes;
|
||||
private final Map<String, String> addColumnTypes;
|
||||
private List<String> deleteColumns;
|
||||
private Map<String, String> updateColumnTypes;
|
||||
private Map<String, String> addColumnTypes;
|
||||
|
||||
private SchemaDifference(MessageType storageSchema, Map<String, String> tableSchema,
|
||||
List<String> deleteColumns, Map<String, String> updateColumnTypes, Map<String, String> addColumnTypes) {
|
||||
this.storageSchema = storageSchema;
|
||||
this.tableSchema = tableSchema;
|
||||
this.deleteColumns = ImmutableList.copyOf(deleteColumns);
|
||||
this.updateColumnTypes = ImmutableMap.copyOf(updateColumnTypes);
|
||||
this.addColumnTypes = ImmutableMap.copyOf(addColumnTypes);
|
||||
public Builder(MessageType storageSchema, Map<String, String> tableSchema) {
|
||||
this.storageSchema = storageSchema;
|
||||
this.tableSchema = tableSchema;
|
||||
deleteColumns = Lists.newArrayList();
|
||||
updateColumnTypes = Maps.newHashMap();
|
||||
addColumnTypes = Maps.newHashMap();
|
||||
}
|
||||
|
||||
public List<String> getDeleteColumns() {
|
||||
return deleteColumns;
|
||||
public Builder deleteTableColumn(String column) {
|
||||
deleteColumns.add(column);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Map<String, String> getUpdateColumnTypes() {
|
||||
return updateColumnTypes;
|
||||
public Builder updateTableColumn(String column, String storageColumnType) {
|
||||
updateColumnTypes.put(column, storageColumnType);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Map<String, String> getAddColumnTypes() {
|
||||
return addColumnTypes;
|
||||
public Builder addTableColumn(String name, String type) {
|
||||
addColumnTypes.put(name, type);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override public String toString() {
|
||||
return Objects.toStringHelper(this).add("deleteColumns", deleteColumns)
|
||||
.add("updateColumnTypes", updateColumnTypes).add("addColumnTypes", addColumnTypes)
|
||||
.toString();
|
||||
}
|
||||
|
||||
public static Builder newBuilder(MessageType storageSchema, Map<String, String> tableSchema) {
|
||||
return new Builder(storageSchema, tableSchema);
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private final MessageType storageSchema;
|
||||
private final Map<String, String> tableSchema;
|
||||
private List<String> deleteColumns;
|
||||
private Map<String, String> updateColumnTypes;
|
||||
private Map<String, String> addColumnTypes;
|
||||
|
||||
public Builder(MessageType storageSchema, Map<String, String> tableSchema) {
|
||||
this.storageSchema = storageSchema;
|
||||
this.tableSchema = tableSchema;
|
||||
deleteColumns = Lists.newArrayList();
|
||||
updateColumnTypes = Maps.newHashMap();
|
||||
addColumnTypes = Maps.newHashMap();
|
||||
}
|
||||
|
||||
public Builder deleteTableColumn(String column) {
|
||||
deleteColumns.add(column);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder updateTableColumn(String column, String storageColumnType) {
|
||||
updateColumnTypes.put(column, storageColumnType);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder addTableColumn(String name, String type) {
|
||||
addColumnTypes.put(name, type);
|
||||
return this;
|
||||
}
|
||||
|
||||
public SchemaDifference build() {
|
||||
return new SchemaDifference(storageSchema, tableSchema, deleteColumns, updateColumnTypes, addColumnTypes);
|
||||
}
|
||||
public SchemaDifference build() {
|
||||
return new SchemaDifference(storageSchema, tableSchema, deleteColumns, updateColumnTypes,
|
||||
addColumnTypes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,9 +23,11 @@ import java.util.List;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
|
||||
/**
|
||||
* HDFS Path contain hive partition values for the keys it is partitioned on.
|
||||
* This mapping is not straight forward and requires a pluggable implementation to extract the partition value from HDFS path.
|
||||
* HDFS Path contain hive partition values for the keys it is partitioned on. This mapping is not
|
||||
* straight forward and requires a pluggable implementation to extract the partition value from HDFS
|
||||
* path.
|
||||
*
|
||||
* This implementation extracts datestr=yyyy-mm-dd from path of type /yyyy/mm/dd
|
||||
*/
|
||||
|
||||
@@ -17,32 +17,32 @@
|
||||
package com.uber.hoodie.hive.util;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
public class ColumnNameXLator {
|
||||
private static Map<String, String> xformMap = Maps.newHashMap();
|
||||
|
||||
public static String translateNestedColumn(String colName) {
|
||||
Map.Entry entry;
|
||||
for (Iterator i$ = xformMap.entrySet().iterator(); i$.hasNext();
|
||||
colName = colName.replaceAll((String) entry.getKey(), (String) entry.getValue())) {
|
||||
entry = (Map.Entry) i$.next();
|
||||
}
|
||||
private static Map<String, String> xformMap = Maps.newHashMap();
|
||||
|
||||
return colName;
|
||||
public static String translateNestedColumn(String colName) {
|
||||
Map.Entry entry;
|
||||
for (Iterator i$ = xformMap.entrySet().iterator(); i$.hasNext();
|
||||
colName = colName.replaceAll((String) entry.getKey(), (String) entry.getValue())) {
|
||||
entry = (Map.Entry) i$.next();
|
||||
}
|
||||
|
||||
public static String translateColumn(String colName) {
|
||||
return colName;
|
||||
}
|
||||
return colName;
|
||||
}
|
||||
|
||||
public static String translate(String colName, boolean nestedColumn) {
|
||||
return !nestedColumn ? translateColumn(colName) : translateNestedColumn(colName);
|
||||
}
|
||||
public static String translateColumn(String colName) {
|
||||
return colName;
|
||||
}
|
||||
|
||||
static {
|
||||
xformMap.put("\\$", "_dollar_");
|
||||
}
|
||||
public static String translate(String colName, boolean nestedColumn) {
|
||||
return !nestedColumn ? translateColumn(colName) : translateNestedColumn(colName);
|
||||
}
|
||||
|
||||
static {
|
||||
xformMap.put("\\$", "_dollar_");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,10 @@ import com.google.common.collect.Sets;
|
||||
import com.uber.hoodie.hive.HiveSyncConfig;
|
||||
import com.uber.hoodie.hive.HoodieHiveSyncException;
|
||||
import com.uber.hoodie.hive.SchemaDifference;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import parquet.schema.DecimalMetadata;
|
||||
@@ -30,404 +34,386 @@ import parquet.schema.OriginalType;
|
||||
import parquet.schema.PrimitiveType;
|
||||
import parquet.schema.Type;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Schema Utilities
|
||||
*/
|
||||
public class SchemaUtil {
|
||||
private static Logger LOG = LoggerFactory.getLogger(SchemaUtil.class);
|
||||
|
||||
/**
|
||||
* Get the schema difference between the storage schema and hive table schema
|
||||
*
|
||||
* @param storageSchema
|
||||
* @param tableSchema
|
||||
* @param partitionKeys
|
||||
* @return
|
||||
*/
|
||||
public static SchemaDifference getSchemaDifference(MessageType storageSchema,
|
||||
Map<String, String> tableSchema, List<String> partitionKeys) {
|
||||
Map<String, String> newTableSchema;
|
||||
try {
|
||||
newTableSchema = convertParquetSchemaToHiveSchema(storageSchema);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema",
|
||||
e);
|
||||
private static Logger LOG = LoggerFactory.getLogger(SchemaUtil.class);
|
||||
|
||||
/**
|
||||
* Get the schema difference between the storage schema and hive table schema
|
||||
*/
|
||||
public static SchemaDifference getSchemaDifference(MessageType storageSchema,
|
||||
Map<String, String> tableSchema, List<String> partitionKeys) {
|
||||
Map<String, String> newTableSchema;
|
||||
try {
|
||||
newTableSchema = convertParquetSchemaToHiveSchema(storageSchema);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema",
|
||||
e);
|
||||
}
|
||||
LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
|
||||
SchemaDifference.Builder schemaDiffBuilder =
|
||||
SchemaDifference.newBuilder(storageSchema, tableSchema);
|
||||
Set<String> tableColumns = Sets.newHashSet();
|
||||
|
||||
for (Map.Entry<String, String> field : tableSchema.entrySet()) {
|
||||
String fieldName = field.getKey().toLowerCase();
|
||||
String tickSurroundedFieldName = tickSurround(fieldName);
|
||||
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !partitionKeys
|
||||
.contains(fieldName)) {
|
||||
schemaDiffBuilder.deleteTableColumn(fieldName);
|
||||
} else {
|
||||
// check type
|
||||
String tableColumnType = field.getValue();
|
||||
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName)) {
|
||||
if (partitionKeys.contains(fieldName)) {
|
||||
// Partition key does not have to be part of the storage schema
|
||||
continue;
|
||||
}
|
||||
// We will log this and continue. Hive schema is a superset of all parquet schemas
|
||||
LOG.warn("Ignoring table column " + fieldName
|
||||
+ " as its not present in the parquet schema");
|
||||
continue;
|
||||
}
|
||||
LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
|
||||
SchemaDifference.Builder schemaDiffBuilder =
|
||||
SchemaDifference.newBuilder(storageSchema, tableSchema);
|
||||
Set<String> tableColumns = Sets.newHashSet();
|
||||
tableColumnType = tableColumnType.replaceAll("\\s+", "");
|
||||
|
||||
for (Map.Entry<String, String> field : tableSchema.entrySet()) {
|
||||
String fieldName = field.getKey().toLowerCase();
|
||||
String tickSurroundedFieldName = tickSurround(fieldName);
|
||||
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !partitionKeys.contains(fieldName)) {
|
||||
schemaDiffBuilder.deleteTableColumn(fieldName);
|
||||
} else {
|
||||
// check type
|
||||
String tableColumnType = field.getValue();
|
||||
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName)) {
|
||||
if (partitionKeys.contains(fieldName)) {
|
||||
// Partition key does not have to be part of the storage schema
|
||||
continue;
|
||||
}
|
||||
// We will log this and continue. Hive schema is a superset of all parquet schemas
|
||||
LOG.warn("Ignoring table column " + fieldName
|
||||
+ " as its not present in the parquet schema");
|
||||
continue;
|
||||
}
|
||||
tableColumnType = tableColumnType.replaceAll("\\s+", "");
|
||||
String expectedType = getExpectedType(newTableSchema, tickSurroundedFieldName);
|
||||
expectedType = expectedType.replaceAll("\\s+", "");
|
||||
expectedType = expectedType.replaceAll("`", "");
|
||||
|
||||
String expectedType = getExpectedType(newTableSchema, tickSurroundedFieldName);
|
||||
expectedType = expectedType.replaceAll("\\s+", "");
|
||||
expectedType = expectedType.replaceAll("`", "");
|
||||
|
||||
if (!tableColumnType.equalsIgnoreCase(expectedType)) {
|
||||
// check for incremental datasets, the schema type change is allowed as per evolution rules
|
||||
if (!isSchemaTypeUpdateAllowed(tableColumnType, expectedType)) {
|
||||
throw new HoodieHiveSyncException(
|
||||
"Could not convert field Type from " + tableColumnType + " to "
|
||||
+ expectedType + " for field " + fieldName);
|
||||
}
|
||||
schemaDiffBuilder.updateTableColumn(fieldName,
|
||||
getExpectedType(newTableSchema, tickSurroundedFieldName));
|
||||
}
|
||||
}
|
||||
tableColumns.add(tickSurroundedFieldName);
|
||||
if (!tableColumnType.equalsIgnoreCase(expectedType)) {
|
||||
// check for incremental datasets, the schema type change is allowed as per evolution rules
|
||||
if (!isSchemaTypeUpdateAllowed(tableColumnType, expectedType)) {
|
||||
throw new HoodieHiveSyncException(
|
||||
"Could not convert field Type from " + tableColumnType + " to "
|
||||
+ expectedType + " for field " + fieldName);
|
||||
}
|
||||
schemaDiffBuilder.updateTableColumn(fieldName,
|
||||
getExpectedType(newTableSchema, tickSurroundedFieldName));
|
||||
}
|
||||
|
||||
for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
|
||||
if (!tableColumns.contains(entry.getKey().toLowerCase())) {
|
||||
schemaDiffBuilder.addTableColumn(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
LOG.info("Difference between schemas: " + schemaDiffBuilder.build().toString());
|
||||
|
||||
return schemaDiffBuilder.build();
|
||||
}
|
||||
tableColumns.add(tickSurroundedFieldName);
|
||||
}
|
||||
|
||||
private static String getExpectedType(Map<String, String> newTableSchema, String fieldName) {
|
||||
for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
|
||||
if (entry.getKey().toLowerCase().equals(fieldName)) {
|
||||
return entry.getValue();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
|
||||
if (!tableColumns.contains(entry.getKey().toLowerCase())) {
|
||||
schemaDiffBuilder.addTableColumn(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
LOG.info("Difference between schemas: " + schemaDiffBuilder.build().toString());
|
||||
|
||||
private static boolean isFieldExistsInSchema(Map<String, String> newTableSchema,
|
||||
String fieldName) {
|
||||
for (String entry : newTableSchema.keySet()) {
|
||||
if (entry.toLowerCase().equals(fieldName)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return schemaDiffBuilder.build();
|
||||
}
|
||||
|
||||
private static String getExpectedType(Map<String, String> newTableSchema, String fieldName) {
|
||||
for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
|
||||
if (entry.getKey().toLowerCase().equals(fieldName)) {
|
||||
return entry.getValue();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static boolean isFieldExistsInSchema(Map<String, String> newTableSchema,
|
||||
String fieldName) {
|
||||
for (String entry : newTableSchema.keySet()) {
|
||||
if (entry.toLowerCase().equals(fieldName)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns equivalent Hive table schema read from a parquet file
|
||||
*
|
||||
* @param messageType : Parquet Schema
|
||||
* @return : Hive Table schema read from parquet file MAP[String,String]
|
||||
* @throws IOException
|
||||
*/
|
||||
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType)
|
||||
throws IOException {
|
||||
Map<String, String> schema = Maps.newLinkedHashMap();
|
||||
List<Type> parquetFields = messageType.getFields();
|
||||
for (Type parquetType : parquetFields) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
String key = parquetType.getName();
|
||||
if (parquetType.isRepetition(Type.Repetition.REPEATED)) {
|
||||
result.append(createHiveArray(parquetType, ""));
|
||||
} else {
|
||||
result.append(convertField(parquetType));
|
||||
/**
|
||||
* Returns equivalent Hive table schema read from a parquet file
|
||||
*
|
||||
* @param messageType : Parquet Schema
|
||||
* @return : Hive Table schema read from parquet file MAP[String,String]
|
||||
*/
|
||||
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType)
|
||||
throws IOException {
|
||||
Map<String, String> schema = Maps.newLinkedHashMap();
|
||||
List<Type> parquetFields = messageType.getFields();
|
||||
for (Type parquetType : parquetFields) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
String key = parquetType.getName();
|
||||
if (parquetType.isRepetition(Type.Repetition.REPEATED)) {
|
||||
result.append(createHiveArray(parquetType, ""));
|
||||
} else {
|
||||
result.append(convertField(parquetType));
|
||||
}
|
||||
|
||||
schema.put(hiveCompatibleFieldName(key, false), result.toString());
|
||||
}
|
||||
return schema;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert one field data type of parquet schema into an equivalent Hive schema
|
||||
*
|
||||
* @param parquetType : Single paruet field
|
||||
* @return : Equivalent sHive schema
|
||||
*/
|
||||
private static String convertField(final Type parquetType) {
|
||||
StringBuilder field = new StringBuilder();
|
||||
if (parquetType.isPrimitive()) {
|
||||
final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName =
|
||||
parquetType.asPrimitiveType().getPrimitiveTypeName();
|
||||
final OriginalType originalType = parquetType.getOriginalType();
|
||||
if (originalType == OriginalType.DECIMAL) {
|
||||
final DecimalMetadata decimalMetadata =
|
||||
parquetType.asPrimitiveType().getDecimalMetadata();
|
||||
return field.append("DECIMAL(").append(decimalMetadata.getPrecision()).
|
||||
append(" , ").append(decimalMetadata.getScale()).append(")").toString();
|
||||
}
|
||||
// TODO - fix the method naming here
|
||||
return parquetPrimitiveTypeName
|
||||
.convert(new PrimitiveType.PrimitiveTypeNameConverter<String, RuntimeException>() {
|
||||
@Override
|
||||
public String convertBOOLEAN(
|
||||
PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "boolean";
|
||||
}
|
||||
|
||||
schema.put(hiveCompatibleFieldName(key, false), result.toString());
|
||||
}
|
||||
return schema;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert one field data type of parquet schema into an equivalent Hive
|
||||
* schema
|
||||
*
|
||||
* @param parquetType : Single paruet field
|
||||
* @return : Equivalent sHive schema
|
||||
*/
|
||||
private static String convertField(final Type parquetType) {
|
||||
StringBuilder field = new StringBuilder();
|
||||
if (parquetType.isPrimitive()) {
|
||||
final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName =
|
||||
parquetType.asPrimitiveType().getPrimitiveTypeName();
|
||||
final OriginalType originalType = parquetType.getOriginalType();
|
||||
if (originalType == OriginalType.DECIMAL) {
|
||||
final DecimalMetadata decimalMetadata =
|
||||
parquetType.asPrimitiveType().getDecimalMetadata();
|
||||
return field.append("DECIMAL(").append(decimalMetadata.getPrecision()).
|
||||
append(" , ").append(decimalMetadata.getScale()).append(")").toString();
|
||||
@Override
|
||||
public String convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "int";
|
||||
}
|
||||
// TODO - fix the method naming here
|
||||
return parquetPrimitiveTypeName
|
||||
.convert(new PrimitiveType.PrimitiveTypeNameConverter<String, RuntimeException>() {
|
||||
@Override
|
||||
public String convertBOOLEAN(
|
||||
PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "boolean";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "int";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "bigint";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "timestamp-millis";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "float";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "double";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertFIXED_LEN_BYTE_ARRAY(
|
||||
PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "binary";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
if (originalType == OriginalType.UTF8
|
||||
|| originalType == OriginalType.ENUM) {
|
||||
return "string";
|
||||
} else {
|
||||
return "binary";
|
||||
}
|
||||
}
|
||||
});
|
||||
} else {
|
||||
GroupType parquetGroupType = parquetType.asGroupType();
|
||||
OriginalType originalType = parquetGroupType.getOriginalType();
|
||||
if (originalType != null) {
|
||||
switch (originalType) {
|
||||
case LIST:
|
||||
if (parquetGroupType.getFieldCount() != 1) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Invalid list type " + parquetGroupType);
|
||||
}
|
||||
Type elementType = parquetGroupType.getType(0);
|
||||
if (!elementType.isRepetition(Type.Repetition.REPEATED)) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Invalid list type " + parquetGroupType);
|
||||
}
|
||||
return createHiveArray(elementType, parquetGroupType.getName());
|
||||
case MAP:
|
||||
if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0)
|
||||
.isPrimitive()) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Invalid map type " + parquetGroupType);
|
||||
}
|
||||
GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType();
|
||||
if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) ||
|
||||
!mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) ||
|
||||
mapKeyValType.getFieldCount() != 2) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Invalid map type " + parquetGroupType);
|
||||
}
|
||||
Type keyType = mapKeyValType.getType(0);
|
||||
if (!keyType.isPrimitive() ||
|
||||
!keyType.asPrimitiveType().getPrimitiveTypeName()
|
||||
.equals(PrimitiveType.PrimitiveTypeName.BINARY) ||
|
||||
!keyType.getOriginalType().equals(OriginalType.UTF8)) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Map key type must be binary (UTF8): " + keyType);
|
||||
}
|
||||
Type valueType = mapKeyValType.getType(1);
|
||||
return createHiveMap(convertField(keyType), convertField(valueType));
|
||||
case ENUM:
|
||||
case UTF8:
|
||||
return "string";
|
||||
case MAP_KEY_VALUE:
|
||||
// MAP_KEY_VALUE was supposed to be used to annotate key and
|
||||
// value group levels in a
|
||||
// MAP. However, that is always implied by the structure of
|
||||
// MAP. Hence, PARQUET-113
|
||||
// dropped the requirement for having MAP_KEY_VALUE.
|
||||
default:
|
||||
throw new UnsupportedOperationException(
|
||||
"Cannot convert Parquet type " + parquetType);
|
||||
}
|
||||
} else {
|
||||
// if no original type then it's a record
|
||||
return createHiveStruct(parquetGroupType.getFields());
|
||||
@Override
|
||||
public String convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "bigint";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a 'struct' Hive schema from a list of Parquet fields
|
||||
*
|
||||
* @param parquetFields : list of parquet fields
|
||||
* @return : Equivalent 'struct' Hive schema
|
||||
*/
|
||||
private static String createHiveStruct(List<Type> parquetFields) {
|
||||
StringBuilder struct = new StringBuilder();
|
||||
struct.append("STRUCT< ");
|
||||
for (Type field : parquetFields) {
|
||||
//TODO: struct field name is only translated to support special char($)
|
||||
//We will need to extend it to other collection type
|
||||
struct.append(hiveCompatibleFieldName(field.getName(), true)).append(" : ");
|
||||
struct.append(convertField(field)).append(", ");
|
||||
}
|
||||
struct.delete(struct.length() - 2, struct.length()); // Remove the last
|
||||
// ", "
|
||||
struct.append(">");
|
||||
String finalStr = struct.toString();
|
||||
// Struct cannot have - in them. userstore_udr_entities has uuid in struct. This breaks the schema.
|
||||
// HDrone sync should not fail because of this.
|
||||
finalStr = finalStr.replaceAll("-", "_");
|
||||
return finalStr;
|
||||
}
|
||||
|
||||
|
||||
private static String hiveCompatibleFieldName(String fieldName, boolean isNested) {
|
||||
String result = fieldName;
|
||||
if (isNested) {
|
||||
result = ColumnNameXLator.translateNestedColumn(fieldName);
|
||||
}
|
||||
return tickSurround(result);
|
||||
}
|
||||
|
||||
private static String tickSurround(String result) {
|
||||
if (!result.startsWith("`")) {
|
||||
result = "`" + result;
|
||||
}
|
||||
if (!result.endsWith("`")) {
|
||||
result = result + "`";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a 'Map' schema from Parquet map field
|
||||
*
|
||||
* @param keyType
|
||||
* @param valueType
|
||||
* @return
|
||||
*/
|
||||
private static String createHiveMap(String keyType, String valueType) {
|
||||
return "MAP< " + keyType + ", " + valueType + ">";
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an Array Hive schema from equivalent parquet list type
|
||||
*
|
||||
* @param elementType
|
||||
* @param elementName
|
||||
* @return
|
||||
*/
|
||||
private static String createHiveArray(Type elementType, String elementName) {
|
||||
StringBuilder array = new StringBuilder();
|
||||
array.append("ARRAY< ");
|
||||
if (elementType.isPrimitive()) {
|
||||
array.append(convertField(elementType));
|
||||
} else {
|
||||
final GroupType groupType = elementType.asGroupType();
|
||||
final List<Type> groupFields = groupType.getFields();
|
||||
if (groupFields.size() > 1 || (groupFields.size() == 1 && (
|
||||
elementType.getName().equals("array") || elementType.getName()
|
||||
.equals(elementName + "_tuple")))) {
|
||||
array.append(convertField(elementType));
|
||||
} else {
|
||||
array.append(convertField(groupType.getFields().get(0)));
|
||||
@Override
|
||||
public String convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "timestamp-millis";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "float";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "double";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertFIXED_LEN_BYTE_ARRAY(
|
||||
PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
return "binary";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
|
||||
if (originalType == OriginalType.UTF8
|
||||
|| originalType == OriginalType.ENUM) {
|
||||
return "string";
|
||||
} else {
|
||||
return "binary";
|
||||
}
|
||||
}
|
||||
});
|
||||
} else {
|
||||
GroupType parquetGroupType = parquetType.asGroupType();
|
||||
OriginalType originalType = parquetGroupType.getOriginalType();
|
||||
if (originalType != null) {
|
||||
switch (originalType) {
|
||||
case LIST:
|
||||
if (parquetGroupType.getFieldCount() != 1) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Invalid list type " + parquetGroupType);
|
||||
}
|
||||
Type elementType = parquetGroupType.getType(0);
|
||||
if (!elementType.isRepetition(Type.Repetition.REPEATED)) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Invalid list type " + parquetGroupType);
|
||||
}
|
||||
return createHiveArray(elementType, parquetGroupType.getName());
|
||||
case MAP:
|
||||
if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0)
|
||||
.isPrimitive()) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Invalid map type " + parquetGroupType);
|
||||
}
|
||||
GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType();
|
||||
if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) ||
|
||||
!mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) ||
|
||||
mapKeyValType.getFieldCount() != 2) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Invalid map type " + parquetGroupType);
|
||||
}
|
||||
Type keyType = mapKeyValType.getType(0);
|
||||
if (!keyType.isPrimitive() ||
|
||||
!keyType.asPrimitiveType().getPrimitiveTypeName()
|
||||
.equals(PrimitiveType.PrimitiveTypeName.BINARY) ||
|
||||
!keyType.getOriginalType().equals(OriginalType.UTF8)) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Map key type must be binary (UTF8): " + keyType);
|
||||
}
|
||||
Type valueType = mapKeyValType.getType(1);
|
||||
return createHiveMap(convertField(keyType), convertField(valueType));
|
||||
case ENUM:
|
||||
case UTF8:
|
||||
return "string";
|
||||
case MAP_KEY_VALUE:
|
||||
// MAP_KEY_VALUE was supposed to be used to annotate key and
|
||||
// value group levels in a
|
||||
// MAP. However, that is always implied by the structure of
|
||||
// MAP. Hence, PARQUET-113
|
||||
// dropped the requirement for having MAP_KEY_VALUE.
|
||||
default:
|
||||
throw new UnsupportedOperationException(
|
||||
"Cannot convert Parquet type " + parquetType);
|
||||
}
|
||||
array.append(">");
|
||||
return array.toString();
|
||||
} else {
|
||||
// if no original type then it's a record
|
||||
return createHiveStruct(parquetGroupType.getFields());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a 'struct' Hive schema from a list of Parquet fields
|
||||
*
|
||||
* @param parquetFields : list of parquet fields
|
||||
* @return : Equivalent 'struct' Hive schema
|
||||
*/
|
||||
private static String createHiveStruct(List<Type> parquetFields) {
|
||||
StringBuilder struct = new StringBuilder();
|
||||
struct.append("STRUCT< ");
|
||||
for (Type field : parquetFields) {
|
||||
//TODO: struct field name is only translated to support special char($)
|
||||
//We will need to extend it to other collection type
|
||||
struct.append(hiveCompatibleFieldName(field.getName(), true)).append(" : ");
|
||||
struct.append(convertField(field)).append(", ");
|
||||
}
|
||||
struct.delete(struct.length() - 2, struct.length()); // Remove the last
|
||||
// ", "
|
||||
struct.append(">");
|
||||
String finalStr = struct.toString();
|
||||
// Struct cannot have - in them. userstore_udr_entities has uuid in struct. This breaks the schema.
|
||||
// HDrone sync should not fail because of this.
|
||||
finalStr = finalStr.replaceAll("-", "_");
|
||||
return finalStr;
|
||||
}
|
||||
|
||||
|
||||
private static String hiveCompatibleFieldName(String fieldName, boolean isNested) {
|
||||
String result = fieldName;
|
||||
if (isNested) {
|
||||
result = ColumnNameXLator.translateNestedColumn(fieldName);
|
||||
}
|
||||
return tickSurround(result);
|
||||
}
|
||||
|
||||
private static String tickSurround(String result) {
|
||||
if (!result.startsWith("`")) {
|
||||
result = "`" + result;
|
||||
}
|
||||
if (!result.endsWith("`")) {
|
||||
result = result + "`";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a 'Map' schema from Parquet map field
|
||||
*/
|
||||
private static String createHiveMap(String keyType, String valueType) {
|
||||
return "MAP< " + keyType + ", " + valueType + ">";
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an Array Hive schema from equivalent parquet list type
|
||||
*/
|
||||
private static String createHiveArray(Type elementType, String elementName) {
|
||||
StringBuilder array = new StringBuilder();
|
||||
array.append("ARRAY< ");
|
||||
if (elementType.isPrimitive()) {
|
||||
array.append(convertField(elementType));
|
||||
} else {
|
||||
final GroupType groupType = elementType.asGroupType();
|
||||
final List<Type> groupFields = groupType.getFields();
|
||||
if (groupFields.size() > 1 || (groupFields.size() == 1 && (
|
||||
elementType.getName().equals("array") || elementType.getName()
|
||||
.equals(elementName + "_tuple")))) {
|
||||
array.append(convertField(elementType));
|
||||
} else {
|
||||
array.append(convertField(groupType.getFields().get(0)));
|
||||
}
|
||||
}
|
||||
array.append(">");
|
||||
return array.toString();
|
||||
}
|
||||
|
||||
public static boolean isSchemaTypeUpdateAllowed(String prevType, String newType) {
|
||||
if (prevType == null || prevType.trim().isEmpty() ||
|
||||
newType == null || newType.trim().isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
prevType = prevType.toLowerCase();
|
||||
newType = newType.toLowerCase();
|
||||
if (prevType.equals(newType)) {
|
||||
return true;
|
||||
} else if (prevType.equalsIgnoreCase("int") && newType.equalsIgnoreCase("bigint")) {
|
||||
return true;
|
||||
} else if (prevType.equalsIgnoreCase("float") && newType.equalsIgnoreCase("double")) {
|
||||
return true;
|
||||
} else if (prevType.contains("struct") && newType.toLowerCase().contains("struct")) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static String generateSchemaString(MessageType storageSchema) throws IOException {
|
||||
Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
|
||||
StringBuilder columns = new StringBuilder();
|
||||
for (Map.Entry<String, String> hiveSchemaEntry : hiveSchema.entrySet()) {
|
||||
columns.append(hiveSchemaEntry.getKey()).append(" ");
|
||||
columns.append(hiveSchemaEntry.getValue()).append(", ");
|
||||
}
|
||||
// Remove the last ", "
|
||||
columns.delete(columns.length() - 2, columns.length());
|
||||
return columns.toString();
|
||||
}
|
||||
|
||||
public static String generateCreateDDL(MessageType storageSchema,
|
||||
HiveSyncConfig config, String inputFormatClass,
|
||||
String outputFormatClass, String serdeClass) throws IOException {
|
||||
Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
|
||||
String columns = generateSchemaString(storageSchema);
|
||||
|
||||
StringBuilder partitionFields = new StringBuilder();
|
||||
for (String partitionKey : config.partitionFields) {
|
||||
partitionFields.append(partitionKey).append(" ")
|
||||
.append(getPartitionKeyType(hiveSchema, partitionKey));
|
||||
}
|
||||
|
||||
public static boolean isSchemaTypeUpdateAllowed(String prevType, String newType) {
|
||||
if (prevType == null || prevType.trim().isEmpty() ||
|
||||
newType == null || newType.trim().isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
prevType = prevType.toLowerCase();
|
||||
newType = newType.toLowerCase();
|
||||
if (prevType.equals(newType)) {
|
||||
return true;
|
||||
} else if (prevType.equalsIgnoreCase("int") && newType.equalsIgnoreCase("bigint")) {
|
||||
return true;
|
||||
} else if (prevType.equalsIgnoreCase("float") && newType.equalsIgnoreCase("double")) {
|
||||
return true;
|
||||
} else if (prevType.contains("struct") && newType.toLowerCase().contains("struct")) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS ");
|
||||
sb = sb.append(config.databaseName).append(".").append(config.tableName);
|
||||
sb = sb.append("( ").append(columns).append(")");
|
||||
if (!config.partitionFields.isEmpty()) {
|
||||
sb = sb.append(" PARTITIONED BY (").append(partitionFields).append(")");
|
||||
}
|
||||
sb = sb.append(" ROW FORMAT SERDE '").append(serdeClass).append("'");
|
||||
sb = sb.append(" STORED AS INPUTFORMAT '").append(inputFormatClass).append("'");
|
||||
sb = sb.append(" OUTPUTFORMAT '").append(outputFormatClass).append("' LOCATION '")
|
||||
.append(config.basePath).append("'");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String generateSchemaString(MessageType storageSchema) throws IOException {
|
||||
Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
|
||||
StringBuilder columns = new StringBuilder();
|
||||
for (Map.Entry<String, String> hiveSchemaEntry : hiveSchema.entrySet()) {
|
||||
columns.append(hiveSchemaEntry.getKey()).append(" ");
|
||||
columns.append(hiveSchemaEntry.getValue()).append(", ");
|
||||
}
|
||||
// Remove the last ", "
|
||||
columns.delete(columns.length() - 2, columns.length());
|
||||
return columns.toString();
|
||||
}
|
||||
|
||||
public static String generateCreateDDL(MessageType storageSchema,
|
||||
HiveSyncConfig config, String inputFormatClass,
|
||||
String outputFormatClass, String serdeClass) throws IOException {
|
||||
Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
|
||||
String columns = generateSchemaString(storageSchema);
|
||||
|
||||
StringBuilder partitionFields = new StringBuilder();
|
||||
for (String partitionKey : config.partitionFields) {
|
||||
partitionFields.append(partitionKey).append(" ")
|
||||
.append(getPartitionKeyType(hiveSchema, partitionKey));
|
||||
}
|
||||
|
||||
StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS ");
|
||||
sb = sb.append(config.databaseName).append(".").append(config.tableName);
|
||||
sb = sb.append("( ").append(columns).append(")");
|
||||
if (!config.partitionFields.isEmpty()) {
|
||||
sb = sb.append(" PARTITIONED BY (").append(partitionFields).append(")");
|
||||
}
|
||||
sb = sb.append(" ROW FORMAT SERDE '").append(serdeClass).append("'");
|
||||
sb = sb.append(" STORED AS INPUTFORMAT '").append(inputFormatClass).append("'");
|
||||
sb = sb.append(" OUTPUTFORMAT '").append(outputFormatClass).append("' LOCATION '")
|
||||
.append(config.basePath).append("'");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static String getPartitionKeyType(Map<String, String> hiveSchema, String partitionKey) {
|
||||
if (hiveSchema.containsKey(partitionKey)) {
|
||||
return hiveSchema.get(partitionKey);
|
||||
}
|
||||
// Default the unknown partition fields to be String
|
||||
// TODO - all partition fields should be part of the schema. datestr is treated as special. Dont do that
|
||||
return "String";
|
||||
private static String getPartitionKeyType(Map<String, String> hiveSchema, String partitionKey) {
|
||||
if (hiveSchema.containsKey(partitionKey)) {
|
||||
return hiveSchema.get(partitionKey);
|
||||
}
|
||||
// Default the unknown partition fields to be String
|
||||
// TODO - all partition fields should be part of the schema. datestr is treated as special. Dont do that
|
||||
return "String";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,9 @@
|
||||
|
||||
package com.uber.hoodie.hive;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import com.uber.hoodie.common.util.SchemaTestUtil;
|
||||
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent;
|
||||
@@ -52,9 +54,8 @@ public class HiveSyncToolTest {
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing converting array types to Hive field declaration strings,
|
||||
* according to the Parquet-113 spec:
|
||||
* https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
|
||||
* Testing converting array types to Hive field declaration strings, according to the Parquet-113
|
||||
* spec: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
|
||||
*/
|
||||
@Test
|
||||
public void testSchemaConvertArray() throws IOException {
|
||||
@@ -274,7 +275,8 @@ public class HiveSyncToolTest {
|
||||
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + " should exist after sync completes",
|
||||
hiveClient.doesTableExist());
|
||||
assertEquals("Hive Schema should match the dataset schema + partition field",
|
||||
hiveClient.getTableSchema().size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
|
||||
hiveClient.getTableSchema().size(),
|
||||
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
|
||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||
hiveClient.scanTablePartitions().size());
|
||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES",
|
||||
@@ -296,7 +298,8 @@ public class HiveSyncToolTest {
|
||||
TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
|
||||
assertEquals("Hive Schema should match the evolved dataset schema + partition field",
|
||||
hiveClient.getTableSchema().size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
|
||||
hiveClient.getTableSchema().size(),
|
||||
SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
|
||||
// Sync should add the one partition
|
||||
assertEquals("The 2 partitions we wrote should be added to hive", 6,
|
||||
hiveClient.scanTablePartitions().size());
|
||||
@@ -307,33 +310,37 @@ public class HiveSyncToolTest {
|
||||
|
||||
@Test
|
||||
public void testSyncMergeOnReadRT()
|
||||
throws IOException, InitializationError, URISyntaxException, TException, InterruptedException {
|
||||
throws IOException, InitializationError, URISyntaxException, TException, InterruptedException {
|
||||
String commitTime = "100";
|
||||
String deltaCommitTime = "101";
|
||||
String roTablename = TestUtil.hiveSyncConfig.tableName;
|
||||
TestUtil.hiveSyncConfig.tableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE;
|
||||
TestUtil.hiveSyncConfig.tableName =
|
||||
TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE;
|
||||
TestUtil.createMORDataset(commitTime, deltaCommitTime, 5);
|
||||
HoodieHiveClient hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig,
|
||||
TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
|
||||
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE + " should not exist initially",
|
||||
hiveClientRT.doesTableExist());
|
||||
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE
|
||||
+ " should not exist initially",
|
||||
hiveClientRT.doesTableExist());
|
||||
|
||||
// Lets do the sync
|
||||
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(),
|
||||
TestUtil.fileSystem);
|
||||
TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
|
||||
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE + " should exist after sync completes",
|
||||
hiveClientRT.doesTableExist());
|
||||
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE
|
||||
+ " should exist after sync completes",
|
||||
hiveClientRT.doesTableExist());
|
||||
|
||||
assertEquals("Hive Schema should match the dataset schema + partition field",
|
||||
hiveClientRT.getTableSchema().size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
|
||||
hiveClientRT.getTableSchema().size(),
|
||||
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
|
||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||
hiveClientRT.scanTablePartitions().size());
|
||||
hiveClientRT.scanTablePartitions().size());
|
||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES",
|
||||
deltaCommitTime,
|
||||
hiveClientRT.getLastCommitTimeSynced().get());
|
||||
deltaCommitTime,
|
||||
hiveClientRT.getLastCommitTimeSynced().get());
|
||||
|
||||
// Now lets create more parititions and these are the only ones which needs to be synced
|
||||
DateTime dateTime = DateTime.now().plusDays(6);
|
||||
@@ -344,20 +351,21 @@ public class HiveSyncToolTest {
|
||||
TestUtil.addMORPartitions(1, true, false, dateTime, commitTime2, deltaCommitTime2);
|
||||
// Lets do the sync
|
||||
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(),
|
||||
TestUtil.fileSystem);
|
||||
TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig,
|
||||
TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
|
||||
assertEquals("Hive Schema should match the evolved dataset schema + partition field",
|
||||
hiveClientRT.getTableSchema().size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
|
||||
hiveClientRT.getTableSchema().size(),
|
||||
SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
|
||||
// Sync should add the one partition
|
||||
assertEquals("The 2 partitions we wrote should be added to hive", 6,
|
||||
hiveClientRT.scanTablePartitions().size());
|
||||
hiveClientRT.scanTablePartitions().size());
|
||||
assertEquals("The last commit that was sycned should be 103",
|
||||
deltaCommitTime2,
|
||||
hiveClientRT.getLastCommitTimeSynced().get());
|
||||
deltaCommitTime2,
|
||||
hiveClientRT.getLastCommitTimeSynced().get());
|
||||
TestUtil.hiveSyncConfig.tableName = roTablename;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,9 @@
|
||||
|
||||
package com.uber.hoodie.hive;
|
||||
|
||||
import static com.uber.hoodie.common.model.HoodieTestUtils.DEFAULT_TASK_PARTITIONID;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
@@ -41,6 +44,15 @@ import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.common.util.SchemaTestUtil;
|
||||
import com.uber.hoodie.hive.util.HiveTestService;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
@@ -60,19 +72,6 @@ import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
import org.junit.runners.model.InitializationError;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import static com.uber.hoodie.common.model.HoodieTestUtils.DEFAULT_TASK_PARTITIONID;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
@SuppressWarnings("SameParameterValue")
|
||||
public class TestUtil {
|
||||
|
||||
@@ -161,7 +160,8 @@ public class TestUtil {
|
||||
boolean result = fileSystem.mkdirs(path);
|
||||
checkResult(result);
|
||||
DateTime dateTime = DateTime.now();
|
||||
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime);
|
||||
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime,
|
||||
commitTime);
|
||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
||||
createCommitFile(commitMetadata, commitTime);
|
||||
}
|
||||
@@ -177,16 +177,19 @@ public class TestUtil {
|
||||
boolean result = fileSystem.mkdirs(path);
|
||||
checkResult(result);
|
||||
DateTime dateTime = DateTime.now();
|
||||
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime);
|
||||
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime,
|
||||
commitTime);
|
||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE);
|
||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName
|
||||
+ HiveSyncTool.SUFFIX_REALTIME_TABLE);
|
||||
HoodieCompactionMetadata compactionMetadata = new HoodieCompactionMetadata();
|
||||
commitMetadata.getPartitionToWriteStats()
|
||||
.forEach((key, value) -> value.stream().map(k -> new CompactionWriteStat(k, key, 0, 0, 0))
|
||||
.forEach(l -> compactionMetadata.addWriteStat(key, l)));
|
||||
createCompactionCommitFile(compactionMetadata, commitTime);
|
||||
// Write a delta commit
|
||||
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), true);
|
||||
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(),
|
||||
true);
|
||||
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
|
||||
}
|
||||
|
||||
@@ -206,18 +209,20 @@ public class TestUtil {
|
||||
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions,
|
||||
isParquetSchemaSimple, startFrom, commitTime);
|
||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE);
|
||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName
|
||||
+ HiveSyncTool.SUFFIX_REALTIME_TABLE);
|
||||
HoodieCompactionMetadata compactionMetadata = new HoodieCompactionMetadata();
|
||||
commitMetadata.getPartitionToWriteStats()
|
||||
.forEach((key, value) -> value.stream().map(k -> new CompactionWriteStat(k, key, 0, 0, 0))
|
||||
.forEach(l -> compactionMetadata.addWriteStat(key, l)));
|
||||
createCompactionCommitFile(compactionMetadata, commitTime);
|
||||
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), isLogSchemaSimple);
|
||||
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(),
|
||||
isLogSchemaSimple);
|
||||
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
|
||||
}
|
||||
|
||||
private static HoodieCommitMetadata createLogFiles(
|
||||
Map<String, List<HoodieWriteStat>> partitionWriteStats, boolean isLogSchemaSimple)
|
||||
Map<String, List<HoodieWriteStat>> partitionWriteStats, boolean isLogSchemaSimple)
|
||||
throws InterruptedException, IOException, URISyntaxException {
|
||||
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
|
||||
for (Entry<String, List<HoodieWriteStat>> wEntry : partitionWriteStats.entrySet()) {
|
||||
@@ -246,7 +251,8 @@ public class TestUtil {
|
||||
Path partPath = new Path(hiveSyncConfig.basePath + "/" + partitionPath);
|
||||
fileSystem.makeQualified(partPath);
|
||||
fileSystem.mkdirs(partPath);
|
||||
List<HoodieWriteStat> writeStats = createTestData(partPath, isParquetSchemaSimple, commitTime);
|
||||
List<HoodieWriteStat> writeStats = createTestData(partPath, isParquetSchemaSimple,
|
||||
commitTime);
|
||||
startFrom = startFrom.minusDays(1);
|
||||
writeStats.forEach(s -> commitMetadata.addWriteStat(partitionPath, s));
|
||||
}
|
||||
|
||||
@@ -20,6 +20,13 @@ package com.uber.hoodie.hive.util;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.io.Files;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.SocketException;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
@@ -46,277 +53,274 @@ import org.apache.thrift.transport.TTransportFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.SocketException;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
public class HiveTestService {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(HiveTestService.class);
|
||||
private static final Logger LOG = LoggerFactory.getLogger(HiveTestService.class);
|
||||
|
||||
private static final int CONNECTION_TIMEOUT = 30000;
|
||||
private static final int CONNECTION_TIMEOUT = 30000;
|
||||
|
||||
/**
|
||||
* Configuration settings
|
||||
*/
|
||||
private Configuration hadoopConf;
|
||||
private String workDir;
|
||||
private String bindIP = "127.0.0.1";
|
||||
private int metastorePort = 9083;
|
||||
private int serverPort = 9999;
|
||||
private boolean clean = true;
|
||||
/**
|
||||
* Configuration settings
|
||||
*/
|
||||
private Configuration hadoopConf;
|
||||
private String workDir;
|
||||
private String bindIP = "127.0.0.1";
|
||||
private int metastorePort = 9083;
|
||||
private int serverPort = 9999;
|
||||
private boolean clean = true;
|
||||
|
||||
private Map<String, String> sysProps = Maps.newHashMap();
|
||||
private ExecutorService executorService;
|
||||
private TServer tServer;
|
||||
private HiveServer2 hiveServer;
|
||||
private Map<String, String> sysProps = Maps.newHashMap();
|
||||
private ExecutorService executorService;
|
||||
private TServer tServer;
|
||||
private HiveServer2 hiveServer;
|
||||
|
||||
public HiveTestService(Configuration configuration) {
|
||||
this.workDir = Files.createTempDir().getAbsolutePath();
|
||||
public HiveTestService(Configuration configuration) {
|
||||
this.workDir = Files.createTempDir().getAbsolutePath();
|
||||
}
|
||||
|
||||
public Configuration getHadoopConf() {
|
||||
return hadoopConf;
|
||||
}
|
||||
|
||||
public HiveServer2 start() throws IOException {
|
||||
Preconditions
|
||||
.checkState(workDir != null, "The work dir must be set before starting cluster.");
|
||||
|
||||
if (hadoopConf == null) {
|
||||
hadoopConf = new Configuration();
|
||||
}
|
||||
|
||||
public Configuration getHadoopConf() {
|
||||
return hadoopConf;
|
||||
String localHiveLocation = getHiveLocation(workDir);
|
||||
if (clean) {
|
||||
LOG.info(
|
||||
"Cleaning Hive cluster data at: " + localHiveLocation + " and starting fresh.");
|
||||
File file = new File(localHiveLocation);
|
||||
FileUtils.deleteDirectory(file);
|
||||
}
|
||||
|
||||
public HiveServer2 start() throws IOException {
|
||||
Preconditions
|
||||
.checkState(workDir != null, "The work dir must be set before starting cluster.");
|
||||
HiveConf serverConf = configureHive(hadoopConf, localHiveLocation);
|
||||
|
||||
if (hadoopConf == null) {
|
||||
hadoopConf = new Configuration();
|
||||
}
|
||||
executorService = Executors.newSingleThreadExecutor();
|
||||
tServer = startMetaStore(bindIP, metastorePort, serverConf);
|
||||
|
||||
String localHiveLocation = getHiveLocation(workDir);
|
||||
if (clean) {
|
||||
LOG.info(
|
||||
"Cleaning Hive cluster data at: " + localHiveLocation + " and starting fresh.");
|
||||
File file = new File(localHiveLocation);
|
||||
FileUtils.deleteDirectory(file);
|
||||
}
|
||||
hiveServer = startHiveServer(serverConf);
|
||||
|
||||
HiveConf serverConf = configureHive(hadoopConf, localHiveLocation);
|
||||
|
||||
executorService = Executors.newSingleThreadExecutor();
|
||||
tServer = startMetaStore(bindIP, metastorePort, serverConf);
|
||||
|
||||
hiveServer = startHiveServer(serverConf);
|
||||
|
||||
String serverHostname;
|
||||
if (bindIP.equals("0.0.0.0")) {
|
||||
serverHostname = "localhost";
|
||||
} else {
|
||||
serverHostname = bindIP;
|
||||
}
|
||||
if (!waitForServerUp(serverConf, serverHostname, metastorePort, CONNECTION_TIMEOUT)) {
|
||||
throw new IOException("Waiting for startup of standalone server");
|
||||
}
|
||||
|
||||
LOG.info("Hive Minicluster service started.");
|
||||
return hiveServer;
|
||||
String serverHostname;
|
||||
if (bindIP.equals("0.0.0.0")) {
|
||||
serverHostname = "localhost";
|
||||
} else {
|
||||
serverHostname = bindIP;
|
||||
}
|
||||
if (!waitForServerUp(serverConf, serverHostname, metastorePort, CONNECTION_TIMEOUT)) {
|
||||
throw new IOException("Waiting for startup of standalone server");
|
||||
}
|
||||
|
||||
public void stop() throws IOException {
|
||||
resetSystemProperties();
|
||||
if (tServer != null) {
|
||||
tServer.stop();
|
||||
}
|
||||
if (hiveServer != null) {
|
||||
hiveServer.stop();
|
||||
}
|
||||
LOG.info("Hive Minicluster service shut down.");
|
||||
tServer = null;
|
||||
hiveServer = null;
|
||||
hadoopConf = null;
|
||||
LOG.info("Hive Minicluster service started.");
|
||||
return hiveServer;
|
||||
}
|
||||
|
||||
public void stop() throws IOException {
|
||||
resetSystemProperties();
|
||||
if (tServer != null) {
|
||||
tServer.stop();
|
||||
}
|
||||
if (hiveServer != null) {
|
||||
hiveServer.stop();
|
||||
}
|
||||
LOG.info("Hive Minicluster service shut down.");
|
||||
tServer = null;
|
||||
hiveServer = null;
|
||||
hadoopConf = null;
|
||||
}
|
||||
|
||||
private HiveConf configureHive(Configuration conf, String localHiveLocation)
|
||||
throws IOException {
|
||||
conf.set("hive.metastore.local", "false");
|
||||
conf.set(HiveConf.ConfVars.METASTOREURIS.varname,
|
||||
"thrift://" + bindIP + ":" + metastorePort);
|
||||
conf.set(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST.varname, bindIP);
|
||||
conf.setInt(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_PORT.varname, serverPort);
|
||||
// The following line to turn of SASL has no effect since HiveAuthFactory calls
|
||||
// 'new HiveConf()'. This is fixed by https://issues.apache.org/jira/browse/HIVE-6657,
|
||||
// in Hive 0.14.
|
||||
// As a workaround, the property is set in hive-site.xml in this module.
|
||||
//conf.set(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION.varname, "NOSASL");
|
||||
File localHiveDir = new File(localHiveLocation);
|
||||
localHiveDir.mkdirs();
|
||||
File metastoreDbDir = new File(localHiveDir, "metastore_db");
|
||||
conf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname,
|
||||
"jdbc:derby:" + metastoreDbDir.getPath() + ";create=true");
|
||||
File derbyLogFile = new File(localHiveDir, "derby.log");
|
||||
derbyLogFile.createNewFile();
|
||||
setSystemProperty("derby.stream.error.file", derbyLogFile.getPath());
|
||||
conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname,
|
||||
Files.createTempDir().getAbsolutePath());
|
||||
|
||||
return new HiveConf(conf, this.getClass());
|
||||
}
|
||||
|
||||
private boolean waitForServerUp(HiveConf serverConf, String hostname, int port, int timeout) {
|
||||
long start = System.currentTimeMillis();
|
||||
while (true) {
|
||||
try {
|
||||
new HiveMetaStoreClient(serverConf);
|
||||
return true;
|
||||
} catch (MetaException e) {
|
||||
// ignore as this is expected
|
||||
LOG.info("server " + hostname + ":" + port + " not up " + e);
|
||||
}
|
||||
|
||||
if (System.currentTimeMillis() > start + timeout) {
|
||||
break;
|
||||
}
|
||||
try {
|
||||
Thread.sleep(250);
|
||||
} catch (InterruptedException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private void setSystemProperty(String name, String value) {
|
||||
if (!sysProps.containsKey(name)) {
|
||||
String currentValue = System.getProperty(name);
|
||||
sysProps.put(name, currentValue);
|
||||
}
|
||||
if (value != null) {
|
||||
System.setProperty(name, value);
|
||||
} else {
|
||||
System.getProperties().remove(name);
|
||||
}
|
||||
}
|
||||
|
||||
private void resetSystemProperties() {
|
||||
for (Map.Entry<String, String> entry : sysProps.entrySet()) {
|
||||
if (entry.getValue() != null) {
|
||||
System.setProperty(entry.getKey(), entry.getValue());
|
||||
} else {
|
||||
System.getProperties().remove(entry.getKey());
|
||||
}
|
||||
}
|
||||
sysProps.clear();
|
||||
}
|
||||
|
||||
private static String getHiveLocation(String baseLocation) {
|
||||
return baseLocation + Path.SEPARATOR + "hive";
|
||||
}
|
||||
|
||||
private HiveServer2 startHiveServer(HiveConf serverConf) {
|
||||
HiveServer2 hiveServer = new HiveServer2();
|
||||
hiveServer.init(serverConf);
|
||||
hiveServer.start();
|
||||
return hiveServer;
|
||||
}
|
||||
|
||||
// XXX: From org.apache.hadoop.hive.metastore.HiveMetaStore,
|
||||
// with changes to support binding to a specified IP address (not only 0.0.0.0)
|
||||
|
||||
|
||||
private static final class ChainedTTransportFactory extends TTransportFactory {
|
||||
|
||||
private final TTransportFactory parentTransFactory;
|
||||
private final TTransportFactory childTransFactory;
|
||||
|
||||
private ChainedTTransportFactory(TTransportFactory parentTransFactory,
|
||||
TTransportFactory childTransFactory) {
|
||||
this.parentTransFactory = parentTransFactory;
|
||||
this.childTransFactory = childTransFactory;
|
||||
}
|
||||
|
||||
private HiveConf configureHive(Configuration conf, String localHiveLocation)
|
||||
throws IOException {
|
||||
conf.set("hive.metastore.local", "false");
|
||||
conf.set(HiveConf.ConfVars.METASTOREURIS.varname,
|
||||
"thrift://" + bindIP + ":" + metastorePort);
|
||||
conf.set(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST.varname, bindIP);
|
||||
conf.setInt(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_PORT.varname, serverPort);
|
||||
// The following line to turn of SASL has no effect since HiveAuthFactory calls
|
||||
// 'new HiveConf()'. This is fixed by https://issues.apache.org/jira/browse/HIVE-6657,
|
||||
// in Hive 0.14.
|
||||
// As a workaround, the property is set in hive-site.xml in this module.
|
||||
//conf.set(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION.varname, "NOSASL");
|
||||
File localHiveDir = new File(localHiveLocation);
|
||||
localHiveDir.mkdirs();
|
||||
File metastoreDbDir = new File(localHiveDir, "metastore_db");
|
||||
conf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname,
|
||||
"jdbc:derby:" + metastoreDbDir.getPath() + ";create=true");
|
||||
File derbyLogFile = new File(localHiveDir, "derby.log");
|
||||
derbyLogFile.createNewFile();
|
||||
setSystemProperty("derby.stream.error.file", derbyLogFile.getPath());
|
||||
conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname,
|
||||
Files.createTempDir().getAbsolutePath());
|
||||
@Override
|
||||
public TTransport getTransport(TTransport trans) {
|
||||
return childTransFactory.getTransport(parentTransFactory.getTransport(trans));
|
||||
}
|
||||
}
|
||||
|
||||
return new HiveConf(conf, this.getClass());
|
||||
|
||||
private static final class TServerSocketKeepAlive extends TServerSocket {
|
||||
|
||||
public TServerSocketKeepAlive(int port) throws TTransportException {
|
||||
super(port, 0);
|
||||
}
|
||||
|
||||
private boolean waitForServerUp(HiveConf serverConf, String hostname, int port, int timeout) {
|
||||
long start = System.currentTimeMillis();
|
||||
while (true) {
|
||||
try {
|
||||
new HiveMetaStoreClient(serverConf);
|
||||
return true;
|
||||
} catch (MetaException e) {
|
||||
// ignore as this is expected
|
||||
LOG.info("server " + hostname + ":" + port + " not up " + e);
|
||||
}
|
||||
|
||||
if (System.currentTimeMillis() > start + timeout) {
|
||||
break;
|
||||
}
|
||||
try {
|
||||
Thread.sleep(250);
|
||||
} catch (InterruptedException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
return false;
|
||||
public TServerSocketKeepAlive(InetSocketAddress address) throws TTransportException {
|
||||
super(address, 0);
|
||||
}
|
||||
|
||||
private void setSystemProperty(String name, String value) {
|
||||
if (!sysProps.containsKey(name)) {
|
||||
String currentValue = System.getProperty(name);
|
||||
sysProps.put(name, currentValue);
|
||||
}
|
||||
if (value != null) {
|
||||
System.setProperty(name, value);
|
||||
} else {
|
||||
System.getProperties().remove(name);
|
||||
}
|
||||
@Override
|
||||
protected TSocket acceptImpl() throws TTransportException {
|
||||
TSocket ts = super.acceptImpl();
|
||||
try {
|
||||
ts.getSocket().setKeepAlive(true);
|
||||
} catch (SocketException e) {
|
||||
throw new TTransportException(e);
|
||||
}
|
||||
return ts;
|
||||
}
|
||||
}
|
||||
|
||||
private void resetSystemProperties() {
|
||||
for (Map.Entry<String, String> entry : sysProps.entrySet()) {
|
||||
if (entry.getValue() != null) {
|
||||
System.setProperty(entry.getKey(), entry.getValue());
|
||||
} else {
|
||||
System.getProperties().remove(entry.getKey());
|
||||
}
|
||||
}
|
||||
sysProps.clear();
|
||||
}
|
||||
|
||||
private static String getHiveLocation(String baseLocation) {
|
||||
return baseLocation + Path.SEPARATOR + "hive";
|
||||
}
|
||||
|
||||
private HiveServer2 startHiveServer(HiveConf serverConf) {
|
||||
HiveServer2 hiveServer = new HiveServer2();
|
||||
hiveServer.init(serverConf);
|
||||
hiveServer.start();
|
||||
return hiveServer;
|
||||
}
|
||||
|
||||
// XXX: From org.apache.hadoop.hive.metastore.HiveMetaStore,
|
||||
// with changes to support binding to a specified IP address (not only 0.0.0.0)
|
||||
|
||||
|
||||
private static final class ChainedTTransportFactory extends TTransportFactory {
|
||||
private final TTransportFactory parentTransFactory;
|
||||
private final TTransportFactory childTransFactory;
|
||||
|
||||
private ChainedTTransportFactory(TTransportFactory parentTransFactory,
|
||||
TTransportFactory childTransFactory) {
|
||||
this.parentTransFactory = parentTransFactory;
|
||||
this.childTransFactory = childTransFactory;
|
||||
}
|
||||
|
||||
@Override public TTransport getTransport(TTransport trans) {
|
||||
return childTransFactory.getTransport(parentTransFactory.getTransport(trans));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static final class TServerSocketKeepAlive extends TServerSocket {
|
||||
public TServerSocketKeepAlive(int port) throws TTransportException {
|
||||
super(port, 0);
|
||||
}
|
||||
|
||||
public TServerSocketKeepAlive(InetSocketAddress address) throws TTransportException {
|
||||
super(address, 0);
|
||||
}
|
||||
|
||||
@Override protected TSocket acceptImpl() throws TTransportException {
|
||||
TSocket ts = super.acceptImpl();
|
||||
try {
|
||||
ts.getSocket().setKeepAlive(true);
|
||||
} catch (SocketException e) {
|
||||
throw new TTransportException(e);
|
||||
}
|
||||
return ts;
|
||||
}
|
||||
}
|
||||
|
||||
public TServer startMetaStore(String forceBindIP, int port, HiveConf conf) throws IOException {
|
||||
try {
|
||||
// Server will create new threads up to max as necessary. After an idle
|
||||
// period, it will destory threads to keep the number of threads in the
|
||||
// pool to min.
|
||||
int minWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMINTHREADS);
|
||||
int maxWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMAXTHREADS);
|
||||
boolean tcpKeepAlive = conf.getBoolVar(HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE);
|
||||
boolean useFramedTransport =
|
||||
conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_FRAMED_TRANSPORT);
|
||||
|
||||
// don't support SASL yet
|
||||
//boolean useSasl = conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL);
|
||||
|
||||
TServerTransport serverTransport;
|
||||
if (forceBindIP != null) {
|
||||
InetSocketAddress address = new InetSocketAddress(forceBindIP, port);
|
||||
serverTransport =
|
||||
tcpKeepAlive ? new TServerSocketKeepAlive(address) : new TServerSocket(address);
|
||||
|
||||
} else {
|
||||
serverTransport =
|
||||
tcpKeepAlive ? new TServerSocketKeepAlive(port) : new TServerSocket(port);
|
||||
}
|
||||
|
||||
TProcessor processor;
|
||||
TTransportFactory transFactory;
|
||||
|
||||
IHMSHandler handler = (IHMSHandler) HiveMetaStore
|
||||
.newRetryingHMSHandler("new db based metaserver", conf, true);
|
||||
|
||||
if (conf.getBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI)) {
|
||||
transFactory = useFramedTransport ?
|
||||
new ChainedTTransportFactory(new TFramedTransport.Factory(),
|
||||
new TUGIContainingTransport.Factory()) :
|
||||
new TUGIContainingTransport.Factory();
|
||||
|
||||
processor = new TUGIBasedProcessor<IHMSHandler>(handler);
|
||||
LOG.info("Starting DB backed MetaStore Server with SetUGI enabled");
|
||||
} else {
|
||||
transFactory =
|
||||
useFramedTransport ? new TFramedTransport.Factory() : new TTransportFactory();
|
||||
processor = new TSetIpAddressProcessor<IHMSHandler>(handler);
|
||||
LOG.info("Starting DB backed MetaStore Server");
|
||||
}
|
||||
|
||||
TThreadPoolServer.Args args =
|
||||
new TThreadPoolServer.Args(serverTransport).processor(processor)
|
||||
.transportFactory(transFactory).protocolFactory(new TBinaryProtocol.Factory())
|
||||
.minWorkerThreads(minWorkerThreads).maxWorkerThreads(maxWorkerThreads);
|
||||
|
||||
final TServer tServer = new TThreadPoolServer(args);
|
||||
executorService.submit(new Runnable() {
|
||||
@Override public void run() {
|
||||
tServer.serve();
|
||||
}
|
||||
});
|
||||
return tServer;
|
||||
} catch (Throwable x) {
|
||||
throw new IOException(x);
|
||||
public TServer startMetaStore(String forceBindIP, int port, HiveConf conf) throws IOException {
|
||||
try {
|
||||
// Server will create new threads up to max as necessary. After an idle
|
||||
// period, it will destory threads to keep the number of threads in the
|
||||
// pool to min.
|
||||
int minWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMINTHREADS);
|
||||
int maxWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMAXTHREADS);
|
||||
boolean tcpKeepAlive = conf.getBoolVar(HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE);
|
||||
boolean useFramedTransport =
|
||||
conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_FRAMED_TRANSPORT);
|
||||
|
||||
// don't support SASL yet
|
||||
//boolean useSasl = conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL);
|
||||
|
||||
TServerTransport serverTransport;
|
||||
if (forceBindIP != null) {
|
||||
InetSocketAddress address = new InetSocketAddress(forceBindIP, port);
|
||||
serverTransport =
|
||||
tcpKeepAlive ? new TServerSocketKeepAlive(address) : new TServerSocket(address);
|
||||
|
||||
} else {
|
||||
serverTransport =
|
||||
tcpKeepAlive ? new TServerSocketKeepAlive(port) : new TServerSocket(port);
|
||||
}
|
||||
|
||||
TProcessor processor;
|
||||
TTransportFactory transFactory;
|
||||
|
||||
IHMSHandler handler = (IHMSHandler) HiveMetaStore
|
||||
.newRetryingHMSHandler("new db based metaserver", conf, true);
|
||||
|
||||
if (conf.getBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI)) {
|
||||
transFactory = useFramedTransport ?
|
||||
new ChainedTTransportFactory(new TFramedTransport.Factory(),
|
||||
new TUGIContainingTransport.Factory()) :
|
||||
new TUGIContainingTransport.Factory();
|
||||
|
||||
processor = new TUGIBasedProcessor<IHMSHandler>(handler);
|
||||
LOG.info("Starting DB backed MetaStore Server with SetUGI enabled");
|
||||
} else {
|
||||
transFactory =
|
||||
useFramedTransport ? new TFramedTransport.Factory() : new TTransportFactory();
|
||||
processor = new TSetIpAddressProcessor<IHMSHandler>(handler);
|
||||
LOG.info("Starting DB backed MetaStore Server");
|
||||
}
|
||||
|
||||
TThreadPoolServer.Args args =
|
||||
new TThreadPoolServer.Args(serverTransport).processor(processor)
|
||||
.transportFactory(transFactory).protocolFactory(new TBinaryProtocol.Factory())
|
||||
.minWorkerThreads(minWorkerThreads).maxWorkerThreads(maxWorkerThreads);
|
||||
|
||||
final TServer tServer = new TThreadPoolServer(args);
|
||||
executorService.submit(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
tServer.serve();
|
||||
}
|
||||
});
|
||||
return tServer;
|
||||
} catch (Throwable x) {
|
||||
throw new IOException(x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,12 +13,10 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
log4j.rootLogger=WARN, A1
|
||||
log4j.category.com.uber=INFO
|
||||
log4j.category.org.apache.parquet.hadoop=WARN
|
||||
log4j.category.parquet.hadoop=WARN
|
||||
|
||||
# A1 is set to be a ConsoleAppender.
|
||||
log4j.appender.A1=org.apache.log4j.ConsoleAppender
|
||||
# A1 uses PatternLayout.
|
||||
|
||||
Reference in New Issue
Block a user