Moving dependencies off CDH to Apache + Hive 2 support

- Tests redone in the process
- Main changes are to RealtimeRecordReader and how it treats maps/arrays
- Make Hive sync work with Hive 1/2 and CDH environments
- Fixes to handle corner cases in Hive queries
- Spark Hive integration
- Working version across Apache and CDH versions
- Known issue: https://github.com/uber/hudi/issues/439
commit a5359662be
parent 2b1af18941
committed by vinoth chandar
@@ -43,38 +43,32 @@
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-auth</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-common</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
    </dependency>
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-service</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-metastore</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.thrift</groupId>
      <artifactId>libthrift</artifactId>
      <version>0.9.2</version>
    </dependency>

    <dependency>
      <groupId>joda-time</groupId>
      <artifactId>joda-time</artifactId>
    </dependency>

    <!-- Apache commons -->
    <dependency>
      <groupId>commons-dbcp</groupId>
      <artifactId>commons-dbcp</artifactId>
    </dependency>

    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
    </dependency>

    <!-- Logging -->
    <dependency>
      <groupId>org.slf4j</groupId>
@@ -90,6 +84,16 @@
      <artifactId>jcommander</artifactId>
    </dependency>

    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpcore</artifactId>
    </dependency>

    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
    </dependency>

    <!-- Hadoop Testing -->
    <dependency>
      <groupId>junit</groupId>
@@ -159,52 +163,79 @@
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.4.1</version>
        <configuration>
          <descriptors>
            <descriptor>src/assembly/src.xml</descriptor>
          </descriptors>
          <archive>
            <manifest>
              <mainClass>com.uber.hoodie.hive.HiveSyncTool</mainClass>
            </manifest>
          </archive>

        </configuration>
        <artifactId>maven-jar-plugin</artifactId>
        <version>2.5</version>
        <executions>
          <execution>
            <id>make-assembly</id>
            <!-- bind to the packaging phase -->
            <phase>package</phase>
            <goals>
              <goal>single</goal>
              <goal>test-jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-dependency-plugin</artifactId>
        <version>2.4</version>
        <executions>
          <execution>
            <id>copy-dependencies</id>
            <phase>package</phase>
            <goals>
              <goal>copy-dependencies</goal>
            </goals>
            <configuration>
              <outputDirectory>${project.build.directory}/jars</outputDirectory>
              <overWriteReleases>false</overWriteReleases>
              <overWriteSnapshots>false</overWriteSnapshots>
              <overWriteIfNewer>true</overWriteIfNewer>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>

  </build>

  <profiles>
    <profile>
      <id>hive12</id>
      <activation>
        <property>
          <name>!hive11</name>
        </property>
      </activation>
      <dependencies>
        <dependency>
          <groupId>${hive12.groupid}</groupId>
          <artifactId>hive-service</artifactId>
          <version>${hive12.version}</version>
        </dependency>
        <dependency>
          <groupId>${hive12.groupid}</groupId>
          <artifactId>hive-jdbc</artifactId>
          <version>${hive12.version}</version>
        </dependency>
        <dependency>
          <groupId>${hive12.groupid}</groupId>
          <artifactId>hive-metastore</artifactId>
          <version>${hive12.version}</version>
        </dependency>
        <dependency>
          <groupId>${hive12.groupid}</groupId>
          <artifactId>hive-common</artifactId>
          <version>${hive12.version}</version>
        </dependency>
      </dependencies>
    </profile>
    <profile>
      <id>hive11</id>
      <activation>
        <property>
          <name>hive11</name>
        </property>
      </activation>
      <dependencies>
        <dependency>
          <groupId>org.apache.hive</groupId>
          <artifactId>hive-service</artifactId>
          <version>${hive11.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hive</groupId>
          <artifactId>hive-jdbc</artifactId>
          <version>${hive11.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hive</groupId>
          <artifactId>hive-metastore</artifactId>
          <version>${hive11.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hive</groupId>
          <artifactId>hive-common</artifactId>
          <version>${hive11.version}</version>
        </dependency>
      </dependencies>
    </profile>
  </profiles>
</project>
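The hive12 profile above is active by default (its activation property is !hive11), while the hive11 profile is enabled by defining the hive11 property on the command line. A minimal sketch of selecting between the two builds, assuming a standard Maven invocation and that hive11.version, hive12.version and hive12.groupid are defined in the parent pom (their values are not shown in this diff):

# default build: the hive12 profile is active because the hive11 property is not set
mvn clean package -DskipTests

# build with the hive11 dependency set instead, by defining the hive11 property
mvn clean package -DskipTests -Dhive11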
hoodie-hive/run_sync_tool.sh (new file, 39 lines)
@@ -0,0 +1,39 @@
#!/usr/bin/env bash

function error_exit {
  echo "$1" >&2 ## Send message to stderr. Exclude >&2 if you don't want it that way.
  exit "${2:-1}" ## Return a code specified by $2 or 1 by default.
}

if [ -z "${HADOOP_HOME}" ]; then
  error_exit "Please make sure the environment variable HADOOP_HOME is setup"
fi

if [ -z "${HIVE_HOME}" ]; then
  error_exit "Please make sure the environment variable HIVE_HOME is setup"
fi

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
#Ensure we pick the right jar even for hive11 builds
HOODIE_HIVE_UBER_JAR=`ls -c $DIR/../packaging/hoodie-hive-bundle/target/hoodie-hive-*.jar | head -1`

if [ -z "$HADOOP_CONF_DIR" ]; then
  echo "setting hadoop conf dir"
  HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
fi

## Include only specific packages from HIVE_HOME/lib to avoid version mismatches
HIVE_EXEC=`ls ${HIVE_HOME}/lib/hive-exec-*.jar`
HIVE_SERVICE=`ls ${HIVE_HOME}/lib/hive-service-*.jar | grep -v rpc`
HIVE_METASTORE=`ls ${HIVE_HOME}/lib/hive-metastore-*.jar`
# Hive 1.x/CDH has standalone jdbc jar which is no longer available in 2.x
HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*standalone*.jar`
if [ -z "${HIVE_JDBC}" ]; then
  HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*.jar | grep -v handler`
fi
HIVE_JARS=$HIVE_METASTORE:$HIVE_SERVICE:$HIVE_EXEC:$HIVE_SERVICE:$HIVE_JDBC

HADOOP_HIVE_JARS=${HIVE_JARS}:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/hdfs/lib/*

echo "Running Command : java -cp ${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR}:$HOODIE_HIVE_UBER_JAR com.uber.hoodie.hive.HiveSyncTool $@"
java -cp $HOODIE_HIVE_UBER_JAR:${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR} com.uber.hoodie.hive.HiveSyncTool "$@"
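For reference, a hypothetical invocation of the script above. Apart from --help/-h, which appears in HiveSyncConfig further down in this diff, the flag names and values here are assumptions inferred from the HiveSyncConfig fields (jdbcUrl, hiveUser, hivePass, basePath, databaseName, tableName, partitionFields) and are not confirmed by this change:

# assumed environment; adjust paths to the actual Hadoop/Hive installs
export HADOOP_HOME=/opt/hadoop
export HIVE_HOME=/opt/hive
# flag names below are illustrative only, derived from the HiveSyncConfig fields
./hoodie-hive/run_sync_tool.sh \
  --jdbc-url jdbc:hive2://hiveserver:10000 \
  --user hive \
  --pass hive \
  --base-path hdfs:///tmp/hoodie/sample_table \
  --database default \
  --table sample_table \
  --partitioned-by datestr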
@@ -69,4 +69,20 @@ public class HiveSyncConfig implements Serializable {
  @Parameter(names = {"--help", "-h"}, help = true)
  public Boolean help = false;

  @Override
  public String toString() {
    return "HiveSyncConfig{"
        + "databaseName='" + databaseName + '\''
        + ", tableName='" + tableName + '\''
        + ", hiveUser='" + hiveUser + '\''
        + ", hivePass='" + hivePass + '\''
        + ", jdbcUrl='" + jdbcUrl + '\''
        + ", basePath='" + basePath + '\''
        + ", partitionFields=" + partitionFields
        + ", partitionValueExtractorClass='" + partitionValueExtractorClass + '\''
        + ", assumeDatePartitioning=" + assumeDatePartitioning
        + ", help=" + help
        + '}';
  }
}
@@ -22,6 +22,7 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.common.model.HoodieFileFormat;
import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.common.model.HoodieTableType;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
@@ -34,6 +35,7 @@ import com.uber.hoodie.hive.util.SchemaUtil;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.Driver;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
@@ -43,6 +45,9 @@ import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.dbcp.BasicDataSource;
import org.apache.commons.dbcp.ConnectionFactory;
import org.apache.commons.dbcp.DriverConnectionFactory;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -185,8 +190,7 @@ public class HoodieHiveClient {
      String fullPartitionPath = new Path(syncConfig.basePath, partition).toString();
      String changePartition =
          alterTable + " PARTITION (" + partBuilder.toString() + ") SET LOCATION '"
              + "hdfs://nameservice1" + fullPartitionPath + "'";
          alterTable + " PARTITION (" + partBuilder.toString() + ") SET LOCATION '" + fullPartitionPath + "'";
      changePartitions.add(changePartition);
    }
    return changePartitions;
@@ -234,7 +238,7 @@ public class HoodieHiveClient {
  void updateTableDefinition(MessageType newSchema) {
    try {
      String newSchemaStr = SchemaUtil.generateSchemaString(newSchema);
      String newSchemaStr = SchemaUtil.generateSchemaString(newSchema, syncConfig.partitionFields);
      // Cascade clause should not be present for non-partitioned tables
      String cascadeClause = syncConfig.partitionFields.size() > 0 ? " cascade" : "";
      StringBuilder sqlBuilder = new StringBuilder("ALTER TABLE ").append("`")
@@ -242,7 +246,7 @@ public class HoodieHiveClient {
          .append(syncConfig.tableName).append("`")
          .append(" REPLACE COLUMNS(").append(newSchemaStr).append(" )")
          .append(cascadeClause);
      LOG.info("Creating table with " + sqlBuilder);
      LOG.info("Updating table definition with " + sqlBuilder);
      updateHiveSQL(sqlBuilder.toString());
    } catch (IOException e) {
      throw new HoodieHiveSyncException("Failed to update table for " + syncConfig.tableName, e);
@@ -311,7 +315,8 @@ public class HoodieHiveClient {
        String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
            .stream().findAny().orElseThrow(() -> new IllegalArgumentException(
                "Could not find any data file written for commit " + lastCommit
                    + ", could not get schema for dataset " + metaClient.getBasePath()));
                    + ", could not get schema for dataset " + metaClient.getBasePath()
                    + ", Metadata :" + commitMetadata));
        return readSchemaFromDataFile(new Path(filePath));
      case MERGE_ON_READ:
        // If this is MOR, depending on whether the latest commit is a delta commit or
@@ -340,12 +345,31 @@ public class HoodieHiveClient {
          // read from the log file wrote
          commitMetadata = HoodieCommitMetadata.fromBytes(
              activeTimeline.getInstantDetails(lastDeltaInstant).get(), HoodieCommitMetadata.class);
          filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
              .stream().filter(s -> s.contains(HoodieLogFile.DELTA_EXTENSION))
              .findAny().orElseThrow(() -> new IllegalArgumentException(
                  "Could not find any data file written for commit " + lastDeltaInstant
                      + ", could not get schema for dataset " + metaClient.getBasePath()));
          return readSchemaFromLogFile(lastCompactionCommit, new Path(filePath));
          Pair<String, HoodieFileFormat> filePathWithFormat =
              commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
                  .stream().filter(s -> s.contains(HoodieLogFile.DELTA_EXTENSION))
                  .findAny().map(f -> Pair.of(f, HoodieFileFormat.HOODIE_LOG))
                  .orElseGet(() -> {
                    // No Log files in Delta-Commit. Check if there are any parquet files
                    return commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream()
                        .filter(s -> s.contains((metaClient.getTableConfig().getROFileFormat().getFileExtension())))
                        .findAny()
                        .map(f -> Pair.of(f, HoodieFileFormat.PARQUET)).orElseThrow(() -> {
                          return new IllegalArgumentException(
                              "Could not find any data file written for commit " + lastDeltaInstant
                                  + ", could not get schema for dataset " + metaClient.getBasePath()
                                  + ", CommitMetadata :" + commitMetadata);
                        });
                  });
          switch (filePathWithFormat.getRight()) {
            case HOODIE_LOG:
              return readSchemaFromLogFile(lastCompactionCommit, new Path(filePathWithFormat.getLeft()));
            case PARQUET:
              return readSchemaFromDataFile(new Path(filePathWithFormat.getLeft()));
            default:
              throw new IllegalArgumentException("Unknown file format :" + filePathWithFormat.getRight()
                  + " for file " + filePathWithFormat.getLeft());
          }
        } else {
          return readSchemaFromLastCompaction(lastCompactionCommit);
        }
@@ -442,14 +466,15 @@ public class HoodieHiveClient {
  private void createHiveConnection() {
    if (connection == null) {
      BasicDataSource ds = new BasicDataSource();
      ds.setDriverClassName(driverName);
      BasicDataSource ds = new HiveDataSource();
      ds.setDriverClassName(HiveDriver.class.getCanonicalName());
      ds.setUrl(getHiveJdbcUrlWithDefaultDBName());
      ds.setUsername(syncConfig.hiveUser);
      ds.setPassword(syncConfig.hivePass);
      LOG.info("Getting Hive Connection from Datasource " + ds);
      try {
        this.connection = ds.getConnection();
        LOG.info("Successfully got Hive Connection from Datasource " + ds);
      } catch (SQLException e) {
        throw new HoodieHiveSyncException(
            "Cannot create hive connection " + getHiveJdbcUrlWithDefaultDBName(), e);
@@ -589,4 +614,54 @@ public class HoodieHiveClient {
        return new PartitionEvent(PartitionEventType.UPDATE, storagePartition);
      }
    }

  /**
   * There is a bug in BasicDataSource implementation (dbcp-1.4) which does not allow custom version of Driver (needed
   * to talk to older version of HiveServer2 including CDH-5x). This is fixed in dbcp-2x but we are using dbcp1.4.
   * Adding a workaround here. TODO: varadarb We need to investigate moving to dbcp-2x
   */
  protected class HiveDataSource extends BasicDataSource {

    protected ConnectionFactory createConnectionFactory() throws SQLException {
      try {
        Driver driver = HiveDriver.class.newInstance();
        // Can't test without a validationQuery
        if (validationQuery == null) {
          setTestOnBorrow(false);
          setTestOnReturn(false);
          setTestWhileIdle(false);
        }

        // Set up the driver connection factory we will use
        String user = username;
        if (user != null) {
          connectionProperties.put("user", user);
        } else {
          log("DBCP DataSource configured without a 'username'");
        }

        String pwd = password;
        if (pwd != null) {
          connectionProperties.put("password", pwd);
        } else {
          log("DBCP DataSource configured without a 'password'");
        }

        ConnectionFactory driverConnectionFactory = new DriverConnectionFactory(driver, url, connectionProperties);
        return driverConnectionFactory;
      } catch (Throwable x) {
        LOG.warn("Got exception trying to instantiate connection factory. Trying default instantiation", x);
        return super.createConnectionFactory();
      }
    }

    @Override
    public String toString() {
      return "HiveDataSource{"
          + "driverClassName='" + driverClassName + '\''
          + ", driverClassLoader=" + driverClassLoader
          + ", url='" + url + '\''
          + '}';
    }
  }
}
@@ -27,9 +27,11 @@ import com.uber.hoodie.hive.HiveSyncConfig;
import com.uber.hoodie.hive.HoodieHiveSyncException;
import com.uber.hoodie.hive.SchemaDifference;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
@@ -322,6 +324,14 @@ public class SchemaUtil {
    return result;
  }

  private static String removeSurroundingTick(String result) {
    if (result.startsWith("`") && result.endsWith("`")) {
      result = result.substring(1, result.length() - 1);
    }

    return result;
  }

  /**
   * Create a 'Map' schema from Parquet map field
   */
@@ -372,11 +382,17 @@ public class SchemaUtil {
  }

  public static String generateSchemaString(MessageType storageSchema) throws IOException {
    return generateSchemaString(storageSchema, new ArrayList<>());
  }

  public static String generateSchemaString(MessageType storageSchema, List<String> colsToSkip) throws IOException {
    Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
    StringBuilder columns = new StringBuilder();
    for (Map.Entry<String, String> hiveSchemaEntry : hiveSchema.entrySet()) {
      columns.append(hiveSchemaEntry.getKey()).append(" ");
      columns.append(hiveSchemaEntry.getValue()).append(", ");
      if (!colsToSkip.contains(removeSurroundingTick(hiveSchemaEntry.getKey()))) {
        columns.append(hiveSchemaEntry.getKey()).append(" ");
        columns.append(hiveSchemaEntry.getValue()).append(", ");
      }
    }
    // Remove the last ", "
    columns.delete(columns.length() - 2, columns.length());
@@ -386,19 +402,20 @@ public class SchemaUtil {
  public static String generateCreateDDL(MessageType storageSchema, HiveSyncConfig config,
      String inputFormatClass, String outputFormatClass, String serdeClass) throws IOException {
    Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
    String columns = generateSchemaString(storageSchema);
    String columns = generateSchemaString(storageSchema, config.partitionFields);

    StringBuilder partitionFields = new StringBuilder();
    List<String> partitionFields = new ArrayList<>();
    for (String partitionKey : config.partitionFields) {
      partitionFields.append(partitionKey).append(" ")
          .append(getPartitionKeyType(hiveSchema, partitionKey));
      partitionFields.add(new StringBuilder().append(partitionKey).append(" ")
          .append(getPartitionKeyType(hiveSchema, partitionKey)).toString());
    }

    String paritionsStr = partitionFields.stream().collect(Collectors.joining(","));
    StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS ");
    sb = sb.append(config.databaseName).append(".").append(config.tableName);
    sb = sb.append("( ").append(columns).append(")");
    if (!config.partitionFields.isEmpty()) {
      sb = sb.append(" PARTITIONED BY (").append(partitionFields).append(")");
      sb = sb.append(" PARTITIONED BY (").append(paritionsStr).append(")");
    }
    sb = sb.append(" ROW FORMAT SERDE '").append(serdeClass).append("'");
    sb = sb.append(" STORED AS INPUTFORMAT '").append(inputFormatClass).append("'");