1
0

[HUDI-1192] Make create hive database automatically configurable (#1968)

This commit is contained in:
liujinhui
2020-09-27 14:10:13 +08:00
committed by GitHub
parent b0f1b736f8
commit a86f5574ed
5 changed files with 39 additions and 9 deletions

View File

@@ -71,6 +71,9 @@ public class HiveSyncConfig implements Serializable {
// Bound to the --use-jdbc command-line flag; defaults to true.
// NOTE(review): the description text reads like it belongs to a JDBC URL option, yet this is a
// Boolean switch — confirm the intended wording.
@Parameter(names = {"--use-jdbc"}, description = "Hive jdbc connect url")
public Boolean useJdbc = true;
// Bound to the --auto-create-database command-line flag; defaults to true.
// HiveSyncTool reads this flag: when true it issues "create database if not exists" for the
// configured database; when false it fails the sync if that database does not already exist.
@Parameter(names = {"--auto-create-database"}, description = "Auto create hive database")
public Boolean autoCreateDatabase = true;
// Bound to the --skip-ro-suffix command-line flag; defaults to false, i.e. the `_ro` suffix is
// kept unless the flag is set.
@Parameter(names = {"--skip-ro-suffix"}, description = "Skip the `_ro` suffix for Read optimized table, when registering")
public Boolean skipROSuffix = false;

View File

@@ -117,11 +117,17 @@ public class HiveSyncTool extends AbstractSyncTool {
boolean tableExists = hoodieHiveClient.doesTableExist(tableName);
// check if the database exists else create it
try {
hoodieHiveClient.updateHiveSQL("create database if not exists " + cfg.databaseName);
} catch (Exception e) {
// this is harmless since table creation will fail anyways, creation of DB is needed for in-memory testing
LOG.warn("Unable to create database", e);
if (cfg.autoCreateDatabase) {
try {
hoodieHiveClient.updateHiveSQL("create database if not exists " + cfg.databaseName);
} catch (Exception e) {
// this is harmless since table creation will fail anyways, creation of DB is needed for in-memory testing
LOG.warn("Unable to create database", e);
}
} else {
if (!hoodieHiveClient.doesDataBaseExist(cfg.databaseName)) {
throw new HoodieHiveSyncException("hive database does not exist " + cfg.databaseName);
}
}
// Get the parquet schema for this table looking at the latest commit

View File

@@ -18,6 +18,11 @@
package org.apache.hudi.hive;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.fs.StorageSchemes;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -29,10 +34,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
@@ -336,6 +337,22 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
}
}
/**
 * Checks whether a database with the given name is registered in the Hive metastore.
 *
 * @param databaseName name of the Hive database to look up
 * @return true if the metastore returns a database whose name equals {@code databaseName};
 *         false if the metastore reports that no such database exists (or returns no match)
 * @throws HoodieHiveSyncException if the metastore lookup fails for any other reason
 */
public boolean doesDataBaseExist(String databaseName) {
  try {
    Database database = client.getDatabase(databaseName);
    return database != null && databaseName.equals(database.getName());
  } catch (org.apache.hadoop.hive.metastore.api.NoSuchObjectException ignored) {
    // The metastore signals a missing database by throwing NoSuchObjectException (a TException
    // subclass). That is the "does not exist" answer, not a lookup failure, so it must be
    // handled before the generic TException branch. Fully qualified to avoid a new import.
    return false;
  } catch (TException e) {
    throw new HoodieHiveSyncException("Failed to check if database exists " + databaseName, e);
  }
}
/**
* Execute a update in hive metastore with this SQL.
*