1
0

[HUDI-2244] Fix database already-exists exception during Hive sync (#3361)

This commit is contained in:
swuferhong
2021-07-28 19:40:16 +08:00
committed by GitHub
parent 91c2213412
commit eedfadeb46
4 changed files with 56 additions and 9 deletions

View File

@@ -150,7 +150,9 @@ public class HiveSyncTool extends AbstractSyncTool {
// check if the database exists else create it
if (cfg.autoCreateDatabase) {
try {
hoodieHiveClient.createDatabase(cfg.databaseName);
if (!hoodieHiveClient.doesDataBaseExist(cfg.databaseName)) {
hoodieHiveClient.createDatabase(cfg.databaseName);
}
} catch (Exception e) {
// this is harmless since table creation will fail anyways, creation of DB is needed for in-memory testing
LOG.warn("Unable to create database", e);

View File

@@ -33,7 +33,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
@@ -221,14 +220,14 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
*/
public boolean doesDataBaseExist(String databaseName) {
try {
Database database = client.getDatabase(databaseName);
if (database != null && databaseName.equals(database.getName())) {
return true;
}
client.getDatabase(databaseName);
return true;
} catch (NoSuchObjectException noSuchObjectException) {
// NoSuchObjectException is thrown when there is no existing database of the name.
return false;
} catch (TException e) {
throw new HoodieHiveSyncException("Failed to check if database exists " + databaseName, e);
}
return false;
}
public void createDatabase(String databaseName) {

View File

@@ -62,8 +62,10 @@ import java.util.Map;
import static org.apache.hudi.hive.testutils.HiveTestUtil.ddlExecutor;
import static org.apache.hudi.hive.testutils.HiveTestUtil.fileSystem;
import static org.apache.hudi.hive.testutils.HiveTestUtil.hiveSyncConfig;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestHiveSyncTool {
@@ -302,6 +304,49 @@ public class TestHiveSyncTool {
"The last commit that was synced should be 100");
}
/**
 * Exercises {@code HiveSyncTool#syncHoodieTable} against the database "database1" for every
 * combination of the {@code autoCreateDatabase} flag and whether the database already exists:
 * <ol>
 *   <li>flag off, database missing: the sync is expected to throw;</li>
 *   <li>flag on, database missing: the sync must succeed and the database must exist afterwards;</li>
 *   <li>flag off, database present: the sync must succeed;</li>
 *   <li>flag on, database present: the sync must succeed (no failure from re-creating it).</li>
 * </ol>
 * The cases run in this order on purpose: case 2 creates the database that cases 3 and 4 rely on.
 *
 * @param syncMode sync mode supplied by the {@code syncMode} method source
 */
@ParameterizedTest
@MethodSource({"syncMode"})
public void testSyncDataBase(String syncMode) throws Exception {
hiveSyncConfig.syncMode = syncMode;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 3;
String instantTime = "100";
HiveTestUtil.createCOWTable(instantTime, 5, true);
// Point the sync at a dedicated database name for this test.
hiveSyncConfig.databaseName = "database1";
// Case 1: autoCreateDatabase is false and the database does not exist.
hiveSyncConfig.autoCreateDatabase = false;
// Run the sync; it is expected to fail because nothing creates the database.
assertThrows(Exception.class, () -> {
new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem).syncHoodieTable();
});
// Case 2: autoCreateDatabase is true and the database does not exist.
hiveSyncConfig.autoCreateDatabase = true;
// This client is only used below to check whether the database exists after each sync.
HoodieHiveClient hiveClient =
new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
assertDoesNotThrow(() -> {
new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem).syncHoodieTable();
});
assertTrue(hiveClient.doesDataBaseExist(hiveSyncConfig.databaseName),
"DataBases " + hiveSyncConfig.databaseName + " should exist after sync completes");
// Case 3: autoCreateDatabase is false and the database already exists (created in case 2).
hiveSyncConfig.autoCreateDatabase = false;
assertDoesNotThrow(() -> {
new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem).syncHoodieTable();
});
assertTrue(hiveClient.doesDataBaseExist(hiveSyncConfig.databaseName),
"DataBases " + hiveSyncConfig.databaseName + " should exist after sync completes");
// Case 4: autoCreateDatabase is true and the database already exists; the sync must not fail.
hiveSyncConfig.autoCreateDatabase = true;
assertDoesNotThrow(() -> {
new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem).syncHoodieTable();
});
assertTrue(hiveClient.doesDataBaseExist(hiveSyncConfig.databaseName),
"DataBases " + hiveSyncConfig.databaseName + " should exist after sync completes");
}
@ParameterizedTest
@MethodSource({"syncDataSourceTableParams"})
public void testSyncCOWTableWithProperties(boolean useSchemaFromCommitMetadata,
@@ -1054,6 +1099,8 @@ public class TestHiveSyncTool {
hiveSyncConfig.syncMode = syncMode;
HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
HiveTestUtil.createCOWTable("100", 5, true);
// create database.
ddlExecutor.runSQL("create database " + hiveSyncConfig.databaseName);
HoodieHiveClient hiveClient =
new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
String tableName = HiveTestUtil.hiveSyncConfig.tableName;

View File

@@ -138,8 +138,7 @@ public class HiveTestUtil {
ddlExecutor.runSQL("drop table if exists " + tableName);
}
createdTablesSet.clear();
ddlExecutor.runSQL("drop database if exists " + hiveSyncConfig.databaseName);
ddlExecutor.runSQL("create database " + hiveSyncConfig.databaseName);
ddlExecutor.runSQL("drop database if exists " + hiveSyncConfig.databaseName + " cascade");
}
public static HiveConf getHiveConf() {