[HUDI-717] Fixed usage of HiveDriver for DDL statements. (#1416)
When using HiveDriver mode in HudiHiveClient, Hive 2.x DDL operations like ALTER PARTITION may fail. This is because Hive 2.x doesn't like `db`.`table_name` for operations. In this fix, we set the name of the database in the SessionState create for the Driver.
This commit is contained in:
@@ -198,7 +198,8 @@ public class HoodieHiveClient {
|
|||||||
for (String partition : partitions) {
|
for (String partition : partitions) {
|
||||||
String partitionClause = getPartitionClause(partition);
|
String partitionClause = getPartitionClause(partition);
|
||||||
Path partitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition);
|
Path partitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition);
|
||||||
String fullPartitionPath = partitionPath.toUri().getScheme().equals(StorageSchemes.HDFS.getScheme())
|
String partitionScheme = partitionPath.toUri().getScheme();
|
||||||
|
String fullPartitionPath = StorageSchemes.HDFS.getScheme().equals(partitionScheme)
|
||||||
? FSUtils.getDFSFullPartitionPath(fs, partitionPath) : partitionPath.toString();
|
? FSUtils.getDFSFullPartitionPath(fs, partitionPath) : partitionPath.toString();
|
||||||
String changePartition =
|
String changePartition =
|
||||||
alterTable + " PARTITION (" + partitionClause + ") SET LOCATION '" + fullPartitionPath + "'";
|
alterTable + " PARTITION (" + partitionClause + ") SET LOCATION '" + fullPartitionPath + "'";
|
||||||
@@ -505,6 +506,7 @@ public class HoodieHiveClient {
|
|||||||
try {
|
try {
|
||||||
final long startTime = System.currentTimeMillis();
|
final long startTime = System.currentTimeMillis();
|
||||||
ss = SessionState.start(configuration);
|
ss = SessionState.start(configuration);
|
||||||
|
ss.setCurrentDatabase(syncConfig.databaseName);
|
||||||
hiveDriver = new org.apache.hadoop.hive.ql.Driver(configuration);
|
hiveDriver = new org.apache.hadoop.hive.ql.Driver(configuration);
|
||||||
final long endTime = System.currentTimeMillis();
|
final long endTime = System.currentTimeMillis();
|
||||||
LOG.info(String.format("Time taken to start SessionState and create Driver: %s ms", (endTime - startTime)));
|
LOG.info(String.format("Time taken to start SessionState and create Driver: %s ms", (endTime - startTime)));
|
||||||
|
|||||||
@@ -168,6 +168,47 @@ public class TestHiveSyncTool {
|
|||||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
|
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
|
||||||
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
||||||
|
|
||||||
|
// Adding of new partitions
|
||||||
|
List<String> newPartition = Arrays.asList("2050/01/01");
|
||||||
|
hiveClient.addPartitionsToTable(TestUtil.hiveSyncConfig.tableName, Arrays.asList());
|
||||||
|
assertEquals("No new partition should be added", 5,
|
||||||
|
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||||
|
hiveClient.addPartitionsToTable(TestUtil.hiveSyncConfig.tableName, newPartition);
|
||||||
|
assertEquals("New partition should be added", 6,
|
||||||
|
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||||
|
|
||||||
|
// Update partitions
|
||||||
|
hiveClient.updatePartitionsToTable(TestUtil.hiveSyncConfig.tableName, Arrays.asList());
|
||||||
|
assertEquals("Partition count should remain the same", 6,
|
||||||
|
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||||
|
hiveClient.updatePartitionsToTable(TestUtil.hiveSyncConfig.tableName, newPartition);
|
||||||
|
assertEquals("Partition count should remain the same", 6,
|
||||||
|
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||||
|
|
||||||
|
// Alter partitions
|
||||||
|
// Manually change a hive partition location to check if the sync will detect
|
||||||
|
// it and generage a partition update event for it.
|
||||||
|
hiveClient.updateHiveSQL("ALTER TABLE `" + TestUtil.hiveSyncConfig.tableName
|
||||||
|
+ "` PARTITION (`datestr`='2050-01-01') SET LOCATION '/some/new/location'");
|
||||||
|
|
||||||
|
hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
|
List<Partition> hivePartitions = hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName);
|
||||||
|
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.empty());
|
||||||
|
writtenPartitionsSince.add(newPartition.get(0));
|
||||||
|
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
|
||||||
|
assertEquals("There should be only one paritition event", 1, partitionEvents.size());
|
||||||
|
assertEquals("The one partition event must of type UPDATE", PartitionEventType.UPDATE,
|
||||||
|
partitionEvents.iterator().next().eventType);
|
||||||
|
|
||||||
|
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
|
tool.syncHoodieTable();
|
||||||
|
// Sync should update the changed partition to correct path
|
||||||
|
List<Partition> tablePartitions = hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName);
|
||||||
|
assertEquals("The one partition we wrote should be added to hive", 6, tablePartitions.size());
|
||||||
|
assertEquals("The last commit that was sycned should be 100", instantTime,
|
||||||
|
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -250,7 +291,7 @@ public class TestHiveSyncTool {
|
|||||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||||
String instantTime = "100";
|
String instantTime = "100";
|
||||||
String deltaCommitTime = "101";
|
String deltaCommitTime = "101";
|
||||||
TestUtil.createMORTable(instantTime, deltaCommitTime, 5);
|
TestUtil.createMORTable(instantTime, deltaCommitTime, 5, true);
|
||||||
|
|
||||||
String roTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE;
|
String roTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE;
|
||||||
HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
@@ -294,7 +335,7 @@ public class TestHiveSyncTool {
|
|||||||
String instantTime = "100";
|
String instantTime = "100";
|
||||||
String deltaCommitTime = "101";
|
String deltaCommitTime = "101";
|
||||||
String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
|
String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
|
||||||
TestUtil.createMORTable(instantTime, deltaCommitTime, 5);
|
TestUtil.createMORTable(instantTime, deltaCommitTime, 5, true);
|
||||||
HoodieHiveClient hiveClientRT =
|
HoodieHiveClient hiveClientRT =
|
||||||
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
|
|
||||||
@@ -363,4 +404,50 @@ public class TestHiveSyncTool {
|
|||||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
|
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
|
||||||
hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get());
|
hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReadSchemaForMOR() throws Exception {
|
||||||
|
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||||
|
String commitTime = "100";
|
||||||
|
String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
|
||||||
|
TestUtil.createMORTable(commitTime, "", 5, false);
|
||||||
|
HoodieHiveClient hiveClientRT =
|
||||||
|
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
|
|
||||||
|
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||||
|
+ " should not exist initially", hiveClientRT.doesTableExist(snapshotTableName));
|
||||||
|
|
||||||
|
// Lets do the sync
|
||||||
|
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
|
tool.syncHoodieTable();
|
||||||
|
|
||||||
|
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||||
|
+ " should exist after sync completes", hiveClientRT.doesTableExist(snapshotTableName));
|
||||||
|
|
||||||
|
// Schema being read from compacted base files
|
||||||
|
assertEquals("Hive Schema should match the table schema + partition field", hiveClientRT.getTableSchema(snapshotTableName).size(),
|
||||||
|
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
|
||||||
|
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||||
|
hiveClientRT.scanTablePartitions(snapshotTableName).size());
|
||||||
|
|
||||||
|
// Now lets create more partitions and these are the only ones which needs to be synced
|
||||||
|
DateTime dateTime = DateTime.now().plusDays(6);
|
||||||
|
String commitTime2 = "102";
|
||||||
|
String deltaCommitTime2 = "103";
|
||||||
|
|
||||||
|
TestUtil.addMORPartitions(1, true, false, dateTime, commitTime2, deltaCommitTime2);
|
||||||
|
// Lets do the sync
|
||||||
|
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
|
tool.syncHoodieTable();
|
||||||
|
hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
|
|
||||||
|
// Schema being read from the log files
|
||||||
|
assertEquals("Hive Schema should match the evolved table schema + partition field",
|
||||||
|
hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
|
||||||
|
// Sync should add the one partition
|
||||||
|
assertEquals("The 1 partition we wrote should be added to hive", 6, hiveClientRT.scanTablePartitions(snapshotTableName).size());
|
||||||
|
assertEquals("The last commit that was sycned should be 103", deltaCommitTime2,
|
||||||
|
hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -107,7 +107,6 @@ public class TestUtil {
|
|||||||
|
|
||||||
hiveSyncConfig = new HiveSyncConfig();
|
hiveSyncConfig = new HiveSyncConfig();
|
||||||
hiveSyncConfig.jdbcUrl = "jdbc:hive2://127.0.0.1:9999/";
|
hiveSyncConfig.jdbcUrl = "jdbc:hive2://127.0.0.1:9999/";
|
||||||
hiveSyncConfig.databaseName = "hdrone_test";
|
|
||||||
hiveSyncConfig.hiveUser = "";
|
hiveSyncConfig.hiveUser = "";
|
||||||
hiveSyncConfig.hivePass = "";
|
hiveSyncConfig.hivePass = "";
|
||||||
hiveSyncConfig.databaseName = "testdb";
|
hiveSyncConfig.databaseName = "testdb";
|
||||||
@@ -167,7 +166,8 @@ public class TestUtil {
|
|||||||
createCommitFile(commitMetadata, instantTime);
|
createCommitFile(commitMetadata, instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void createMORTable(String instantTime, String deltaCommitTime, int numberOfPartitions)
|
static void createMORTable(String commitTime, String deltaCommitTime, int numberOfPartitions,
|
||||||
|
boolean createDeltaCommit)
|
||||||
throws IOException, InitializationError, URISyntaxException, InterruptedException {
|
throws IOException, InitializationError, URISyntaxException, InterruptedException {
|
||||||
Path path = new Path(hiveSyncConfig.basePath);
|
Path path = new Path(hiveSyncConfig.basePath);
|
||||||
FileIOUtils.deleteDirectory(new File(hiveSyncConfig.basePath));
|
FileIOUtils.deleteDirectory(new File(hiveSyncConfig.basePath));
|
||||||
@@ -177,17 +177,19 @@ public class TestUtil {
|
|||||||
boolean result = fileSystem.mkdirs(path);
|
boolean result = fileSystem.mkdirs(path);
|
||||||
checkResult(result);
|
checkResult(result);
|
||||||
DateTime dateTime = DateTime.now();
|
DateTime dateTime = DateTime.now();
|
||||||
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, instantTime);
|
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime);
|
||||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
||||||
createdTablesSet
|
createdTablesSet
|
||||||
.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE);
|
.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE);
|
||||||
HoodieCommitMetadata compactionMetadata = new HoodieCommitMetadata();
|
HoodieCommitMetadata compactionMetadata = new HoodieCommitMetadata();
|
||||||
commitMetadata.getPartitionToWriteStats()
|
commitMetadata.getPartitionToWriteStats()
|
||||||
.forEach((key, value) -> value.forEach(l -> compactionMetadata.addWriteStat(key, l)));
|
.forEach((key, value) -> value.forEach(l -> compactionMetadata.addWriteStat(key, l)));
|
||||||
createCompactionCommitFile(compactionMetadata, instantTime);
|
createCompactionCommitFile(compactionMetadata, commitTime);
|
||||||
// Write a delta commit
|
if (createDeltaCommit) {
|
||||||
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), true);
|
// Write a delta commit
|
||||||
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
|
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), true);
|
||||||
|
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void addCOWPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, DateTime startFrom,
|
static void addCOWPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, DateTime startFrom,
|
||||||
|
|||||||
Reference in New Issue
Block a user