[HUDI-485] Corrected the check for incremental sql (#2768)
* [HUDI-485]: corrected the check for incremental sql * [HUDI-485]: added tests * code review comments addressed * [HUDI-485]: added happy flow test case
This commit is contained in:
@@ -52,7 +52,8 @@ import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Utility to pull data after a given commit, based on the supplied HiveQL and save the delta as another hive temporary
|
||||
* table.
|
||||
* table. This temporary table can be further read using {@link org.apache.hudi.utilities.sources.HiveIncrPullSource} and the changes can
|
||||
* be applied to the target table.
|
||||
* <p>
|
||||
* Current Limitations:
|
||||
* <p>
|
||||
@@ -149,7 +150,7 @@ public class HiveIncrementalPuller {
|
||||
String tempDbTable = config.tmpDb + "." + config.targetTable + "__" + config.sourceTable;
|
||||
String tempDbTablePath =
|
||||
config.hoodieTmpDir + "/" + config.targetTable + "__" + config.sourceTable + "/" + lastCommitTime;
|
||||
executeStatement("drop table " + tempDbTable, stmt);
|
||||
executeStatement("drop table if exists " + tempDbTable, stmt);
|
||||
deleteHDFSPath(fs, tempDbTablePath);
|
||||
if (!ensureTempPathExists(fs, lastCommitTime)) {
|
||||
throw new IllegalStateException("Could not create target path at "
|
||||
@@ -188,12 +189,12 @@ public class HiveIncrementalPuller {
|
||||
throw new HoodieIncrementalPullSQLException(
|
||||
"Incremental SQL does not have " + config.sourceDb + "." + config.sourceTable);
|
||||
}
|
||||
if (!incrementalSQL.contains("`_hoodie_commit_time` > '%targetBasePath'")) {
|
||||
if (!incrementalSQL.contains("`_hoodie_commit_time` > '%s'")) {
|
||||
LOG.error("Incremental SQL : " + incrementalSQL
|
||||
+ " does not contain `_hoodie_commit_time` > '%targetBasePath'. Please add "
|
||||
+ " does not contain `_hoodie_commit_time` > '%s'. Please add "
|
||||
+ "this clause for incremental to work properly.");
|
||||
throw new HoodieIncrementalPullSQLException(
|
||||
"Incremental SQL does not have clause `_hoodie_commit_time` > '%targetBasePath', which "
|
||||
"Incremental SQL does not have clause `_hoodie_commit_time` > '%s', which "
|
||||
+ "means its not pulling incrementally");
|
||||
}
|
||||
|
||||
|
||||
@@ -18,18 +18,45 @@
|
||||
|
||||
package org.apache.hudi.utilities;
|
||||
|
||||
import org.apache.hadoop.hive.metastore.api.MetaException;
|
||||
import org.apache.hadoop.hive.ql.metadata.HiveException;
|
||||
import org.apache.hudi.hive.HiveSyncConfig;
|
||||
import org.apache.hudi.hive.HiveSyncTool;
|
||||
import org.apache.hudi.hive.HoodieHiveClient;
|
||||
import org.apache.hudi.hive.testutils.HiveTestUtil;
|
||||
import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.Instant;
|
||||
|
||||
import static org.apache.hudi.hive.testutils.HiveTestUtil.fileSystem;
|
||||
import static org.apache.hudi.hive.testutils.HiveTestUtil.hiveSyncConfig;
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestHiveIncrementalPuller {
|
||||
|
||||
private HiveIncrementalPuller.Config config;
|
||||
private String targetBasePath = null;
|
||||
|
||||
/**
 * Runs before each test: creates a fresh {@link HiveIncrementalPuller.Config} and calls
 * {@code HiveTestUtil.setUp()}.
 * NOTE(review): the two signature lines below look like the removed and added sides of a diff hunk;
 * presumably the variant with the {@code throws} clause is the current one — confirm.
 */
@BeforeEach
|
||||
public void setup() {
|
||||
public void setup() throws HiveException, IOException, InterruptedException, MetaException {
|
||||
config = new HiveIncrementalPuller.Config();
|
||||
HiveTestUtil.setUp();
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
public void teardown() throws Exception {
|
||||
HiveTestUtil.clearIncrementalPullSetup(config.hoodieTmpDir, targetBasePath);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -41,4 +68,104 @@ public class TestHiveIncrementalPuller {
|
||||
|
||||
}
|
||||
|
||||
private HiveIncrementalPuller.Config getHivePullerConfig(String incrementalSql) throws IOException {
|
||||
config.hiveJDBCUrl = hiveSyncConfig.jdbcUrl;
|
||||
config.hiveUsername = hiveSyncConfig.hiveUser;
|
||||
config.hivePassword = hiveSyncConfig.hivePass;
|
||||
config.hoodieTmpDir = Files.createTempDirectory("hivePullerTest").toUri().toString();
|
||||
config.sourceDb = hiveSyncConfig.databaseName;
|
||||
config.sourceTable = hiveSyncConfig.tableName;
|
||||
config.targetDb = "tgtdb";
|
||||
config.targetTable = "test2";
|
||||
config.tmpDb = "tmp_db";
|
||||
config.fromCommitTime = "100";
|
||||
createIncrementalSqlFile(incrementalSql, config);
|
||||
return config;
|
||||
}
|
||||
|
||||
private void createIncrementalSqlFile(String text, HiveIncrementalPuller.Config cfg) throws IOException {
|
||||
java.nio.file.Path path = Paths.get(cfg.hoodieTmpDir + "/incremental_pull.txt");
|
||||
Files.createDirectories(path.getParent());
|
||||
Files.createFile(path);
|
||||
try (FileWriter fr = new FileWriter(new File(path.toUri()))) {
|
||||
fr.write(text);
|
||||
} catch (Exception e) {
|
||||
// no-op
|
||||
}
|
||||
cfg.incrementalSQLFile = path.toString();
|
||||
}
|
||||
|
||||
private void createSourceTable() throws IOException, URISyntaxException {
|
||||
String instantTime = "101";
|
||||
HiveTestUtil.createCOWTable(instantTime, 5, true);
|
||||
hiveSyncConfig.syncMode = "jdbc";
|
||||
HiveTestUtil.hiveSyncConfig.batchSyncNum = 3;
|
||||
HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
}
|
||||
|
||||
private void createTargetTable() throws IOException, URISyntaxException {
|
||||
String instantTime = "100";
|
||||
targetBasePath = Files.createTempDirectory("hivesynctest1" + Instant.now().toEpochMilli()).toUri().toString();
|
||||
HiveTestUtil.createCOWTable(instantTime, 5, true,
|
||||
targetBasePath, "tgtdb", "test2");
|
||||
HiveSyncTool tool = new HiveSyncTool(getTargetHiveSyncConfig(targetBasePath), HiveTestUtil.getHiveConf(), fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
}
|
||||
|
||||
private HiveSyncConfig getTargetHiveSyncConfig(String basePath) {
|
||||
HiveSyncConfig config = HiveSyncConfig.copy(hiveSyncConfig);
|
||||
config.databaseName = "tgtdb";
|
||||
config.tableName = "test2";
|
||||
config.basePath = basePath;
|
||||
config.batchSyncNum = 3;
|
||||
config.syncMode = "jdbc";
|
||||
return config;
|
||||
}
|
||||
|
||||
private HiveSyncConfig getAssertionSyncConfig(String databaseName) {
|
||||
HiveSyncConfig config = HiveSyncConfig.copy(hiveSyncConfig);
|
||||
config.databaseName = databaseName;
|
||||
return config;
|
||||
}
|
||||
|
||||
private void createTables() throws IOException, URISyntaxException {
|
||||
createSourceTable();
|
||||
createTargetTable();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPullerWithoutIncrementalClause() throws IOException, URISyntaxException {
|
||||
createTables();
|
||||
HiveIncrementalPuller puller = new HiveIncrementalPuller(getHivePullerConfig(
|
||||
"select name from testdb.test1"));
|
||||
Exception e = assertThrows(HoodieIncrementalPullSQLException.class, puller::saveDelta,
|
||||
"Should fail when incremental clause not provided!");
|
||||
assertTrue(e.getMessage().contains("Incremental SQL does not have clause `_hoodie_commit_time` > '%s', which means its not pulling incrementally"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPullerWithoutSourceInSql() throws IOException, URISyntaxException {
|
||||
createTables();
|
||||
HiveIncrementalPuller puller = new HiveIncrementalPuller(getHivePullerConfig(
|
||||
"select name from tgtdb.test2 where `_hoodie_commit_time` > '%s'"));
|
||||
Exception e = assertThrows(HoodieIncrementalPullSQLException.class, puller::saveDelta,
|
||||
"Should fail when source db and table names not provided!");
|
||||
assertTrue(e.getMessage().contains("Incremental SQL does not have testdb.test1"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPuller() throws IOException, URISyntaxException {
|
||||
createTables();
|
||||
HiveIncrementalPuller.Config cfg = getHivePullerConfig("select name from testdb.test1 where `_hoodie_commit_time` > '%s'");
|
||||
HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
|
||||
hiveClient.createDatabase(cfg.tmpDb);
|
||||
HiveIncrementalPuller puller = new HiveIncrementalPuller(cfg);
|
||||
puller.saveDelta();
|
||||
HiveSyncConfig assertingConfig = getAssertionSyncConfig(cfg.tmpDb);
|
||||
HoodieHiveClient assertingClient = new HoodieHiveClient(assertingConfig, HiveTestUtil.getHiveConf(), fileSystem);
|
||||
String tmpTable = cfg.targetTable + "__" + cfg.sourceTable;
|
||||
assertTrue(assertingClient.doesTableExist(tmpTable));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user