1
0

[HUDI-2883] Refactor hive sync tool / config to use reflection and standardize configs (#4175)

- Refactor hive sync tool / config to use reflection and standardize configs

Co-authored-by: sivabalan <n.siva.b@gmail.com>
Co-authored-by: Rajesh Mahindra <rmahindra@Rajeshs-MacBook-Pro.local>
Co-authored-by: Raymond Xu <2701446+xushiyan@users.noreply.github.com>
This commit is contained in:
Rajesh Mahindra
2022-03-21 19:56:31 -07:00
committed by GitHub
parent 9b6e138af2
commit 5f570ea151
43 changed files with 1521 additions and 1217 deletions

View File

@@ -20,10 +20,13 @@ package org.apache.hudi.utilities;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.hive.HiveSyncConfig;
import org.apache.hudi.hive.HiveSyncTool;
import org.apache.hudi.hive.HoodieHiveClient;
import org.apache.hudi.hive.testutils.HiveTestUtil;
import org.apache.hudi.sync.common.HoodieSyncConfig;
import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
@@ -38,7 +41,7 @@ import java.nio.file.Paths;
import java.time.Instant;
import static org.apache.hudi.hive.testutils.HiveTestUtil.fileSystem;
import static org.apache.hudi.hive.testutils.HiveTestUtil.hiveSyncConfig;
import static org.apache.hudi.hive.testutils.HiveTestUtil.hiveSyncProps;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -69,12 +72,12 @@ public class TestHiveIncrementalPuller {
}
private HiveIncrementalPuller.Config getHivePullerConfig(String incrementalSql) throws IOException {
config.hiveJDBCUrl = hiveSyncConfig.jdbcUrl;
config.hiveUsername = hiveSyncConfig.hiveUser;
config.hivePassword = hiveSyncConfig.hivePass;
config.hiveJDBCUrl = hiveSyncProps.getString(HiveSyncConfig.HIVE_URL.key());
config.hiveUsername = hiveSyncProps.getString(HiveSyncConfig.HIVE_USER.key());
config.hivePassword = hiveSyncProps.getString(HiveSyncConfig.HIVE_PASS.key());
config.hoodieTmpDir = Files.createTempDirectory("hivePullerTest").toUri().toString();
config.sourceDb = hiveSyncConfig.databaseName;
config.sourceTable = hiveSyncConfig.tableName;
config.sourceDb = hiveSyncProps.getString(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key());
config.sourceTable = hiveSyncProps.getString(HoodieSyncConfig.META_SYNC_TABLE_NAME.key());
config.targetDb = "tgtdb";
config.targetTable = "test2";
config.tmpDb = "tmp_db";
@@ -98,9 +101,9 @@ public class TestHiveIncrementalPuller {
private void createSourceTable() throws IOException, URISyntaxException {
String instantTime = "101";
HiveTestUtil.createCOWTable(instantTime, 5, true);
hiveSyncConfig.syncMode = "jdbc";
HiveTestUtil.hiveSyncConfig.batchSyncNum = 3;
HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
hiveSyncProps.setProperty(HiveSyncConfig.HIVE_SYNC_MODE.key(), "jdbc");
HiveSyncTool tool = new HiveSyncTool(hiveSyncProps, HiveTestUtil.getHiveConf(), fileSystem);
tool.syncHoodieTable();
}
@@ -113,20 +116,20 @@ public class TestHiveIncrementalPuller {
tool.syncHoodieTable();
}
private HiveSyncConfig getTargetHiveSyncConfig(String basePath) {
HiveSyncConfig config = HiveSyncConfig.copy(hiveSyncConfig);
config.databaseName = "tgtdb";
config.tableName = "test2";
config.basePath = basePath;
config.batchSyncNum = 3;
config.syncMode = "jdbc";
return config;
private TypedProperties getTargetHiveSyncConfig(String basePath) {
TypedProperties targetHiveSyncProps = new TypedProperties(hiveSyncProps);
targetHiveSyncProps.setProperty(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "tgtdb");
targetHiveSyncProps.setProperty(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "test2");
targetHiveSyncProps.setProperty(HoodieSyncConfig.META_SYNC_BASE_PATH.key(), basePath);
targetHiveSyncProps.setProperty(HiveSyncConfig.HIVE_SYNC_MODE.key(), "jdbc");
return targetHiveSyncProps;
}
private HiveSyncConfig getAssertionSyncConfig(String databaseName) {
HiveSyncConfig config = HiveSyncConfig.copy(hiveSyncConfig);
config.databaseName = databaseName;
return config;
private TypedProperties getAssertionSyncConfig(String databaseName) {
TypedProperties assertHiveSyncProps = new TypedProperties(hiveSyncProps);
assertHiveSyncProps.setProperty(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), databaseName);
return assertHiveSyncProps;
}
private void createTables() throws IOException, URISyntaxException {
@@ -158,12 +161,11 @@ public class TestHiveIncrementalPuller {
public void testPuller() throws IOException, URISyntaxException {
createTables();
HiveIncrementalPuller.Config cfg = getHivePullerConfig("select name from testdb.test1 where `_hoodie_commit_time` > '%s'");
HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, HiveTestUtil.getHiveConf(), fileSystem);
HoodieHiveClient hiveClient = new HoodieHiveClient(new HiveSyncConfig(hiveSyncProps), HiveTestUtil.getHiveConf(), fileSystem);
hiveClient.createDatabase(cfg.tmpDb);
HiveIncrementalPuller puller = new HiveIncrementalPuller(cfg);
puller.saveDelta();
HiveSyncConfig assertingConfig = getAssertionSyncConfig(cfg.tmpDb);
HoodieHiveClient assertingClient = new HoodieHiveClient(assertingConfig, HiveTestUtil.getHiveConf(), fileSystem);
HoodieHiveClient assertingClient = new HoodieHiveClient(new HiveSyncConfig(getAssertionSyncConfig(cfg.tmpDb)), HiveTestUtil.getHiveConf(), fileSystem);
String tmpTable = cfg.targetTable + "__" + cfg.sourceTable;
assertTrue(assertingClient.doesTableExist(tmpTable));
}

View File

@@ -18,7 +18,6 @@
package org.apache.hudi.utilities.functional;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.WriteOperationType;
@@ -28,6 +27,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hive.HiveSyncConfig;
import org.apache.hudi.hive.MultiPartKeysValueExtractor;
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
import org.apache.hudi.utilities.testutils.UtilitiesTestBase;
@@ -178,11 +178,11 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase {
props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
// Hive Configs
props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://127.0.0.1:9999/");
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "testdb1");
props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), "hive_trips");
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr");
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
props.setProperty(HiveSyncConfig.HIVE_URL.key(), "jdbc:hive2://127.0.0.1:9999/");
props.setProperty(HiveSyncConfig.META_SYNC_DATABASE_NAME.key(), "testdb1");
props.setProperty(HiveSyncConfig.META_SYNC_TABLE_NAME.key(), "hive_trips");
props.setProperty(HiveSyncConfig.META_SYNC_PARTITION_FIELDS.key(), "datestr");
props.setProperty(HiveSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key(),
MultiPartKeysValueExtractor.class.getName());
UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE);
}
@@ -237,11 +237,11 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase {
protected static void populateCommonHiveProps(TypedProperties props) {
// Hive Configs
props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://127.0.0.1:9999/");
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "testdb2");
props.setProperty(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION().key(), "false");
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr");
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
props.setProperty(HiveSyncConfig.HIVE_URL.key(), "jdbc:hive2://127.0.0.1:9999/");
props.setProperty(HiveSyncConfig.META_SYNC_DATABASE_NAME.key(), "testdb2");
props.setProperty(HiveSyncConfig.META_SYNC_ASSUME_DATE_PARTITION.key(), "false");
props.setProperty(HiveSyncConfig.META_SYNC_PARTITION_FIELDS.key(), "datestr");
props.setProperty(HiveSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key(),
MultiPartKeysValueExtractor.class.getName());
}