1
0

[HUDI-4324] Remove use_jdbc config from hudi sync (#6072)

* [HUDI-4324] Remove use_jdbc config from hudi sync
* Users should use HIVE_SYNC_MODE instead
This commit is contained in:
Shiyan Xu
2022-07-10 00:46:09 -05:00
committed by GitHub
parent 10aec07fd2
commit 046044c83d
20 changed files with 29 additions and 72 deletions

View File

@@ -45,7 +45,6 @@ import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_PROPERTIES;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_SERDE_PROPERTIES;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT;
import static org.apache.hudi.hive.HiveSyncConfigHolder.METASTORE_URIS;
@@ -95,9 +94,6 @@ public class HiveSyncConfig extends HoodieSyncConfig {
+ "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to "
+ "org.apache.hudi input format.")
public Boolean usePreApacheInputFormat;
@Deprecated
@Parameter(names = {"--use-jdbc"}, description = "Hive jdbc connect url")
public Boolean useJdbc;
@Parameter(names = {"--metastore-uris"}, description = "Hive metastore uris")
public String metastoreUris;
@Parameter(names = {"--sync-mode"}, description = "Mode to choose for Hive ops. Valid values are hms,glue,jdbc and hiveql")
@@ -142,7 +138,6 @@ public class HiveSyncConfig extends HoodieSyncConfig {
props.setPropertyIfNonNull(HIVE_PASS.key(), hivePass);
props.setPropertyIfNonNull(HIVE_URL.key(), jdbcUrl);
props.setPropertyIfNonNull(HIVE_USE_PRE_APACHE_INPUT_FORMAT.key(), usePreApacheInputFormat);
props.setPropertyIfNonNull(HIVE_USE_JDBC.key(), useJdbc);
props.setPropertyIfNonNull(HIVE_SYNC_MODE.key(), syncMode);
props.setPropertyIfNonNull(METASTORE_URIS.key(), metastoreUris);
props.setPropertyIfNonNull(HIVE_AUTO_CREATE_DATABASE.key(), autoCreateDatabase);

View File

@@ -52,15 +52,6 @@ public class HiveSyncConfigHolder {
.withDocumentation("Flag to choose InputFormat under com.uber.hoodie package instead of org.apache.hudi package. "
+ "Use this when you are in the process of migrating from "
+ "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to org.apache.hudi input format");
/**
* @deprecated Use {@link #HIVE_SYNC_MODE} instead of this config from 0.9.0
*/
@Deprecated
public static final ConfigProperty<String> HIVE_USE_JDBC = ConfigProperty
.key("hoodie.datasource.hive_sync.use_jdbc")
.defaultValue("true")
.deprecatedAfter("0.9.0")
.withDocumentation("Use JDBC when hive synchronization is enabled");
public static final ConfigProperty<String> METASTORE_URIS = ConfigProperty
.key("hoodie.datasource.hive_sync.metastore.uris")
.defaultValue("thrift://localhost:9083")
@@ -109,7 +100,7 @@ public class HiveSyncConfigHolder {
.withDocumentation("The number of partitions one batch when synchronous partitions to hive.");
public static final ConfigProperty<String> HIVE_SYNC_MODE = ConfigProperty
.key("hoodie.datasource.hive_sync.mode")
.noDefaultValue()
.defaultValue("jdbc")
.withDocumentation("Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql.");
public static final ConfigProperty<Boolean> HIVE_SYNC_BUCKET_SYNC = ConfigProperty
.key("hoodie.datasource.hive_sync.bucket_sync")

View File

@@ -21,7 +21,6 @@ package org.apache.hudi.hive;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.hive.ddl.DDLExecutor;
import org.apache.hudi.hive.ddl.HMSDDLExecutor;
@@ -49,7 +48,6 @@ import java.util.stream.Collectors;
import static org.apache.hudi.hadoop.utils.HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_MODE;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
import static org.apache.hudi.sync.common.util.TableUtils.tableId;
@@ -72,23 +70,19 @@ public class HoodieHiveSyncClient extends HoodieSyncClient {
// Support JDBC, HiveQL and metastore based implementations for backwards compatibility. Future users should
// avoid the jdbc sync mode and depend on the metastore client for all hive registrations
try {
if (!StringUtils.isNullOrEmpty(config.getString(HIVE_SYNC_MODE))) {
HiveSyncMode syncMode = HiveSyncMode.of(config.getString(HIVE_SYNC_MODE));
switch (syncMode) {
case HMS:
ddlExecutor = new HMSDDLExecutor(config);
break;
case HIVEQL:
ddlExecutor = new HiveQueryDDLExecutor(config);
break;
case JDBC:
ddlExecutor = new JDBCExecutor(config);
break;
default:
throw new HoodieHiveSyncException("Invalid sync mode given " + config.getString(HIVE_SYNC_MODE));
}
} else {
ddlExecutor = config.getBoolean(HIVE_USE_JDBC) ? new JDBCExecutor(config) : new HiveQueryDDLExecutor(config);
HiveSyncMode syncMode = HiveSyncMode.of(config.getStringOrDefault(HIVE_SYNC_MODE));
switch (syncMode) {
case HMS:
ddlExecutor = new HMSDDLExecutor(config);
break;
case HIVEQL:
ddlExecutor = new HiveQueryDDLExecutor(config);
break;
case JDBC:
ddlExecutor = new JDBCExecutor(config);
break;
default:
throw new HoodieHiveSyncException("Invalid sync mode given " + config.getString(HIVE_SYNC_MODE));
}
this.client = Hive.get(config.getHiveConf()).getMSC();
} catch (Exception e) {

View File

@@ -47,7 +47,7 @@ import java.util.stream.Collectors;
import static org.apache.hudi.sync.common.util.TableUtils.tableId;
/**
* This class offers DDL executor backed by the hive.ql Driver This class preserves the old useJDBC = false way of doing things.
* This class offers a DDL executor backed by the HiveQL Driver.
*/
public class HiveQueryDDLExecutor extends QueryBasedDDLExecutor {

View File

@@ -43,7 +43,7 @@ import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
import static org.apache.hudi.hive.util.HiveSchemaUtil.HIVE_ESCAPE_CHARACTER;
/**
* This class offers DDL executor backed by the jdbc This class preserves the old useJDBC = true way of doing things.
* This class offers a DDL executor backed by JDBC.
*/
public class JDBCExecutor extends QueryBasedDDLExecutor {

View File

@@ -32,6 +32,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_MODE;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
@@ -92,6 +93,7 @@ public class HiveSyncGlobalCommitParams {
String jdbcUrl = forRemote ? loadedProps.getProperty(REMOTE_HIVE_SERVER_JDBC_URLS)
: loadedProps.getProperty(LOCAL_HIVE_SERVER_JDBC_URLS, loadedProps.getProperty(HIVE_URL.key()));
props.setPropertyIfNonNull(HIVE_URL.key(), jdbcUrl);
props.setProperty(HIVE_SYNC_MODE.key(), "jdbc");
LOG.info("building hivesync config forRemote: " + forRemote + " " + jdbcUrl + " "
+ basePath);
return props;

View File

@@ -120,7 +120,7 @@ public class TestHiveSyncTool {
return opts;
}
// (useJdbc, useSchemaFromCommitMetadata, syncAsDataSource)
// (useSchemaFromCommitMetadata, syncAsDataSource, syncMode)
private static Iterable<Object[]> syncDataSourceTableParams() {
List<Object[]> opts = new ArrayList<>();
for (Object mode : SYNC_MODES) {