1
0

[HUDI-4323] Make database table names optional in sync tool (#6073)

* [HUDI-4323] Make database table names optional in sync tool
* Infer these properties from the table config
This commit is contained in:
Shiyan Xu
2022-07-10 23:33:31 -05:00
committed by GitHub
parent 63f95ab801
commit 51244eba82
3 changed files with 65 additions and 16 deletions

View File

@@ -414,8 +414,6 @@ object DataSourceWriteOptions {
@Deprecated
val HIVE_DATABASE: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_DATABASE_NAME
@Deprecated
val hiveTableOptKeyInferFunc: JavaFunction[HoodieConfig, Option[String]] = HoodieSyncConfig.TABLE_NAME_INFERENCE_FUNCTION
@Deprecated
val HIVE_TABLE: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_TABLE_NAME
@Deprecated
val HIVE_BASE_FILE_FORMAT: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT

View File

@@ -23,7 +23,6 @@ import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
@@ -37,6 +36,10 @@ import java.util.List;
import java.util.Properties;
import java.util.function.Function;
import static org.apache.hudi.common.table.HoodieTableConfig.DATABASE_NAME;
import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_TABLE_NAME_KEY;
import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_WRITE_TABLE_NAME_KEY;
/**
* Configs needed to sync data into external meta stores, catalogs, etc.
*/
@@ -56,22 +59,14 @@ public class HoodieSyncConfig extends HoodieConfig {
public static final ConfigProperty<String> META_SYNC_DATABASE_NAME = ConfigProperty
.key("hoodie.datasource.hive_sync.database")
.defaultValue("default")
.withInferFunction(cfg -> Option.ofNullable(cfg.getString(DATABASE_NAME)))
.withDocumentation("The name of the destination database that we should sync the hudi table to.");
// If the table name for the metastore destination is not provided, pick it up from write or table configs.
public static final Function<HoodieConfig, Option<String>> TABLE_NAME_INFERENCE_FUNCTION = cfg -> {
if (cfg.contains(HoodieTableConfig.HOODIE_WRITE_TABLE_NAME_KEY)) {
return Option.of(cfg.getString(HoodieTableConfig.HOODIE_WRITE_TABLE_NAME_KEY));
} else if (cfg.contains(HoodieTableConfig.HOODIE_TABLE_NAME_KEY)) {
return Option.of(cfg.getString(HoodieTableConfig.HOODIE_TABLE_NAME_KEY));
} else {
return Option.empty();
}
};
public static final ConfigProperty<String> META_SYNC_TABLE_NAME = ConfigProperty
.key("hoodie.datasource.hive_sync.table")
.defaultValue("unknown")
.withInferFunction(TABLE_NAME_INFERENCE_FUNCTION)
.withInferFunction(cfg -> Option.ofNullable(cfg.getString(HOODIE_WRITE_TABLE_NAME_KEY))
.or(() -> Option.ofNullable(cfg.getString(HOODIE_TABLE_NAME_KEY))))
.withDocumentation("The name of the destination table that we should sync the hudi table to.");
public static final ConfigProperty<String> META_SYNC_BASE_FILE_FORMAT = ConfigProperty
@@ -148,6 +143,7 @@ public class HoodieSyncConfig extends HoodieConfig {
/**
 * Creates a sync config from the given properties and Hadoop configuration.
 *
 * @param props properties handed to the parent constructor
 * @param hadoopConf Hadoop configuration kept on this instance
 */
public HoodieSyncConfig(Properties props, Configuration hadoopConf) {
super(props);
// NOTE(review): presumably fills in default/inferred values for the config properties declared
// on the named class; getClass() resolves to the runtime (sub)class, not necessarily
// HoodieSyncConfig itself — confirm against setDefaults.
setDefaults(getClass().getName());
this.hadoopConf = hadoopConf;
}
@@ -173,9 +169,9 @@ public class HoodieSyncConfig extends HoodieConfig {
}
public static class HoodieSyncConfigParams {
@Parameter(names = {"--database"}, description = "name of the target database in meta store", required = true)
@Parameter(names = {"--database"}, description = "name of the target database in meta store")
public String databaseName;
@Parameter(names = {"--table"}, description = "name of the target table in meta store", required = true)
@Parameter(names = {"--table"}, description = "name of the target table in meta store")
public String tableName;
@Parameter(names = {"--base-path"}, description = "Base path of the hoodie table to sync", required = true)
public String basePath;

View File

@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.sync.common;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hadoop.conf.Configuration;
import org.junit.jupiter.api.Test;
import java.util.Properties;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Checks which database and table names a {@link HoodieSyncConfig} reports for meta sync
 * when they are not set explicitly.
 */
class TestHoodieSyncConfig {

  /**
   * Covers three scenarios: table name supplied through the table-name key, table name supplied
   * through the write-table-name key, and no properties at all (expects "default" / "unknown").
   */
  @Test
  void testInferDatabaseAndTableNames() {
    // Database name plus the table config's table-name key.
    assertSyncNames("db1", "tbl1",
        syncConfigWith("db1", HoodieTableConfig.HOODIE_TABLE_NAME_KEY, "tbl1"));

    // Database name plus the write config's table-name key.
    assertSyncNames("db2", "tbl2",
        syncConfigWith("db2", HoodieTableConfig.HOODIE_WRITE_TABLE_NAME_KEY, "tbl2"));

    // Nothing provided: the declared default values are expected.
    assertSyncNames("default", "unknown",
        new HoodieSyncConfig(new Properties(), new Configuration()));
  }

  /** Builds a sync config whose properties carry only a database name and one table-name entry. */
  private static HoodieSyncConfig syncConfigWith(String database, String tableNameKey, String tableName) {
    Properties props = new Properties();
    props.setProperty(HoodieTableConfig.DATABASE_NAME.key(), database);
    props.setProperty(tableNameKey, tableName);
    return new HoodieSyncConfig(props, new Configuration());
  }

  /** Asserts the meta sync database name first, then the meta sync table name. */
  private static void assertSyncNames(String expectedDatabase, String expectedTable, HoodieSyncConfig config) {
    assertEquals(expectedDatabase, config.getString(META_SYNC_DATABASE_NAME));
    assertEquals(expectedTable, config.getString(META_SYNC_TABLE_NAME));
  }
}