1
0

[HUDI-3985] Refactor DLASyncTool to support read hoodie table as spark datasource table (#5532)

This commit is contained in:
huberylee
2022-05-20 22:25:32 +08:00
committed by GitHub
parent c7576f7613
commit 85b146d3d5
26 changed files with 1281 additions and 974 deletions

View File

@@ -0,0 +1,46 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
  <!-- Builds a single self-contained ("uber") jar with all dependencies unpacked into it. -->
  <id>jar-with-dependencies</id>
  <formats>
    <format>jar</format>
  </formats>
  <!-- Place dependency contents at the jar root rather than under a versioned directory. -->
  <includeBaseDirectory>false</includeBaseDirectory>
  <dependencySets>
    <!-- Runtime-scoped dependencies, minus test/analysis/hbase artifacts that
         are not needed (or would conflict) at sync-tool runtime. -->
    <dependencySet>
      <outputDirectory>/</outputDirectory>
      <unpack>true</unpack>
      <scope>runtime</scope>
      <excludes>
        <exclude>junit:junit</exclude>
        <exclude>com.google.code.findbugs:*</exclude>
        <exclude>org.apache.hbase:*</exclude>
      </excludes>
    </dependencySet>
    <!-- Provided-scoped dependencies are also bundled so the tool can run standalone. -->
    <dependencySet>
      <unpack>true</unpack>
      <scope>provided</scope>
    </dependencySet>
  </dependencySets>
</assembly>

View File

@@ -0,0 +1,128 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sync.adb;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.hive.PartitionValueExtractor;
import org.apache.hudi.hive.SchemaDifference;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Base client for syncing a Hudi table's metadata (schema, partitions) to
 * Alibaba Cloud AnalyticDB(ADB). Concrete subclasses implement the actual
 * metadata operations (e.g. {@code HoodieAdbJdbcClient} over JDBC).
 */
public abstract class AbstractAdbSyncHoodieClient extends AbstractSyncHoodieClient {
  protected AdbSyncConfig adbSyncConfig;
  // Extracts partition values from a relative storage partition path.
  protected PartitionValueExtractor partitionValueExtractor;
  // Completed commits of the table, used for incremental partition discovery.
  protected HoodieTimeline activeTimeline;

  public AbstractAdbSyncHoodieClient(AdbSyncConfig syncConfig, FileSystem fs) {
    super(syncConfig.basePath, syncConfig.assumeDatePartitioning,
        syncConfig.useFileListingFromMetadata, false, fs);
    this.adbSyncConfig = syncConfig;
    final String clazz = adbSyncConfig.partitionValueExtractorClass;
    try {
      // Use getDeclaredConstructor().newInstance() instead of the deprecated
      // Class#newInstance(), which propagates checked constructor exceptions
      // unwrapped. Any failure is still surfaced as a HoodieException below.
      this.partitionValueExtractor =
          (PartitionValueExtractor) Class.forName(clazz).getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new HoodieException("Fail to init PartitionValueExtractor class " + clazz, e);
    }
    activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
  }

  /**
   * Computes the partition changes (ADD / UPDATE events) between the partitions
   * currently registered in the target table and the partitions written on storage.
   *
   * @param tablePartitions           registered partitions: partition values -> full partition path
   * @param partitionStoragePartitions relative partition paths written since the last sync
   * @return list of partition events to apply to the target table
   */
  public List<PartitionEvent> getPartitionEvents(Map<List<String>, String> tablePartitions,
                                                 List<String> partitionStoragePartitions) {
    Map<String, String> paths = new HashMap<>();
    for (Map.Entry<List<String>, String> entry : tablePartitions.entrySet()) {
      List<String> partitionValues = entry.getKey();
      String fullTablePartitionPath = entry.getValue();
      paths.put(String.join(", ", partitionValues), fullTablePartitionPath);
    }
    List<PartitionEvent> events = new ArrayList<>();
    for (String storagePartition : partitionStoragePartitions) {
      Path storagePartitionPath = FSUtils.getPartitionPath(adbSyncConfig.basePath, storagePartition);
      String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath();
      // Check whether the partition values match and whether the storage path matches
      List<String> storagePartitionValues = partitionValueExtractor.extractPartitionValuesInPath(storagePartition);
      if (adbSyncConfig.useHiveStylePartitioning) {
        // For hive-style partitioning the path is rebuilt from the raw values
        // (field=value segments are stripped by the extractor).
        String partition = String.join("/", storagePartitionValues);
        storagePartitionPath = FSUtils.getPartitionPath(adbSyncConfig.basePath, partition);
        fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath();
      }
      if (!storagePartitionValues.isEmpty()) {
        String storageValue = String.join(", ", storagePartitionValues);
        if (!paths.containsKey(storageValue)) {
          events.add(PartitionEvent.newPartitionAddEvent(storagePartition));
        } else if (!paths.get(storageValue).equals(fullStoragePartitionPath)) {
          events.add(PartitionEvent.newPartitionUpdateEvent(storagePartition));
        }
      }
    }
    return events;
  }

  /** No resources are held at this level; subclasses override to release theirs. */
  public void close() {
  }

  public abstract Map<List<String>, String> scanTablePartitions(String tableName) throws Exception;

  public abstract void updateTableDefinition(String tableName, SchemaDifference schemaDiff) throws Exception;

  public abstract boolean databaseExists(String databaseName) throws Exception;

  public abstract void createDatabase(String databaseName) throws Exception;

  public abstract void dropTable(String tableName);

  /**
   * Resolves the database location: the configured {@code dbLocation} if set,
   * otherwise the parent of the table base path (or the base path itself when
   * it is a filesystem root), normalized to an absolute path ending in "/".
   */
  protected String getDatabasePath() {
    String dbLocation = adbSyncConfig.dbLocation;
    Path dbLocationPath;
    if (StringUtils.isNullOrEmpty(dbLocation)) {
      if (new Path(adbSyncConfig.basePath).isRoot()) {
        dbLocationPath = new Path(adbSyncConfig.basePath);
      } else {
        dbLocationPath = new Path(adbSyncConfig.basePath).getParent();
      }
    } else {
      dbLocationPath = new Path(dbLocation);
    }
    return generateAbsolutePathStr(dbLocationPath);
  }

  /**
   * Prefixes the path with the default filesystem scheme when it has none,
   * and guarantees a trailing slash.
   */
  protected String generateAbsolutePathStr(Path path) {
    String absolutePathStr = path.toString();
    if (path.toUri().getScheme() == null) {
      absolutePathStr = getDefaultFs() + absolutePathStr;
    }
    return absolutePathStr.endsWith("/") ? absolutePathStr : absolutePathStr + "/";
  }

  protected String getDefaultFs() {
    return fs.getConf().get("fs.defaultFS");
  }
}

View File

@@ -0,0 +1,240 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sync.adb;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.sync.common.HoodieSyncConfig;
import com.beust.jcommander.Parameter;
/**
 * Configs needed to sync data into Alibaba Cloud AnalyticDB(ADB).
 *
 * <p>Each CLI parameter has a matching {@link ConfigProperty}; the
 * {@link #AdbSyncConfig(TypedProperties)} constructor reads the property form
 * and {@link #toProps(AdbSyncConfig)} converts a CLI-populated config back to
 * properties.
 */
public class AdbSyncConfig extends HoodieSyncConfig {

  @Parameter(names = {"--user"}, description = "Adb username", required = true)
  public String adbUser;

  @Parameter(names = {"--pass"}, description = "Adb password", required = true)
  public String adbPass;

  @Parameter(names = {"--jdbc-url"}, description = "Adb jdbc connect url", required = true)
  public String jdbcUrl;

  @Parameter(names = {"--skip-ro-suffix"}, description = "Whether skip the `_ro` suffix for read optimized table when syncing")
  public Boolean skipROSuffix;

  @Parameter(names = {"--skip-rt-sync"}, description = "Whether skip the rt table when syncing")
  public Boolean skipRTSync;

  @Parameter(names = {"--hive-style-partitioning"}, description = "Whether use hive style partitioning, true if like the following style: field1=value1/field2=value2")
  public Boolean useHiveStylePartitioning;

  @Parameter(names = {"--support-timestamp"}, description = "If true, converts int64(timestamp_micros) to timestamp type")
  public Boolean supportTimestamp;

  @Parameter(names = {"--spark-datasource"}, description = "Whether sync this table as spark data source table")
  public Boolean syncAsSparkDataSourceTable;

  @Parameter(names = {"--table-properties"}, description = "Table properties, to support read hoodie table as datasource table", required = true)
  public String tableProperties;

  @Parameter(names = {"--serde-properties"}, description = "Serde properties, to support read hoodie table as datasource table", required = true)
  public String serdeProperties;

  @Parameter(names = {"--spark-schema-length-threshold"}, description = "The maximum length allowed in a single cell when storing additional schema information in Hive's metastore")
  public int sparkSchemaLengthThreshold;

  // Optional: may stay null when not supplied on the command line.
  @Parameter(names = {"--db-location"}, description = "Database location")
  public String dbLocation;

  @Parameter(names = {"--auto-create-database"}, description = "Whether auto create adb database")
  public Boolean autoCreateDatabase = true;

  @Parameter(names = {"--skip-last-commit-time-sync"}, description = "Whether skip last commit time syncing")
  public Boolean skipLastCommitTimeSync = false;

  @Parameter(names = {"--drop-table-before-creation"}, description = "Whether drop table before creation")
  public Boolean dropTableBeforeCreation = false;

  @Parameter(names = {"--help", "-h"}, help = true)
  public Boolean help = false;

  public static final ConfigProperty<String> ADB_SYNC_USER = ConfigProperty
      .key("hoodie.datasource.adb.sync.username")
      .noDefaultValue()
      .withDocumentation("ADB username");

  public static final ConfigProperty<String> ADB_SYNC_PASS = ConfigProperty
      .key("hoodie.datasource.adb.sync.password")
      .noDefaultValue()
      .withDocumentation("ADB user password");

  public static final ConfigProperty<String> ADB_SYNC_JDBC_URL = ConfigProperty
      .key("hoodie.datasource.adb.sync.jdbc_url")
      .noDefaultValue()
      .withDocumentation("Adb jdbc connect url");

  public static final ConfigProperty<Boolean> ADB_SYNC_SKIP_RO_SUFFIX = ConfigProperty
      .key("hoodie.datasource.adb.sync.skip_ro_suffix")
      .defaultValue(true)
      .withDocumentation("Whether skip the `_ro` suffix for read optimized table when syncing");

  public static final ConfigProperty<Boolean> ADB_SYNC_SKIP_RT_SYNC = ConfigProperty
      .key("hoodie.datasource.adb.sync.skip_rt_sync")
      .defaultValue(true)
      .withDocumentation("Whether skip the rt table when syncing");

  public static final ConfigProperty<Boolean> ADB_SYNC_USE_HIVE_STYLE_PARTITIONING = ConfigProperty
      .key("hoodie.datasource.adb.sync.hive_style_partitioning")
      .defaultValue(false)
      .withDocumentation("Whether use hive style partitioning, true if like the following style: field1=value1/field2=value2");

  public static final ConfigProperty<Boolean> ADB_SYNC_SUPPORT_TIMESTAMP = ConfigProperty
      .key("hoodie.datasource.adb.sync.support_timestamp")
      .defaultValue(false)
      .withDocumentation("If true, converts int64(timestamp_micros) to timestamp type");

  public static final ConfigProperty<Boolean> ADB_SYNC_SYNC_AS_SPARK_DATA_SOURCE_TABLE = ConfigProperty
      .key("hoodie.datasource.adb.sync.sync_as_spark_datasource")
      .defaultValue(true)
      .withDocumentation("Whether sync this table as spark data source table");

  public static final ConfigProperty<String> ADB_SYNC_TABLE_PROPERTIES = ConfigProperty
      .key("hoodie.datasource.adb.sync.table_properties")
      .noDefaultValue()
      .withDocumentation("Table properties, to support read hoodie table as datasource table");

  public static final ConfigProperty<String> ADB_SYNC_SERDE_PROPERTIES = ConfigProperty
      .key("hoodie.datasource.adb.sync.serde_properties")
      .noDefaultValue()
      .withDocumentation("Serde properties, to support read hoodie table as datasource table");

  public static final ConfigProperty<Integer> ADB_SYNC_SCHEMA_STRING_LENGTH_THRESHOLD = ConfigProperty
      .key("hoodie.datasource.adb.sync.schema_string_length_threshold")
      .defaultValue(4000)
      .withDocumentation("The maximum length allowed in a single cell when storing additional schema information in Hive's metastore");

  public static final ConfigProperty<String> ADB_SYNC_DB_LOCATION = ConfigProperty
      .key("hoodie.datasource.adb.sync.db_location")
      .noDefaultValue()
      .withDocumentation("Database location");

  public static final ConfigProperty<Boolean> ADB_SYNC_AUTO_CREATE_DATABASE = ConfigProperty
      .key("hoodie.datasource.adb.sync.auto_create_database")
      .defaultValue(true)
      .withDocumentation("Whether auto create adb database");

  public static final ConfigProperty<Boolean> ADB_SYNC_SKIP_LAST_COMMIT_TIME_SYNC = ConfigProperty
      .key("hoodie.datasource.adb.sync.skip_last_commit_time_sync")
      .defaultValue(false)
      .withDocumentation("Whether skip last commit time syncing");

  public static final ConfigProperty<Boolean> ADB_SYNC_DROP_TABLE_BEFORE_CREATION = ConfigProperty
      .key("hoodie.datasource.adb.sync.drop_table_before_creation")
      .defaultValue(false)
      .withDocumentation("Whether drop table before creation");

  public AdbSyncConfig() {
    this(new TypedProperties());
  }

  /**
   * Builds the config from typed properties, applying documented defaults for
   * every property that has one.
   */
  public AdbSyncConfig(TypedProperties props) {
    super(props);
    adbUser = getString(ADB_SYNC_USER);
    adbPass = getString(ADB_SYNC_PASS);
    jdbcUrl = getString(ADB_SYNC_JDBC_URL);
    skipROSuffix = getBooleanOrDefault(ADB_SYNC_SKIP_RO_SUFFIX);
    skipRTSync = getBooleanOrDefault(ADB_SYNC_SKIP_RT_SYNC);
    useHiveStylePartitioning = getBooleanOrDefault(ADB_SYNC_USE_HIVE_STYLE_PARTITIONING);
    supportTimestamp = getBooleanOrDefault(ADB_SYNC_SUPPORT_TIMESTAMP);
    syncAsSparkDataSourceTable = getBooleanOrDefault(ADB_SYNC_SYNC_AS_SPARK_DATA_SOURCE_TABLE);
    tableProperties = getString(ADB_SYNC_TABLE_PROPERTIES);
    serdeProperties = getString(ADB_SYNC_SERDE_PROPERTIES);
    sparkSchemaLengthThreshold = getIntOrDefault(ADB_SYNC_SCHEMA_STRING_LENGTH_THRESHOLD);
    dbLocation = getString(ADB_SYNC_DB_LOCATION);
    autoCreateDatabase = getBooleanOrDefault(ADB_SYNC_AUTO_CREATE_DATABASE);
    skipLastCommitTimeSync = getBooleanOrDefault(ADB_SYNC_SKIP_LAST_COMMIT_TIME_SYNC);
    dropTableBeforeCreation = getBooleanOrDefault(ADB_SYNC_DROP_TABLE_BEFORE_CREATION);
  }

  /**
   * Converts a (typically CLI-populated) config to typed properties.
   *
   * @param cfg source config; its required fields are expected to be non-null
   * @return properties understood by {@link #AdbSyncConfig(TypedProperties)}
   */
  public static TypedProperties toProps(AdbSyncConfig cfg) {
    TypedProperties properties = new TypedProperties();
    properties.put(META_SYNC_DATABASE_NAME.key(), cfg.databaseName);
    properties.put(META_SYNC_TABLE_NAME.key(), cfg.tableName);
    properties.put(ADB_SYNC_USER.key(), cfg.adbUser);
    properties.put(ADB_SYNC_PASS.key(), cfg.adbPass);
    properties.put(ADB_SYNC_JDBC_URL.key(), cfg.jdbcUrl);
    properties.put(META_SYNC_BASE_PATH.key(), cfg.basePath);
    properties.put(META_SYNC_PARTITION_FIELDS.key(), String.join(",", cfg.partitionFields));
    properties.put(META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), cfg.partitionValueExtractorClass);
    properties.put(META_SYNC_ASSUME_DATE_PARTITION.key(), String.valueOf(cfg.assumeDatePartitioning));
    properties.put(ADB_SYNC_SKIP_RO_SUFFIX.key(), String.valueOf(cfg.skipROSuffix));
    properties.put(ADB_SYNC_SKIP_RT_SYNC.key(), String.valueOf(cfg.skipRTSync));
    properties.put(ADB_SYNC_USE_HIVE_STYLE_PARTITIONING.key(), String.valueOf(cfg.useHiveStylePartitioning));
    properties.put(META_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), String.valueOf(cfg.useFileListingFromMetadata));
    properties.put(ADB_SYNC_SUPPORT_TIMESTAMP.key(), String.valueOf(cfg.supportTimestamp));
    properties.put(ADB_SYNC_TABLE_PROPERTIES.key(), cfg.tableProperties);
    properties.put(ADB_SYNC_SERDE_PROPERTIES.key(), cfg.serdeProperties);
    properties.put(ADB_SYNC_SYNC_AS_SPARK_DATA_SOURCE_TABLE.key(), String.valueOf(cfg.syncAsSparkDataSourceTable));
    properties.put(ADB_SYNC_SCHEMA_STRING_LENGTH_THRESHOLD.key(), String.valueOf(cfg.sparkSchemaLengthThreshold));
    properties.put(META_SYNC_SPARK_VERSION.key(), cfg.sparkVersion);
    // dbLocation is an optional CLI parameter and may be null; TypedProperties
    // is a Hashtable, whose put(key, null) throws NullPointerException, so only
    // set the key when a location was actually supplied.
    if (cfg.dbLocation != null) {
      properties.put(ADB_SYNC_DB_LOCATION.key(), cfg.dbLocation);
    }
    properties.put(ADB_SYNC_AUTO_CREATE_DATABASE.key(), String.valueOf(cfg.autoCreateDatabase));
    properties.put(ADB_SYNC_SKIP_LAST_COMMIT_TIME_SYNC.key(), String.valueOf(cfg.skipLastCommitTimeSync));
    properties.put(ADB_SYNC_DROP_TABLE_BEFORE_CREATION.key(), String.valueOf(cfg.dropTableBeforeCreation));
    return properties;
  }

  @Override
  public String toString() {
    return "AdbSyncConfig{"
        + "adbUser='" + adbUser + '\''
        + ", adbPass='" + adbPass + '\''
        + ", jdbcUrl='" + jdbcUrl + '\''
        + ", skipROSuffix=" + skipROSuffix
        + ", skipRTSync=" + skipRTSync
        + ", useHiveStylePartitioning=" + useHiveStylePartitioning
        + ", supportTimestamp=" + supportTimestamp
        + ", syncAsSparkDataSourceTable=" + syncAsSparkDataSourceTable
        + ", tableProperties='" + tableProperties + '\''
        + ", serdeProperties='" + serdeProperties + '\''
        + ", sparkSchemaLengthThreshold=" + sparkSchemaLengthThreshold
        + ", dbLocation='" + dbLocation + '\''
        + ", autoCreateDatabase=" + autoCreateDatabase
        + ", skipLastCommitTimeSync=" + skipLastCommitTimeSync
        + ", dropTableBeforeCreation=" + dropTableBeforeCreation
        + ", help=" + help
        + ", databaseName='" + databaseName + '\''
        + ", tableName='" + tableName + '\''
        + ", basePath='" + basePath + '\''
        + ", baseFileFormat='" + baseFileFormat + '\''
        + ", partitionFields=" + partitionFields
        + ", partitionValueExtractorClass='" + partitionValueExtractorClass + '\''
        + ", assumeDatePartitioning=" + assumeDatePartitioning
        + ", decodePartition=" + decodePartition
        + ", useFileListingFromMetadata=" + useFileListingFromMetadata
        + ", isConditionalSync=" + isConditionalSync
        + ", sparkVersion='" + sparkVersion + '\''
        + '}';
  }
}

View File

@@ -0,0 +1,283 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sync.adb;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;
import org.apache.hudi.hive.SchemaDifference;
import org.apache.hudi.hive.util.HiveSchemaUtil;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient.PartitionEvent;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient.PartitionEvent.PartitionEventType;
import org.apache.hudi.sync.common.AbstractSyncTool;
import org.apache.hudi.sync.common.util.ConfigUtils;
import com.beust.jcommander.JCommander;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.parquet.schema.MessageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Adb sync tool is mainly used to sync hoodie tables to Alibaba Cloud AnalyticDB(ADB),
 * it can be used as API `AdbSyncTool.syncHoodieTable(AdbSyncConfig)` or as command
 * line `java -cp hoodie-hive.jar AdbSyncTool [args]`
 *
 * <p>
 * This utility will get the schema from the latest commit and will sync ADB table schema,
 * incremental partitions will be synced as well.
 */
@SuppressWarnings("WeakerAccess")
public class AdbSyncTool extends AbstractSyncTool {
  private static final Logger LOG = LoggerFactory.getLogger(AdbSyncTool.class);

  // MOR table name suffixes: `_rt` for the snapshot (realtime) view,
  // `_ro` for the read-optimized view.
  public static final String SUFFIX_SNAPSHOT_TABLE = "_rt";
  public static final String SUFFIX_READ_OPTIMIZED_TABLE = "_ro";

  private final AdbSyncConfig adbSyncConfig;
  private final AbstractAdbSyncHoodieClient hoodieAdbClient;
  // For COW this is the plain table name; for MOR it carries the `_rt` suffix.
  private final String snapshotTableName;
  // Present only for MOR tables (read-optimized view name); empty for COW.
  private final Option<String> roTableTableName;

  public AdbSyncTool(TypedProperties props, Configuration conf, FileSystem fs) {
    super(props, conf, fs);
    this.adbSyncConfig = new AdbSyncConfig(props);
    this.hoodieAdbClient = getHoodieAdbClient(adbSyncConfig, fs);
    switch (hoodieAdbClient.getTableType()) {
      case COPY_ON_WRITE:
        this.snapshotTableName = adbSyncConfig.tableName;
        this.roTableTableName = Option.empty();
        break;
      case MERGE_ON_READ:
        this.snapshotTableName = adbSyncConfig.tableName + SUFFIX_SNAPSHOT_TABLE;
        // When skipROSuffix is set, the read-optimized view takes the base name.
        this.roTableTableName = adbSyncConfig.skipROSuffix ? Option.of(adbSyncConfig.tableName)
            : Option.of(adbSyncConfig.tableName + SUFFIX_READ_OPTIMIZED_TABLE);
        break;
      default:
        throw new HoodieAdbSyncException("Unknown table type:" + hoodieAdbClient.getTableType()
            + ", basePath:" + hoodieAdbClient.getBasePath());
    }
  }

  /** Factory hook for the underlying metadata client (JDBC-based by default). */
  private AbstractAdbSyncHoodieClient getHoodieAdbClient(AdbSyncConfig adbSyncConfig, FileSystem fs) {
    return new HoodieAdbJdbcClient(adbSyncConfig, fs);
  }

  @Override
  public void syncHoodieTable() {
    try {
      switch (hoodieAdbClient.getTableType()) {
        case COPY_ON_WRITE:
          syncHoodieTable(snapshotTableName, false, false);
          break;
        case MERGE_ON_READ:
          // Sync a ro table for MOR table
          syncHoodieTable(roTableTableName.get(), false, true);
          // Sync a rt table for MOR table
          if (!adbSyncConfig.skipRTSync) {
            syncHoodieTable(snapshotTableName, true, false);
          }
          break;
        default:
          throw new HoodieAdbSyncException("Unknown table type:" + hoodieAdbClient.getTableType()
              + ", basePath:" + hoodieAdbClient.getBasePath());
      }
    } catch (Exception re) {
      throw new HoodieAdbSyncException("Sync hoodie table to ADB failed, tableName:" + adbSyncConfig.tableName, re);
    } finally {
      hoodieAdbClient.close();
    }
  }

  /**
   * Syncs one logical view of the table: ensures the database exists, syncs
   * the schema, syncs incremental partitions, and finally records the last
   * synced commit time.
   *
   * @param tableName              target table name in ADB
   * @param useRealtimeInputFormat whether the table should use the realtime input format (MOR rt view)
   * @param readAsOptimized        whether the table is the read-optimized view
   */
  private void syncHoodieTable(String tableName, boolean useRealtimeInputFormat,
                               boolean readAsOptimized) throws Exception {
    LOG.info("Try to sync hoodie table, tableName:{}, path:{}, tableType:{}",
        tableName, hoodieAdbClient.getBasePath(), hoodieAdbClient.getTableType());
    if (adbSyncConfig.autoCreateDatabase) {
      try {
        // Guard against concurrent sync tools racing to create the same database.
        synchronized (AdbSyncTool.class) {
          if (!hoodieAdbClient.databaseExists(adbSyncConfig.databaseName)) {
            hoodieAdbClient.createDatabase(adbSyncConfig.databaseName);
          }
        }
      } catch (Exception e) {
        throw new HoodieAdbSyncException("Failed to create database:" + adbSyncConfig.databaseName
            + ", useRealtimeInputFormat = " + useRealtimeInputFormat, e);
      }
    } else if (!hoodieAdbClient.databaseExists(adbSyncConfig.databaseName)) {
      throw new HoodieAdbSyncException("ADB database does not exists:" + adbSyncConfig.databaseName);
    }
    // Currently HoodieBootstrapRelation does not support reading bootstrap MOR rt table,
    // so we disable the syncAsSparkDataSourceTable here to avoid read such kind table
    // by the data source way (which will use the HoodieBootstrapRelation).
    // TODO after we support bootstrap MOR rt table in HoodieBootstrapRelation[HUDI-2071],
    // we can remove this logical.
    if (hoodieAdbClient.isBootstrap()
        && hoodieAdbClient.getTableType() == HoodieTableType.MERGE_ON_READ
        && !readAsOptimized) {
      adbSyncConfig.syncAsSparkDataSourceTable = false;
      LOG.info("Disable sync as spark datasource table for mor rt table:{}", tableName);
    }
    if (adbSyncConfig.dropTableBeforeCreation) {
      LOG.info("Drop table before creation, tableName:{}", tableName);
      hoodieAdbClient.dropTable(tableName);
    }
    boolean tableExists = hoodieAdbClient.tableExists(tableName);
    // Get the parquet schema for this table looking at the latest commit
    MessageType schema = hoodieAdbClient.getDataSchema();
    // Sync schema if needed
    syncSchema(tableName, tableExists, useRealtimeInputFormat, readAsOptimized, schema);
    LOG.info("Sync schema complete, start syncing partitions for table:{}", tableName);
    // Get the last time we successfully synced partitions
    Option<String> lastCommitTimeSynced = Option.empty();
    if (tableExists) {
      lastCommitTimeSynced = hoodieAdbClient.getLastCommitTimeSynced(tableName);
    }
    LOG.info("Last commit time synced was found:{}", lastCommitTimeSynced.orElse("null"));
    // Scan partitions written since the last sync (all partitions on first sync)
    List<String> writtenPartitionsSince;
    if (adbSyncConfig.partitionFields.isEmpty()) {
      writtenPartitionsSince = new ArrayList<>();
    } else {
      writtenPartitionsSince = hoodieAdbClient.getPartitionsWrittenToSince(lastCommitTimeSynced);
    }
    LOG.info("Scan partitions complete, partitionNum:{}", writtenPartitionsSince.size());
    // Sync the partitions if needed
    syncPartitions(tableName, writtenPartitionsSince);
    // Update sync commit time
    // whether to skip syncing commit time stored in tbl properties, since it is time consuming.
    if (!adbSyncConfig.skipLastCommitTimeSync) {
      hoodieAdbClient.updateLastCommitTimeSynced(tableName);
    }
    LOG.info("Sync complete for table:{}", tableName);
  }

  /**
   * Get the latest schema from the last commit and check if its in sync with the ADB
   * table schema. If not, evolves the table schema.
   *
   * @param tableName The table to be synced
   * @param tableExists Whether target table exists
   * @param useRealTimeInputFormat Whether using realtime input format
   * @param readAsOptimized Whether read as optimized table
   * @param schema The extracted schema
   */
  private void syncSchema(String tableName, boolean tableExists, boolean useRealTimeInputFormat,
                          boolean readAsOptimized, MessageType schema) throws Exception {
    // Append spark table properties & serde properties
    Map<String, String> tableProperties = ConfigUtils.toMap(adbSyncConfig.tableProperties);
    Map<String, String> serdeProperties = ConfigUtils.toMap(adbSyncConfig.serdeProperties);
    if (adbSyncConfig.syncAsSparkDataSourceTable) {
      Map<String, String> sparkTableProperties = getSparkTableProperties(adbSyncConfig.partitionFields,
          adbSyncConfig.sparkVersion, adbSyncConfig.sparkSchemaLengthThreshold, schema);
      Map<String, String> sparkSerdeProperties = getSparkSerdeProperties(readAsOptimized, adbSyncConfig.basePath);
      tableProperties.putAll(sparkTableProperties);
      serdeProperties.putAll(sparkSerdeProperties);
      LOG.info("Sync as spark datasource table, tableName:{}, tableExists:{}, tableProperties:{}, sederProperties:{}",
          tableName, tableExists, tableProperties, serdeProperties);
    }
    // Check and sync schema
    if (!tableExists) {
      LOG.info("ADB table [{}] is not found, creating it", tableName);
      String inputFormatClassName = HoodieInputFormatUtils.getInputFormatClassName(HoodieFileFormat.PARQUET, useRealTimeInputFormat);
      // Custom serde will not work with ALTER TABLE REPLACE COLUMNS
      // https://github.com/apache/hive/blob/release-1.1.0/ql/src/java/org/apache/hadoop/hive
      // /ql/exec/DDLTask.java#L3488
      hoodieAdbClient.createTable(tableName, schema, inputFormatClassName, MapredParquetOutputFormat.class.getName(),
          ParquetHiveSerDe.class.getName(), serdeProperties, tableProperties);
    } else {
      // Check if the table schema has evolved
      Map<String, String> tableSchema = hoodieAdbClient.getTableSchema(tableName);
      SchemaDifference schemaDiff = HiveSchemaUtil.getSchemaDifference(schema, tableSchema, adbSyncConfig.partitionFields,
          adbSyncConfig.supportTimestamp);
      if (!schemaDiff.isEmpty()) {
        LOG.info("Schema difference found for table:{}", tableName);
        hoodieAdbClient.updateTableDefinition(tableName, schemaDiff);
      } else {
        LOG.info("No Schema difference for table:{}", tableName);
      }
    }
  }

  /**
   * Syncs the list of storage partitions passed in (checks if the partition is in adb, if not adds it or if the
   * partition path does not match, it updates the partition path).
   */
  private void syncPartitions(String tableName, List<String> writtenPartitionsSince) {
    try {
      if (adbSyncConfig.partitionFields.isEmpty()) {
        LOG.info("Not a partitioned table.");
        return;
      }
      Map<List<String>, String> partitions = hoodieAdbClient.scanTablePartitions(tableName);
      List<PartitionEvent> partitionEvents = hoodieAdbClient.getPartitionEvents(partitions, writtenPartitionsSince);
      List<String> newPartitions = filterPartitions(partitionEvents, PartitionEventType.ADD);
      LOG.info("New Partitions:{}", newPartitions);
      hoodieAdbClient.addPartitionsToTable(tableName, newPartitions);
      List<String> updatePartitions = filterPartitions(partitionEvents, PartitionEventType.UPDATE);
      LOG.info("Changed Partitions:{}", updatePartitions);
      hoodieAdbClient.updatePartitionsToTable(tableName, updatePartitions);
    } catch (Exception e) {
      throw new HoodieAdbSyncException("Failed to sync partitions for table:" + tableName, e);
    }
  }

  /** Returns the storage partitions of the events matching the given event type. */
  private List<String> filterPartitions(List<PartitionEvent> events, PartitionEventType eventType) {
    return events.stream().filter(s -> s.eventType == eventType)
        .map(s -> s.storagePartition).collect(Collectors.toList());
  }

  /**
   * Command-line entry point: parses {@link AdbSyncConfig} parameters and runs
   * a full table sync.
   */
  public static void main(String[] args) {
    // parse the params
    final AdbSyncConfig cfg = new AdbSyncConfig();
    JCommander cmd = new JCommander(cfg, null, args);
    if (cfg.help || args.length == 0) {
      cmd.usage();
      System.exit(1);
    }
    Configuration hadoopConf = new Configuration();
    FileSystem fs = FSUtils.getFs(cfg.basePath, hadoopConf);
    new AdbSyncTool(AdbSyncConfig.toProps(cfg), hadoopConf, fs).syncHoodieTable();
  }
}

View File

@@ -0,0 +1,440 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sync.adb;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.hive.HiveSyncConfig;
import org.apache.hudi.hive.HoodieHiveSyncException;
import org.apache.hudi.hive.SchemaDifference;
import org.apache.hudi.hive.util.HiveSchemaUtil;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.schema.MessageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
/**
 * JDBC-based sync client for Alibaba Cloud AnalyticDB (Adb).
 *
 * <p>Keeps an Adb table definition (schema, partitions, table properties) in sync with a
 * Hudi table by issuing DDL statements over a MySQL-protocol JDBC connection. Inherited
 * state such as {@code adbSyncConfig}, {@code activeTimeline}, {@code partitionValueExtractor},
 * {@code closeQuietly} and {@code generateAbsolutePathStr} comes from
 * {@code AbstractAdbSyncHoodieClient} — presumably configured by the sync tool; verify
 * against the superclass when changing lifecycle behavior.
 */
public class HoodieAdbJdbcClient extends AbstractAdbSyncHoodieClient {
  private static final Logger LOG = LoggerFactory.getLogger(HoodieAdbJdbcClient.class);

  // Table-property key under which the last synced Hudi commit time is stored in Adb.
  public static final String HOODIE_LAST_COMMIT_TIME_SYNC = "hoodie_last_sync";
  // Make sure we have the jdbc driver in classpath
  private static final String DRIVER_NAME = "com.mysql.jdbc.Driver";
  public static final String ADB_ESCAPE_CHARACTER = "";
  // Marker used to locate the table-properties section in `show create table` output.
  private static final String TBL_PROPERTIES_STR = "TBLPROPERTIES";

  // Fail fast at class-load time if the MySQL JDBC driver is not on the classpath.
  static {
    try {
      Class.forName(DRIVER_NAME);
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException("Could not find " + DRIVER_NAME + " in classpath. ", e);
    }
  }

  // Single shared connection for the lifetime of this client; closed in close().
  private Connection connection;

  /**
   * Creates the client and eagerly opens the underlying JDBC connection.
   *
   * @param syncConfig Adb sync configuration (jdbc url, credentials, db/table names)
   * @param fs filesystem hosting the Hudi table
   */
  public HoodieAdbJdbcClient(AdbSyncConfig syncConfig, FileSystem fs) {
    super(syncConfig, fs);
    createAdbConnection();
    LOG.info("Init adb jdbc client success, jdbcUrl:{}", syncConfig.jdbcUrl);
  }

  // Opens the shared JDBC connection; no-op when one already exists.
  private void createAdbConnection() {
    if (connection == null) {
      try {
        Class.forName(DRIVER_NAME);
      } catch (ClassNotFoundException e) {
        // NOTE(review): swallowing this leaves `connection` null, causing NPEs on first use;
        // in practice unreachable because the static initializer already loaded the driver.
        LOG.error("Unable to load jdbc driver class", e);
        return;
      }
      try {
        this.connection = DriverManager.getConnection(
            adbSyncConfig.jdbcUrl, adbSyncConfig.adbUser, adbSyncConfig.adbPass);
      } catch (SQLException e) {
        throw new HoodieException("Cannot create adb connection ", e);
      }
    }
  }

  /**
   * Creates the Adb table from the parquet storage schema, reusing Hive DDL generation.
   *
   * @throws HoodieException if DDL generation or execution fails
   */
  @Override
  public void createTable(String tableName, MessageType storageSchema, String inputFormatClass,
                          String outputFormatClass, String serdeClass,
                          Map<String, String> serdeProperties, Map<String, String> tableProperties) {
    try {
      LOG.info("Creating table:{}", tableName);
      String createSQLQuery = HiveSchemaUtil.generateCreateDDL(tableName, storageSchema,
          getHiveSyncConfig(), inputFormatClass, outputFormatClass, serdeClass, serdeProperties, tableProperties);
      executeAdbSql(createSQLQuery);
    } catch (IOException e) {
      throw new HoodieException("Fail to create table:" + tableName, e);
    }
  }

  /** Drops the table if it exists; safe to call for a missing table. */
  @Override
  public void dropTable(String tableName) {
    LOG.info("Dropping table:{}", tableName);
    String dropTable = "drop table if exists `" + adbSyncConfig.databaseName + "`.`" + tableName + "`";
    executeAdbSql(dropTable);
  }

  /**
   * Reads the table's column definitions through JDBC metadata.
   *
   * @return map of column name to column type; DECIMAL types carry "(precision,scale)"
   * @throws HoodieException if the metadata query fails
   */
  public Map<String, String> getTableSchema(String tableName) {
    Map<String, String> schema = new HashMap<>();
    ResultSet result = null;
    try {
      DatabaseMetaData databaseMetaData = connection.getMetaData();
      // The database name is passed as both catalog and schema pattern; presumably Adb's
      // MySQL-protocol driver treats databases as catalogs — confirm against the driver.
      result = databaseMetaData.getColumns(adbSyncConfig.databaseName,
          adbSyncConfig.databaseName, tableName, null);
      while (result.next()) {
        // Per the JDBC getColumns contract: column 4 = COLUMN_NAME, column 6 = TYPE_NAME.
        String columnName = result.getString(4);
        String columnType = result.getString(6);
        if ("DECIMAL".equals(columnType)) {
          // TYPE_NAME alone loses precision/scale for decimals, so re-append them.
          int columnSize = result.getInt("COLUMN_SIZE");
          int decimalDigits = result.getInt("DECIMAL_DIGITS");
          columnType += String.format("(%s,%s)", columnSize, decimalDigits);
        }
        schema.put(columnName, columnType);
      }
      return schema;
    } catch (SQLException e) {
      throw new HoodieException("Fail to get table schema:" + tableName, e);
    } finally {
      closeQuietly(result, null);
    }
  }

  /** Adds the given partitions (paths relative to the table base path) in a single DDL. */
  @Override
  public void addPartitionsToTable(String tableName, List<String> partitionsToAdd) {
    if (partitionsToAdd.isEmpty()) {
      LOG.info("No partitions to add for table:{}", tableName);
      return;
    }
    LOG.info("Adding partitions to table:{}, partitionNum:{}", tableName, partitionsToAdd.size());
    String sql = constructAddPartitionsSql(tableName, partitionsToAdd);
    executeAdbSql(sql);
  }

  // Executes a non-query statement, wrapping SQL failures in HoodieException.
  private void executeAdbSql(String sql) {
    Statement stmt = null;
    try {
      stmt = connection.createStatement();
      LOG.info("Executing sql:{}", sql);
      stmt.execute(sql);
    } catch (SQLException e) {
      throw new HoodieException("Fail to execute sql:" + sql, e);
    } finally {
      closeQuietly(null, stmt);
    }
  }

  // Executes a query and maps its ResultSet through `function`. The ResultSet itself is
  // released implicitly when the owning Statement is closed in the finally block, so
  // `function` must fully consume it before returning.
  private <T> T executeQuerySQL(String sql, Function<ResultSet, T> function) {
    Statement stmt = null;
    try {
      stmt = connection.createStatement();
      LOG.info("Executing sql:{}", sql);
      return function.apply(stmt.executeQuery(sql));
    } catch (SQLException e) {
      throw new HoodieException("Fail to execute sql:" + sql, e);
    } finally {
      closeQuietly(null, stmt);
    }
  }

  /**
   * Creates the database with an OSS catalog location.
   *
   * <p>NOTE(review): the {@code databaseName} parameter is only logged — the SQL uses
   * {@code adbSyncConfig.databaseName}; confirm callers always pass the same name.
   */
  public void createDatabase(String databaseName) {
    String rootPath = getDatabasePath();
    LOG.info("Creating database:{}, databaseLocation:{}", databaseName, rootPath);
    String sql = constructCreateDatabaseSql(rootPath);
    executeAdbSql(sql);
  }

  /**
   * Checks database existence via `show create database`, treating the driver's
   * "Unknown database" error as "does not exist".
   */
  public boolean databaseExists(String databaseName) {
    String sql = constructShowCreateDatabaseSql(databaseName);
    Function<ResultSet, Boolean> transform = resultSet -> {
      try {
        return resultSet.next();
      } catch (Exception e) {
        // Adb reports a missing database as an error on the query rather than an
        // empty result, so match on the message text.
        if (e.getMessage().contains("Unknown database `" + databaseName + "`")) {
          return false;
        } else {
          throw new HoodieException("Fail to execute sql:" + sql, e);
        }
      }
    };
    return executeQuerySQL(sql, transform);
  }

  /** Checks table existence via `show tables ... like`. */
  @Override
  public boolean doesTableExist(String tableName) {
    String sql = constructShowLikeTableSql(tableName);
    Function<ResultSet, Boolean> transform = resultSet -> {
      try {
        return resultSet.next();
      } catch (Exception e) {
        throw new HoodieException("Fail to execute sql:" + sql, e);
      }
    };
    return executeQuerySQL(sql, transform);
  }

  /** Alias for {@link #doesTableExist(String)} kept to satisfy the sync-client interface. */
  @Override
  public boolean tableExists(String tableName) {
    return doesTableExist(tableName);
  }

  /**
   * Extracts the last synced commit time from the table's TBLPROPERTIES as rendered by
   * `show create table`.
   *
   * <p>NOTE(review): the parser strips parens/quotes then splits on ',' and '=' — it will
   * misparse any property whose key or value contains those characters.
   *
   * @return the stored commit time, or empty when the table or property is absent
   */
  @Override
  public Option<String> getLastCommitTimeSynced(String tableName) {
    String sql = constructShowCreateTableSql(tableName);
    Function<ResultSet, Option<String>> transform = resultSet -> {
      try {
        if (resultSet.next()) {
          // Column 2 of `show create table` holds the full DDL text.
          String table = resultSet.getString(2);
          Map<String, String> attr = new HashMap<>();
          int index = table.indexOf(TBL_PROPERTIES_STR);
          if (index != -1) {
            String sub = table.substring(index + TBL_PROPERTIES_STR.length());
            // Flatten "('k' = 'v', ...)" into "k = v, ..." before splitting.
            sub = sub
                .replaceAll("\\(", "")
                .replaceAll("\\)", "")
                .replaceAll("'", "");
            String[] str = sub.split(",");
            for (String s : str) {
              String key = s.split("=")[0].trim();
              String value = s.split("=")[1].trim();
              attr.put(key, value);
            }
          }
          return Option.ofNullable(attr.getOrDefault(HOODIE_LAST_COMMIT_TIME_SYNC, null));
        }
        return Option.empty();
      } catch (Exception e) {
        throw new HoodieException("Fail to execute sql:" + sql, e);
      }
    };
    return executeQuerySQL(sql, transform);
  }

  /**
   * Persists the latest active-timeline instant as the last synced commit time.
   *
   * <p>NOTE(review): {@code activeTimeline.lastInstant().get()} throws
   * {@code NoSuchElementException} on an empty timeline — confirm callers only invoke
   * this after at least one commit.
   */
  @Override
  public void updateLastCommitTimeSynced(String tableName) {
    // Set the last commit time from the TBLProperties
    String lastCommitSynced = activeTimeline.lastInstant().get().getTimestamp();
    try {
      String sql = constructUpdateTblPropertiesSql(tableName, lastCommitSynced);
      executeAdbSql(sql);
    } catch (Exception e) {
      throw new HoodieHiveSyncException("Fail to get update last commit time synced:" + lastCommitSynced, e);
    }
  }

  /** Replication timestamps are not supported by the Adb client. */
  @Override
  public Option<String> getLastReplicatedTime(String tableName) {
    throw new UnsupportedOperationException("Not support getLastReplicatedTime yet");
  }

  /** Replication timestamps are not supported by the Adb client. */
  @Override
  public void updateLastReplicatedTimeStamp(String tableName, String timeStamp) {
    throw new UnsupportedOperationException("Not support updateLastReplicatedTimeStamp yet");
  }

  /** Replication timestamps are not supported by the Adb client. */
  @Override
  public void deleteLastReplicatedTimeStamp(String tableName) {
    throw new UnsupportedOperationException("Not support deleteLastReplicatedTimeStamp yet");
  }

  /**
   * Re-registers changed partitions one statement at a time (Adb's alter-partition DDL
   * is issued per partition, preceded by a `use` statement).
   */
  @Override
  public void updatePartitionsToTable(String tableName, List<String> changedPartitions) {
    if (changedPartitions.isEmpty()) {
      LOG.info("No partitions to change for table:{}", tableName);
      return;
    }
    LOG.info("Changing partitions on table:{}, changedPartitionNum:{}", tableName, changedPartitions.size());
    List<String> sqlList = constructChangePartitionsSql(tableName, changedPartitions);
    for (String sql : sqlList) {
      executeAdbSql(sql);
    }
  }

  /** Dropping partitions is not supported by the Adb client. */
  @Override
  public void dropPartitions(String tableName, List<String> partitionsToDrop) {
    throw new UnsupportedOperationException("Not support dropPartitions yet.");
  }

  /**
   * Lists the table's registered partitions.
   *
   * @return map of extracted partition values to the partition's scheme-less storage path
   */
  public Map<List<String>, String> scanTablePartitions(String tableName) {
    String sql = constructShowPartitionSql(tableName);
    Function<ResultSet, Map<List<String>, String>> transform = resultSet -> {
      Map<List<String>, String> partitions = new HashMap<>();
      try {
        while (resultSet.next()) {
          if (resultSet.getMetaData().getColumnCount() > 0) {
            String str = resultSet.getString(1);
            if (!StringUtils.isNullOrEmpty(str)) {
              List<String> values = partitionValueExtractor.extractPartitionValuesInPath(str);
              Path storagePartitionPath = FSUtils.getPartitionPath(adbSyncConfig.basePath, String.join("/", values));
              // Strip scheme/authority so paths compare consistently with Hudi-side paths.
              String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath();
              partitions.put(values, fullStoragePartitionPath);
            }
          }
        }
      } catch (Exception e) {
        throw new HoodieException("Fail to execute sql:" + sql, e);
      }
      return partitions;
    };
    return executeQuerySQL(sql, transform);
  }

  /**
   * Applies a computed schema diff to the table: one `add columns` DDL per added column,
   * then one `change` DDL per updated column type.
   */
  public void updateTableDefinition(String tableName, SchemaDifference schemaDiff) {
    LOG.info("Adding columns for table:{}", tableName);
    schemaDiff.getAddColumnTypes().forEach((columnName, columnType) ->
        executeAdbSql(constructAddColumnSql(tableName, columnName, columnType))
    );
    LOG.info("Updating columns' definition for table:{}", tableName);
    schemaDiff.getUpdateColumnTypes().forEach((columnName, columnType) ->
        executeAdbSql(constructChangeColumnSql(tableName, columnName, columnType))
    );
  }

  // Builds one `alter table ... add if not exists partition (...) location '...'` DDL
  // covering every partition in the list.
  private String constructAddPartitionsSql(String tableName, List<String> partitions) {
    StringBuilder sqlBuilder = new StringBuilder("alter table `");
    sqlBuilder.append(adbSyncConfig.databaseName).append("`").append(".`")
        .append(tableName).append("`").append(" add if not exists ");
    for (String partition : partitions) {
      String partitionClause = getPartitionClause(partition);
      Path partitionPath = FSUtils.getPartitionPath(adbSyncConfig.basePath, partition);
      String fullPartitionPathStr = generateAbsolutePathStr(partitionPath);
      sqlBuilder.append(" partition (").append(partitionClause).append(") location '")
          .append(fullPartitionPathStr).append("' ");
    }
    return sqlBuilder.toString();
  }

  // Builds a `use <db>` statement followed by one add-partition DDL per partition.
  private List<String> constructChangePartitionsSql(String tableName, List<String> partitions) {
    List<String> changePartitions = new ArrayList<>();
    // Use database
    String useDatabase = "use `" + adbSyncConfig.databaseName + "`";
    changePartitions.add(useDatabase);
    String alterTable = "alter table `" + tableName + "`";
    for (String partition : partitions) {
      String partitionClause = getPartitionClause(partition);
      Path partitionPath = FSUtils.getPartitionPath(adbSyncConfig.basePath, partition);
      String fullPartitionPathStr = generateAbsolutePathStr(partitionPath);
      String changePartition = alterTable + " add if not exists partition (" + partitionClause
          + ") location '" + fullPartitionPathStr + "'";
      changePartitions.add(changePartition);
    }
    return changePartitions;
  }

  /**
   * Generate Hive Partition from partition values.
   *
   * @param partition Partition path
   * @return partition clause of the form {@code field1='v1',field2='v2',...}
   * @throws IllegalArgumentException if the number of extracted values does not match
   *     the configured partition fields
   */
  private String getPartitionClause(String partition) {
    List<String> partitionValues = partitionValueExtractor.extractPartitionValuesInPath(partition);
    ValidationUtils.checkArgument(adbSyncConfig.partitionFields.size() == partitionValues.size(),
        "Partition key parts " + adbSyncConfig.partitionFields
            + " does not match with partition values " + partitionValues + ". Check partition strategy. ");
    List<String> partBuilder = new ArrayList<>();
    for (int i = 0; i < adbSyncConfig.partitionFields.size(); i++) {
      partBuilder.add(adbSyncConfig.partitionFields.get(i) + "='" + partitionValues.get(i) + "'");
    }
    return String.join(",", partBuilder);
  }

  // --- SQL templates; all identifiers are backtick-quoted against the configured database. ---

  private String constructShowPartitionSql(String tableName) {
    return String.format("show partitions `%s`.`%s`", adbSyncConfig.databaseName, tableName);
  }

  private String constructShowCreateTableSql(String tableName) {
    return String.format("show create table `%s`.`%s`", adbSyncConfig.databaseName, tableName);
  }

  private String constructShowLikeTableSql(String tableName) {
    return String.format("show tables from `%s` like '%s'", adbSyncConfig.databaseName, tableName);
  }

  private String constructCreateDatabaseSql(String rootPath) {
    return String.format("create database if not exists `%s` with dbproperties(catalog = 'oss', location = '%s')",
        adbSyncConfig.databaseName, rootPath);
  }

  private String constructShowCreateDatabaseSql(String databaseName) {
    return String.format("show create database `%s`", databaseName);
  }

  private String constructUpdateTblPropertiesSql(String tableName, String lastCommitSynced) {
    return String.format("alter table `%s`.`%s` set tblproperties('%s' = '%s')",
        adbSyncConfig.databaseName, tableName, HOODIE_LAST_COMMIT_TIME_SYNC, lastCommitSynced);
  }

  private String constructAddColumnSql(String tableName, String columnName, String columnType) {
    return String.format("alter table `%s`.`%s` add columns(`%s` %s)",
        adbSyncConfig.databaseName, tableName, columnName, columnType);
  }

  private String constructChangeColumnSql(String tableName, String columnName, String columnType) {
    return String.format("alter table `%s`.`%s` change `%s` `%s` %s",
        adbSyncConfig.databaseName, tableName, columnName, columnName, columnType);
  }

  // Adapts the Adb config into a HiveSyncConfig so Hive's DDL generator can be reused
  // in createTable(). Only the fields that generateCreateDDL reads are populated here —
  // presumably sufficient; verify against HiveSchemaUtil if DDL generation changes.
  private HiveSyncConfig getHiveSyncConfig() {
    HiveSyncConfig hiveSyncConfig = new HiveSyncConfig();
    hiveSyncConfig.partitionFields = adbSyncConfig.partitionFields;
    hiveSyncConfig.databaseName = adbSyncConfig.databaseName;
    Path basePath = new Path(adbSyncConfig.basePath);
    hiveSyncConfig.basePath = generateAbsolutePathStr(basePath);
    return hiveSyncConfig;
  }

  /** Closes the shared JDBC connection; failures are logged, not rethrown. */
  @Override
  public void close() {
    try {
      if (connection != null) {
        connection.close();
      }
    } catch (SQLException e) {
      LOG.error("Fail to close connection", e);
    }
  }
}

View File

@@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.sync.adb;
/**
 * Exception thrown when syncing a Hudi table to Adb (AnalyticDB) fails.
 */
public class HoodieAdbSyncException extends RuntimeException {

  // Explicit serialVersionUID: RuntimeException is Serializable, and pinning the UID
  // avoids incompatibility if the class evolves.
  private static final long serialVersionUID = 1L;

  /**
   * @param message description of the sync failure
   */
  public HoodieAdbSyncException(String message) {
    super(message);
  }

  /**
   * @param message description of the sync failure
   * @param t underlying cause, preserved for the full stack trace
   */
  public HoodieAdbSyncException(String message, Throwable t) {
    super(message, t);
  }
}

View File

@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.sync.adb;
import org.apache.hudi.common.config.TypedProperties;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Tests for {@code AdbSyncConfig}: verifies a config survives a round trip through
 * {@code AdbSyncConfig.toProps} and the {@code TypedProperties}-based constructor.
 */
public class TestAdbSyncConfig {

  @Test
  public void testCopy() {
    // Populate non-default values so the round trip is meaningful.
    AdbSyncConfig adbSyncConfig = new AdbSyncConfig();
    adbSyncConfig.partitionFields = Arrays.asList("a", "b");
    adbSyncConfig.basePath = "/tmp";
    adbSyncConfig.assumeDatePartitioning = true;
    adbSyncConfig.databaseName = "test";
    adbSyncConfig.tableName = "test";
    adbSyncConfig.adbUser = "adb";
    adbSyncConfig.adbPass = "adb";
    adbSyncConfig.jdbcUrl = "jdbc:mysql://localhost:3306";
    adbSyncConfig.skipROSuffix = false;
    adbSyncConfig.tableProperties = "spark.sql.sources.provider= 'hudi'\\n"
        + "spark.sql.sources.schema.numParts = '1'\\n "
        + "spark.sql.sources.schema.part.0 ='xx'\\n "
        + "spark.sql.sources.schema.numPartCols = '1'\\n"
        + "spark.sql.sources.schema.partCol.0 = 'dt'";
    adbSyncConfig.serdeProperties = "'path'='/tmp/test_db/tbl'";
    adbSyncConfig.dbLocation = "file://tmp/test_db";

    // Round trip: config -> properties -> config.
    TypedProperties props = AdbSyncConfig.toProps(adbSyncConfig);
    AdbSyncConfig copied = new AdbSyncConfig(props);

    // JUnit convention: expected value first, actual second — the original had the
    // arguments reversed, which produces misleading failure messages. The duplicated
    // basePath assertion is also dropped.
    assertEquals(adbSyncConfig.partitionFields, copied.partitionFields);
    assertEquals(adbSyncConfig.basePath, copied.basePath);
    assertEquals(adbSyncConfig.assumeDatePartitioning, copied.assumeDatePartitioning);
    assertEquals(adbSyncConfig.databaseName, copied.databaseName);
    assertEquals(adbSyncConfig.tableName, copied.tableName);
    assertEquals(adbSyncConfig.adbUser, copied.adbUser);
    assertEquals(adbSyncConfig.adbPass, copied.adbPass);
    assertEquals(adbSyncConfig.jdbcUrl, copied.jdbcUrl);
    assertEquals(adbSyncConfig.skipROSuffix, copied.skipROSuffix);
    assertEquals(adbSyncConfig.supportTimestamp, copied.supportTimestamp);
  }
}

View File

@@ -0,0 +1,29 @@
###
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###
log4j.rootLogger=WARN, CONSOLE
log4j.logger.org.apache.hudi=DEBUG
# CONSOLE is set to be a ConsoleAppender.
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
# CONSOLE uses PatternLayout.
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c %x - %m%n
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
log4j.appender.CONSOLE.filter.a.LevelMin=WARN
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL

View File

@@ -0,0 +1,30 @@
###
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###
log4j.rootLogger=WARN, CONSOLE
log4j.logger.org.apache=INFO
log4j.logger.org.apache.hudi=DEBUG
# CONSOLE is set to be a ConsoleAppender.
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
# CONSOLE uses PatternLayout.
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
log4j.appender.CONSOLE.filter.a.LevelMin=WARN
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL