[HUDI-2757] Implement Hudi AWS Glue sync (#5076)
This commit is contained in:
@@ -114,7 +114,7 @@ public class DLASyncTool extends AbstractSyncTool {
|
||||
LOG.info("Trying to sync hoodie table " + tableName + " with base path " + hoodieDLAClient.getBasePath()
|
||||
+ " of type " + hoodieDLAClient.getTableType());
|
||||
// Check if the necessary table exists
|
||||
boolean tableExists = hoodieDLAClient.doesTableExist(tableName);
|
||||
boolean tableExists = hoodieDLAClient.tableExists(tableName);
|
||||
// Get the parquet schema for this table looking at the latest commit
|
||||
MessageType schema = hoodieDLAClient.getDataSchema();
|
||||
// Sync schema if needed
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
|
||||
package org.apache.hudi.dla;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.StringUtils;
|
||||
@@ -31,14 +29,17 @@ import org.apache.hudi.hive.PartitionValueExtractor;
|
||||
import org.apache.hudi.hive.SchemaDifference;
|
||||
import org.apache.hudi.hive.util.HiveSchemaUtil;
|
||||
import org.apache.hudi.sync.common.AbstractSyncHoodieClient;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.parquet.schema.MessageType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.DatabaseMetaData;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
@@ -115,7 +116,7 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
|
||||
}
|
||||
|
||||
public Map<String, String> getTableSchema(String tableName) {
|
||||
if (!doesTableExist(tableName)) {
|
||||
if (!tableExists(tableName)) {
|
||||
throw new IllegalArgumentException(
|
||||
"Failed to get schema for table " + tableName + " does not exist");
|
||||
}
|
||||
@@ -222,6 +223,11 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
|
||||
|
||||
@Override
|
||||
public boolean doesTableExist(String tableName) {
|
||||
return tableExists(tableName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean tableExists(String tableName) {
|
||||
String sql = consutructShowCreateTableSQL(tableName);
|
||||
Statement stmt = null;
|
||||
ResultSet rs = null;
|
||||
@@ -274,6 +280,22 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
|
||||
// TODO: DLA does not support updating tblproperties, so this is a no-op.
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<String> getLastReplicatedTime(String tableName) {
|
||||
// no op; unsupported
|
||||
return Option.empty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateLastReplicatedTimeStamp(String tableName, String timeStamp) {
|
||||
// no op; unsupported
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteLastReplicatedTimeStamp(String tableName) {
|
||||
// no op; unsupported
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updatePartitionsToTable(String tableName, List<String> changedPartitions) {
|
||||
if (changedPartitions.isEmpty()) {
|
||||
@@ -370,6 +392,7 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
try {
|
||||
if (connection != null) {
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.hive;
|
||||
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.table.TableSchemaResolver;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.ReflectionUtils;
|
||||
import org.apache.hudi.sync.common.AbstractSyncHoodieClient;
|
||||
import org.apache.hudi.sync.common.HoodieSyncException;
|
||||
import org.apache.hudi.sync.common.model.Partition;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hive.metastore.api.FieldSchema;
|
||||
import org.apache.parquet.schema.MessageType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Base class to sync Hudi tables with Hive based metastores, such as Hive server, HMS or managed Hive services.
|
||||
*/
|
||||
public abstract class AbstractHiveSyncHoodieClient extends AbstractSyncHoodieClient {
|
||||
|
||||
protected final HoodieTimeline activeTimeline;
|
||||
protected final HiveSyncConfig syncConfig;
|
||||
protected final Configuration hadoopConf;
|
||||
protected final PartitionValueExtractor partitionValueExtractor;
|
||||
|
||||
public AbstractHiveSyncHoodieClient(HiveSyncConfig syncConfig, Configuration hadoopConf, FileSystem fs) {
|
||||
super(syncConfig.basePath, syncConfig.assumeDatePartitioning, syncConfig.useFileListingFromMetadata, syncConfig.withOperationField, fs);
|
||||
this.syncConfig = syncConfig;
|
||||
this.hadoopConf = hadoopConf;
|
||||
this.partitionValueExtractor = ReflectionUtils.loadClass(syncConfig.partitionValueExtractorClass);
|
||||
this.activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
}
|
||||
|
||||
public HoodieTimeline getActiveTimeline() {
|
||||
return activeTimeline;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate over the storage partitions and find if there are any new partitions that need to be added or updated.
|
||||
* Generate a list of PartitionEvent based on the changes required.
|
||||
*/
|
||||
protected List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions, List<String> partitionStoragePartitions, boolean isDropPartition) {
|
||||
Map<String, String> paths = new HashMap<>();
|
||||
for (Partition tablePartition : tablePartitions) {
|
||||
List<String> hivePartitionValues = tablePartition.getValues();
|
||||
String fullTablePartitionPath =
|
||||
Path.getPathWithoutSchemeAndAuthority(new Path(tablePartition.getStorageLocation())).toUri().getPath();
|
||||
paths.put(String.join(", ", hivePartitionValues), fullTablePartitionPath);
|
||||
}
|
||||
|
||||
List<PartitionEvent> events = new ArrayList<>();
|
||||
for (String storagePartition : partitionStoragePartitions) {
|
||||
Path storagePartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, storagePartition);
|
||||
String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath();
|
||||
// Check if the partition values or if hdfs path is the same
|
||||
List<String> storagePartitionValues = partitionValueExtractor.extractPartitionValuesInPath(storagePartition);
|
||||
|
||||
if (isDropPartition) {
|
||||
events.add(PartitionEvent.newPartitionDropEvent(storagePartition));
|
||||
} else {
|
||||
if (!storagePartitionValues.isEmpty()) {
|
||||
String storageValue = String.join(", ", storagePartitionValues);
|
||||
if (!paths.containsKey(storageValue)) {
|
||||
events.add(PartitionEvent.newPartitionAddEvent(storagePartition));
|
||||
} else if (!paths.get(storageValue).equals(fullStoragePartitionPath)) {
|
||||
events.add(PartitionEvent.newPartitionUpdateEvent(storagePartition));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all partitions for the table in the metastore.
|
||||
*/
|
||||
public abstract List<Partition> getAllPartitions(String tableName);
|
||||
|
||||
/**
|
||||
* Check if a database already exists in the metastore.
|
||||
*/
|
||||
public abstract boolean databaseExists(String databaseName);
|
||||
|
||||
/**
|
||||
* Create a database in the metastore.
|
||||
*/
|
||||
public abstract void createDatabase(String databaseName);
|
||||
|
||||
/**
|
||||
* Update schema for the table in the metastore.
|
||||
*/
|
||||
public abstract void updateTableDefinition(String tableName, MessageType newSchema);
|
||||
|
||||
/*
|
||||
* APIs below need to be re-worked by modeling field comment in hudi-sync-common,
|
||||
* instead of relying on Avro or Hive schema class.
|
||||
*/
|
||||
|
||||
public Schema getAvroSchemaWithoutMetadataFields() {
|
||||
try {
|
||||
return new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields();
|
||||
} catch (Exception e) {
|
||||
throw new HoodieSyncException("Failed to read avro schema", e);
|
||||
}
|
||||
}
|
||||
|
||||
public abstract List<FieldSchema> getTableCommentUsingMetastoreClient(String tableName);
|
||||
|
||||
public abstract void updateTableComments(String tableName, List<FieldSchema> oldSchema, List<Schema.Field> newSchema);
|
||||
|
||||
public abstract void updateTableComments(String tableName, List<FieldSchema> oldSchema, Map<String, String> newComments);
|
||||
|
||||
/*
|
||||
* APIs above need to be re-worked by modeling field comment in hudi-sync-common,
|
||||
* instead of relying on Avro or Hive schema class.
|
||||
*/
|
||||
}
|
||||
@@ -18,14 +18,6 @@
|
||||
|
||||
package org.apache.hudi.hive;
|
||||
|
||||
import com.beust.jcommander.JCommander;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.hive.conf.HiveConf;
|
||||
import org.apache.hadoop.hive.metastore.api.FieldSchema;
|
||||
import org.apache.hadoop.hive.metastore.api.Partition;
|
||||
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieFileFormat;
|
||||
@@ -41,7 +33,14 @@ import org.apache.hudi.hive.util.Parquet2SparkSchemaUtils;
|
||||
import org.apache.hudi.sync.common.AbstractSyncHoodieClient.PartitionEvent;
|
||||
import org.apache.hudi.sync.common.AbstractSyncHoodieClient.PartitionEvent.PartitionEventType;
|
||||
import org.apache.hudi.sync.common.AbstractSyncTool;
|
||||
import org.apache.hudi.sync.common.model.Partition;
|
||||
|
||||
import com.beust.jcommander.JCommander;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.hive.conf.HiveConf;
|
||||
import org.apache.hadoop.hive.metastore.api.FieldSchema;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.parquet.schema.GroupType;
|
||||
@@ -66,35 +65,33 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
|
||||
* partitions incrementally (all the partitions modified since the last commit)
|
||||
*/
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public class HiveSyncTool extends AbstractSyncTool {
|
||||
public class HiveSyncTool extends AbstractSyncTool implements AutoCloseable {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(HiveSyncTool.class);
|
||||
public static final String SUFFIX_SNAPSHOT_TABLE = "_rt";
|
||||
public static final String SUFFIX_READ_OPTIMIZED_TABLE = "_ro";
|
||||
|
||||
protected final HiveSyncConfig hiveSyncConfig;
|
||||
protected HoodieHiveClient hoodieHiveClient = null;
|
||||
protected HiveSyncConfig hiveSyncConfig;
|
||||
protected AbstractHiveSyncHoodieClient hoodieHiveClient;
|
||||
protected String snapshotTableName = null;
|
||||
protected Option<String> roTableName = null;
|
||||
|
||||
public HiveSyncTool(TypedProperties props, Configuration conf, FileSystem fs) {
|
||||
super(props, conf, fs);
|
||||
this.hiveSyncConfig = new HiveSyncConfig(props);
|
||||
init(hiveSyncConfig, new HiveConf(conf, HiveConf.class));
|
||||
this(new HiveSyncConfig(props), new HiveConf(conf, HiveConf.class), fs);
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public HiveSyncTool(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf, FileSystem fs) {
|
||||
super(hiveSyncConfig.getProps(), hiveConf, fs);
|
||||
this.hiveSyncConfig = hiveSyncConfig;
|
||||
init(hiveSyncConfig, hiveConf);
|
||||
// TODO: reconcile the way to set METASTOREURIS
|
||||
if (StringUtils.isNullOrEmpty(hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname))) {
|
||||
hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, hiveSyncConfig.metastoreUris);
|
||||
}
|
||||
initClient(hiveSyncConfig, hiveConf);
|
||||
initConfig(hiveSyncConfig);
|
||||
}
|
||||
|
||||
private void init(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf) {
|
||||
protected void initClient(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf) {
|
||||
try {
|
||||
if (StringUtils.isNullOrEmpty(hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname))) {
|
||||
hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, hiveSyncConfig.metastoreUris);
|
||||
}
|
||||
this.hoodieHiveClient = new HoodieHiveClient(hiveSyncConfig, hiveConf, fs);
|
||||
} catch (RuntimeException e) {
|
||||
if (hiveSyncConfig.ignoreExceptions) {
|
||||
@@ -103,12 +100,16 @@ public class HiveSyncTool extends AbstractSyncTool {
|
||||
throw new HoodieHiveSyncException("Got runtime exception when hive syncing", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void initConfig(HiveSyncConfig hiveSyncConfig) {
|
||||
// Set partitionFields to empty, when the NonPartitionedExtractor is used
|
||||
// TODO: HiveSyncConfig should be responsible for inferring config value
|
||||
if (NonPartitionedExtractor.class.getName().equals(hiveSyncConfig.partitionValueExtractorClass)) {
|
||||
LOG.warn("Set partitionFields to empty, since the NonPartitionedExtractor is used");
|
||||
hiveSyncConfig.partitionFields = new ArrayList<>();
|
||||
}
|
||||
this.hiveSyncConfig = hiveSyncConfig;
|
||||
if (hoodieHiveClient != null) {
|
||||
switch (hoodieHiveClient.getTableType()) {
|
||||
case COPY_ON_WRITE:
|
||||
@@ -139,9 +140,7 @@ public class HiveSyncTool extends AbstractSyncTool {
|
||||
} catch (RuntimeException re) {
|
||||
throw new HoodieException("Got runtime exception when hive syncing " + hiveSyncConfig.tableName, re);
|
||||
} finally {
|
||||
if (hoodieHiveClient != null) {
|
||||
hoodieHiveClient.close();
|
||||
}
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -162,6 +161,17 @@ public class HiveSyncTool extends AbstractSyncTool {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (hoodieHiveClient != null) {
|
||||
try {
|
||||
hoodieHiveClient.close();
|
||||
} catch (Exception e) {
|
||||
throw new HoodieHiveSyncException("Fail to close sync client.", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void syncHoodieTable(String tableName, boolean useRealtimeInputFormat,
|
||||
boolean readAsOptimized) {
|
||||
LOG.info("Trying to sync hoodie table " + tableName + " with base path " + hoodieHiveClient.getBasePath()
|
||||
@@ -170,7 +180,7 @@ public class HiveSyncTool extends AbstractSyncTool {
|
||||
// check if the database exists else create it
|
||||
if (hiveSyncConfig.autoCreateDatabase) {
|
||||
try {
|
||||
if (!hoodieHiveClient.doesDataBaseExist(hiveSyncConfig.databaseName)) {
|
||||
if (!hoodieHiveClient.databaseExists(hiveSyncConfig.databaseName)) {
|
||||
hoodieHiveClient.createDatabase(hiveSyncConfig.databaseName);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
@@ -178,14 +188,14 @@ public class HiveSyncTool extends AbstractSyncTool {
|
||||
LOG.warn("Unable to create database", e);
|
||||
}
|
||||
} else {
|
||||
if (!hoodieHiveClient.doesDataBaseExist(hiveSyncConfig.databaseName)) {
|
||||
if (!hoodieHiveClient.databaseExists(hiveSyncConfig.databaseName)) {
|
||||
LOG.error("Hive database does not exist " + hiveSyncConfig.databaseName);
|
||||
throw new HoodieHiveSyncException("hive database does not exist " + hiveSyncConfig.databaseName);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the necessary table exists
|
||||
boolean tableExists = hoodieHiveClient.doesTableExist(tableName);
|
||||
boolean tableExists = hoodieHiveClient.tableExists(tableName);
|
||||
|
||||
// check if isDropPartition
|
||||
boolean isDropPartition = hoodieHiveClient.isDropPartition();
|
||||
@@ -375,7 +385,7 @@ public class HiveSyncTool extends AbstractSyncTool {
|
||||
private boolean syncPartitions(String tableName, List<String> writtenPartitionsSince, boolean isDropPartition) {
|
||||
boolean partitionsChanged;
|
||||
try {
|
||||
List<Partition> hivePartitions = hoodieHiveClient.scanTablePartitions(tableName);
|
||||
List<Partition> hivePartitions = hoodieHiveClient.getAllPartitions(tableName);
|
||||
List<PartitionEvent> partitionEvents =
|
||||
hoodieHiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, isDropPartition);
|
||||
|
||||
|
||||
@@ -18,10 +18,7 @@
|
||||
|
||||
package org.apache.hudi.hive;
|
||||
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.table.TableSchemaResolver;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.StringUtils;
|
||||
import org.apache.hudi.common.util.collection.ImmutablePair;
|
||||
@@ -30,18 +27,14 @@ import org.apache.hudi.hive.ddl.HMSDDLExecutor;
|
||||
import org.apache.hudi.hive.ddl.HiveQueryDDLExecutor;
|
||||
import org.apache.hudi.hive.ddl.HiveSyncMode;
|
||||
import org.apache.hudi.hive.ddl.JDBCExecutor;
|
||||
import org.apache.hudi.hive.util.HiveSchemaUtil;
|
||||
import org.apache.hudi.sync.common.AbstractSyncHoodieClient;
|
||||
import org.apache.hudi.sync.common.HoodieSyncException;
|
||||
import org.apache.hudi.sync.common.model.Partition;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hive.conf.HiveConf;
|
||||
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
|
||||
import org.apache.hadoop.hive.metastore.api.FieldSchema;
|
||||
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
|
||||
import org.apache.hadoop.hive.metastore.api.Partition;
|
||||
import org.apache.hadoop.hive.metastore.api.Table;
|
||||
import org.apache.hadoop.hive.ql.metadata.Hive;
|
||||
import org.apache.log4j.LogManager;
|
||||
@@ -49,7 +42,6 @@ import org.apache.log4j.Logger;
|
||||
import org.apache.parquet.schema.MessageType;
|
||||
import org.apache.thrift.TException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
@@ -57,22 +49,19 @@ import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.hudi.hadoop.utils.HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP;
|
||||
import static org.apache.hudi.sync.common.util.TableUtils.tableId;
|
||||
|
||||
public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
|
||||
private static final String HOODIE_LAST_COMMIT_TIME_SYNC = "last_commit_time_sync";
|
||||
private static final String HIVE_ESCAPE_CHARACTER = HiveSchemaUtil.HIVE_ESCAPE_CHARACTER;
|
||||
/**
|
||||
* This class implements logic to sync a Hudi table with either the Hive server or the Hive Metastore.
|
||||
*/
|
||||
public class HoodieHiveClient extends AbstractHiveSyncHoodieClient {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(HoodieHiveClient.class);
|
||||
private final PartitionValueExtractor partitionValueExtractor;
|
||||
private final HoodieTimeline activeTimeline;
|
||||
DDLExecutor ddlExecutor;
|
||||
private IMetaStoreClient client;
|
||||
private final HiveSyncConfig syncConfig;
|
||||
|
||||
public HoodieHiveClient(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
|
||||
super(cfg.basePath, cfg.assumeDatePartitioning, cfg.useFileListingFromMetadata, cfg.withOperationField, fs);
|
||||
this.syncConfig = cfg;
|
||||
super(cfg, configuration, fs);
|
||||
|
||||
// Support JDBC, HiveQL and metastore based implementations for backwards compatibility. Future users should
|
||||
// disable jdbc and depend on metastore client for all hive registrations
|
||||
@@ -99,20 +88,6 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
} catch (Exception e) {
|
||||
throw new HoodieHiveSyncException("Failed to create HiveMetaStoreClient", e);
|
||||
}
|
||||
|
||||
try {
|
||||
this.partitionValueExtractor =
|
||||
(PartitionValueExtractor) Class.forName(cfg.partitionValueExtractorClass).newInstance();
|
||||
} catch (Exception e) {
|
||||
throw new HoodieHiveSyncException(
|
||||
"Failed to initialize PartitionValueExtractor class " + cfg.partitionValueExtractorClass, e);
|
||||
}
|
||||
|
||||
activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
}
|
||||
|
||||
public HoodieTimeline getActiveTimeline() {
|
||||
return activeTimeline;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -159,61 +134,33 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate over the storage partitions and find if there are any new partitions that need to be added or updated.
|
||||
* Generate a list of PartitionEvent based on the changes required.
|
||||
*/
|
||||
List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions, List<String> partitionStoragePartitions) {
|
||||
return getPartitionEvents(tablePartitions, partitionStoragePartitions, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate over the storage partitions and find if there are any new partitions that need to be added or updated.
|
||||
* Generate a list of PartitionEvent based on the changes required.
|
||||
*/
|
||||
List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions, List<String> partitionStoragePartitions, boolean isDropPartition) {
|
||||
Map<String, String> paths = new HashMap<>();
|
||||
for (Partition tablePartition : tablePartitions) {
|
||||
List<String> hivePartitionValues = tablePartition.getValues();
|
||||
String fullTablePartitionPath =
|
||||
Path.getPathWithoutSchemeAndAuthority(new Path(tablePartition.getSd().getLocation())).toUri().getPath();
|
||||
paths.put(String.join(", ", hivePartitionValues), fullTablePartitionPath);
|
||||
}
|
||||
|
||||
List<PartitionEvent> events = new ArrayList<>();
|
||||
for (String storagePartition : partitionStoragePartitions) {
|
||||
Path storagePartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, storagePartition);
|
||||
String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath();
|
||||
// Check if the partition values or if hdfs path is the same
|
||||
List<String> storagePartitionValues = partitionValueExtractor.extractPartitionValuesInPath(storagePartition);
|
||||
|
||||
if (isDropPartition) {
|
||||
events.add(PartitionEvent.newPartitionDropEvent(storagePartition));
|
||||
} else {
|
||||
if (!storagePartitionValues.isEmpty()) {
|
||||
String storageValue = String.join(", ", storagePartitionValues);
|
||||
if (!paths.containsKey(storageValue)) {
|
||||
events.add(PartitionEvent.newPartitionAddEvent(storagePartition));
|
||||
} else if (!paths.get(storageValue).equals(fullStoragePartitionPath)) {
|
||||
events.add(PartitionEvent.newPartitionUpdateEvent(storagePartition));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan table partitions.
|
||||
*
|
||||
* @deprecated Use {@link #getAllPartitions} instead.
|
||||
*/
|
||||
public List<Partition> scanTablePartitions(String tableName) throws TException {
|
||||
@Deprecated
|
||||
public List<org.apache.hadoop.hive.metastore.api.Partition> scanTablePartitions(String tableName) throws TException {
|
||||
return client.listPartitions(syncConfig.databaseName, tableName, (short) -1);
|
||||
}
|
||||
|
||||
void updateTableDefinition(String tableName, MessageType newSchema) {
|
||||
@Override
|
||||
public void updateTableDefinition(String tableName, MessageType newSchema) {
|
||||
ddlExecutor.updateTableDefinition(tableName, newSchema);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Partition> getAllPartitions(String tableName) {
|
||||
try {
|
||||
return client.listPartitions(syncConfig.databaseName, tableName, (short) -1)
|
||||
.stream()
|
||||
.map(p -> new Partition(p.getValues(), p.getSd().getLocation()))
|
||||
.collect(Collectors.toList());
|
||||
} catch (TException e) {
|
||||
throw new HoodieHiveSyncException("Failed to get all partitions for table " + tableId(syncConfig.databaseName, tableName), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createTable(String tableName, MessageType storageSchema, String inputFormatClass,
|
||||
String outputFormatClass, String serdeClass,
|
||||
@@ -226,18 +173,21 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
*/
|
||||
@Override
|
||||
public Map<String, String> getTableSchema(String tableName) {
|
||||
if (!doesTableExist(tableName)) {
|
||||
if (!tableExists(tableName)) {
|
||||
throw new IllegalArgumentException(
|
||||
"Failed to get schema for table " + tableName + " does not exist");
|
||||
}
|
||||
return ddlExecutor.getTableSchema(tableName);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the configured table exists
|
||||
*/
|
||||
@Deprecated
|
||||
@Override
|
||||
public boolean doesTableExist(String tableName) {
|
||||
return tableExists(tableName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean tableExists(String tableName) {
|
||||
try {
|
||||
return client.tableExists(syncConfig.databaseName, tableName);
|
||||
} catch (TException e) {
|
||||
@@ -245,11 +195,13 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param databaseName
|
||||
* @return true if the configured database exists
|
||||
*/
|
||||
@Deprecated
|
||||
public boolean doesDataBaseExist(String databaseName) {
|
||||
return databaseExists(databaseName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean databaseExists(String databaseName) {
|
||||
try {
|
||||
client.getDatabase(databaseName);
|
||||
return true;
|
||||
@@ -261,6 +213,7 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createDatabase(String databaseName) {
|
||||
ddlExecutor.createDatabase(databaseName);
|
||||
}
|
||||
@@ -321,6 +274,7 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
try {
|
||||
ddlExecutor.close();
|
||||
@@ -333,10 +287,6 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
}
|
||||
}
|
||||
|
||||
List<String> getAllTables(String db) throws Exception {
|
||||
return client.getAllTables(db);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateLastCommitTimeSynced(String tableName) {
|
||||
// Set the last commit time from the TBLproperties
|
||||
@@ -352,14 +302,7 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
}
|
||||
}
|
||||
|
||||
public Schema getAvroSchemaWithoutMetadataFields() {
|
||||
try {
|
||||
return new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields();
|
||||
} catch (Exception e) {
|
||||
throw new HoodieSyncException("Failed to read avro schema", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FieldSchema> getTableCommentUsingMetastoreClient(String tableName) {
|
||||
try {
|
||||
return client.getSchema(syncConfig.databaseName, tableName);
|
||||
@@ -368,11 +311,13 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateTableComments(String tableName, List<FieldSchema> oldSchema, List<Schema.Field> newSchema) {
|
||||
Map<String,String> newComments = newSchema.stream().collect(Collectors.toMap(field -> field.name().toLowerCase(Locale.ROOT), field -> StringUtils.isNullOrEmpty(field.doc()) ? "" : field.doc()));
|
||||
updateTableComments(tableName,oldSchema,newComments);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateTableComments(String tableName, List<FieldSchema> oldSchema, Map<String,String> newComments) {
|
||||
Map<String,String> oldComments = oldSchema.stream().collect(Collectors.toMap(fieldSchema -> fieldSchema.getName().toLowerCase(Locale.ROOT),
|
||||
fieldSchema -> StringUtils.isNullOrEmpty(fieldSchema.getComment()) ? "" : fieldSchema.getComment()));
|
||||
|
||||
@@ -20,10 +20,6 @@ package org.apache.hudi.hive;
|
||||
|
||||
public class HoodieHiveSyncException extends RuntimeException {
|
||||
|
||||
public HoodieHiveSyncException() {
|
||||
super();
|
||||
}
|
||||
|
||||
public HoodieHiveSyncException(String message) {
|
||||
super(message);
|
||||
}
|
||||
@@ -32,11 +28,4 @@ public class HoodieHiveSyncException extends RuntimeException {
|
||||
super(message, t);
|
||||
}
|
||||
|
||||
public HoodieHiveSyncException(Throwable t) {
|
||||
super(t);
|
||||
}
|
||||
|
||||
protected static String format(String message, Object... args) {
|
||||
return String.format(String.valueOf(message), (Object[]) args);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,11 +30,12 @@ import java.util.Map;
|
||||
* There are two main implementations: one is query based, the other is based on the Hive metastore.
|
||||
* QueryBasedDDLExecutor in turn has two implementations: one HiveQL based and one JDBC based.
|
||||
*/
|
||||
public interface DDLExecutor {
|
||||
public interface DDLExecutor extends AutoCloseable {
|
||||
|
||||
/**
|
||||
* @param databaseName name of database to be created.
|
||||
*/
|
||||
public void createDatabase(String databaseName);
|
||||
void createDatabase(String databaseName);
|
||||
|
||||
/**
|
||||
* Creates a table with the following properties.
|
||||
@@ -47,9 +48,9 @@ public interface DDLExecutor {
|
||||
* @param serdeProperties
|
||||
* @param tableProperties
|
||||
*/
|
||||
public void createTable(String tableName, MessageType storageSchema, String inputFormatClass,
|
||||
String outputFormatClass, String serdeClass,
|
||||
Map<String, String> serdeProperties, Map<String, String> tableProperties);
|
||||
void createTable(String tableName, MessageType storageSchema, String inputFormatClass,
|
||||
String outputFormatClass, String serdeClass,
|
||||
Map<String, String> serdeProperties, Map<String, String> tableProperties);
|
||||
|
||||
/**
|
||||
* Updates the table with the newSchema.
|
||||
@@ -57,7 +58,7 @@ public interface DDLExecutor {
|
||||
* @param tableName
|
||||
* @param newSchema
|
||||
*/
|
||||
public void updateTableDefinition(String tableName, MessageType newSchema);
|
||||
void updateTableDefinition(String tableName, MessageType newSchema);
|
||||
|
||||
/**
|
||||
* Fetches tableSchema for a table.
|
||||
@@ -65,7 +66,7 @@ public interface DDLExecutor {
|
||||
* @param tableName
|
||||
* @return
|
||||
*/
|
||||
public Map<String, String> getTableSchema(String tableName);
|
||||
Map<String, String> getTableSchema(String tableName);
|
||||
|
||||
/**
|
||||
* Adds partition to table.
|
||||
@@ -73,7 +74,7 @@ public interface DDLExecutor {
|
||||
* @param tableName
|
||||
* @param partitionsToAdd
|
||||
*/
|
||||
public void addPartitionsToTable(String tableName, List<String> partitionsToAdd);
|
||||
void addPartitionsToTable(String tableName, List<String> partitionsToAdd);
|
||||
|
||||
/**
|
||||
* Updates partitions for a given table.
|
||||
@@ -81,7 +82,7 @@ public interface DDLExecutor {
|
||||
* @param tableName
|
||||
* @param changedPartitions
|
||||
*/
|
||||
public void updatePartitionsToTable(String tableName, List<String> changedPartitions);
|
||||
void updatePartitionsToTable(String tableName, List<String> changedPartitions);
|
||||
|
||||
/**
|
||||
* Drop partitions for a given table.
|
||||
@@ -89,15 +90,13 @@ public interface DDLExecutor {
|
||||
* @param tableName
|
||||
* @param partitionsToDrop
|
||||
*/
|
||||
public void dropPartitionsToTable(String tableName, List<String> partitionsToDrop);
|
||||
void dropPartitionsToTable(String tableName, List<String> partitionsToDrop);
|
||||
|
||||
/**
|
||||
* update table comments
|
||||
*
|
||||
* @param tableName
|
||||
* @param newSchema
|
||||
* @param newSchema Map key: field name, Map value: [field type, field comment]
|
||||
*/
|
||||
public void updateTableComments(String tableName, Map<String, ImmutablePair<String,String>> newSchema);
|
||||
|
||||
public void close();
|
||||
void updateTableComments(String tableName, Map<String, ImmutablePair<String, String>> newSchema);
|
||||
}
|
||||
|
||||
@@ -55,10 +55,6 @@ public class GlobalHiveSyncTool extends HiveSyncTool {
|
||||
LOG.info("Sync complete for " + tableName);
|
||||
}
|
||||
|
||||
public void close() {
|
||||
hoodieHiveClient.close();
|
||||
}
|
||||
|
||||
public Map<String, Option<String>> getLastReplicatedTimeStampMap() {
|
||||
Map<String, Option<String>> timeStampMap = new HashMap<>();
|
||||
Option<String> timeStamp = hoodieHiveClient.getLastReplicatedTime(snapshotTableName);
|
||||
|
||||
@@ -138,12 +138,12 @@ public class TestHiveSyncTool {
|
||||
HiveTestUtil.createCOWTable(instantTime, 5, useSchemaFromCommitMetadata);
|
||||
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
// Lets do the sync
|
||||
reSyncHiveTable();
|
||||
|
||||
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 1,
|
||||
@@ -176,9 +176,9 @@ public class TestHiveSyncTool {
|
||||
ddlExecutor.runSQL("ALTER TABLE `" + HiveTestUtil.TABLE_NAME
|
||||
+ "` PARTITION (`datestr`='2050-01-01') SET LOCATION '/some/new/location'");
|
||||
|
||||
List<Partition> hivePartitions = hiveClient.scanTablePartitions(HiveTestUtil.TABLE_NAME);
|
||||
List<org.apache.hudi.sync.common.model.Partition> hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
|
||||
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.empty());
|
||||
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
|
||||
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, false);
|
||||
assertEquals(1, partitionEvents.size(), "There should be only one partition event");
|
||||
assertEquals(PartitionEventType.UPDATE, partitionEvents.iterator().next().eventType,
|
||||
"The one partition event must of type UPDATE");
|
||||
@@ -211,20 +211,20 @@ public class TestHiveSyncTool {
|
||||
hiveSyncProps.setProperty(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key(), "true");
|
||||
reinitHiveSyncClient();
|
||||
assertDoesNotThrow((this::reSyncHiveTable));
|
||||
assertTrue(hiveClient.doesDataBaseExist(HiveTestUtil.DB_NAME),
|
||||
assertTrue(hiveClient.databaseExists(HiveTestUtil.DB_NAME),
|
||||
"DataBases " + HiveTestUtil.DB_NAME + " should exist after sync completes");
|
||||
|
||||
// while autoCreateDatabase is false and database exists;
|
||||
hiveSyncProps.setProperty(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key(), "false");
|
||||
reinitHiveSyncClient();
|
||||
assertDoesNotThrow((this::reSyncHiveTable));
|
||||
assertTrue(hiveClient.doesDataBaseExist(HiveTestUtil.DB_NAME),
|
||||
assertTrue(hiveClient.databaseExists(HiveTestUtil.DB_NAME),
|
||||
"DataBases " + HiveTestUtil.DB_NAME + " should exist after sync completes");
|
||||
|
||||
// while autoCreateDatabase is true and database exists;
|
||||
hiveSyncProps.setProperty(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key(), "true");
|
||||
assertDoesNotThrow((this::reSyncHiveTable));
|
||||
assertTrue(hiveClient.doesDataBaseExist(HiveTestUtil.DB_NAME),
|
||||
assertTrue(hiveClient.databaseExists(HiveTestUtil.DB_NAME),
|
||||
"DataBases " + HiveTestUtil.DB_NAME + " should exist after sync completes");
|
||||
}
|
||||
|
||||
@@ -457,8 +457,8 @@ public class TestHiveSyncTool {
|
||||
reSyncHiveTable();
|
||||
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.of(commitTime1));
|
||||
assertEquals(1, writtenPartitionsSince.size(), "We should have one partition written after 100 commit");
|
||||
List<Partition> hivePartitions = hiveClient.scanTablePartitions(HiveTestUtil.TABLE_NAME);
|
||||
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
|
||||
List<org.apache.hudi.sync.common.model.Partition> hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
|
||||
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, false);
|
||||
assertEquals(1, partitionEvents.size(), "There should be only one partition event");
|
||||
assertEquals(PartitionEventType.ADD, partitionEvents.iterator().next().eventType, "The one partition event must of type ADD");
|
||||
|
||||
@@ -581,11 +581,11 @@ public class TestHiveSyncTool {
|
||||
|
||||
String roTableName = HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE;
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(hiveClient.doesTableExist(roTableName), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
assertFalse(hiveClient.tableExists(roTableName), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
// Lets do the sync
|
||||
reSyncHiveTable();
|
||||
|
||||
assertTrue(hiveClient.doesTableExist(roTableName), "Table " + roTableName + " should exist after sync completes");
|
||||
assertTrue(hiveClient.tableExists(roTableName), "Table " + roTableName + " should exist after sync completes");
|
||||
|
||||
if (useSchemaFromCommitMetadata) {
|
||||
assertEquals(hiveClient.getTableSchema(roTableName).size(),
|
||||
@@ -643,14 +643,14 @@ public class TestHiveSyncTool {
|
||||
String snapshotTableName = HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
|
||||
HiveTestUtil.createMORTable(instantTime, deltaCommitTime, 5, true, useSchemaFromCommitMetadata);
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(hiveClient.doesTableExist(snapshotTableName),
|
||||
assertFalse(hiveClient.tableExists(snapshotTableName),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should not exist initially");
|
||||
|
||||
// Lets do the sync
|
||||
reSyncHiveTable();
|
||||
|
||||
assertTrue(hiveClient.doesTableExist(snapshotTableName),
|
||||
assertTrue(hiveClient.tableExists(snapshotTableName),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should exist after sync completes");
|
||||
|
||||
@@ -713,11 +713,11 @@ public class TestHiveSyncTool {
|
||||
HiveTestUtil.getCreatedTablesSet().add(HiveTestUtil.DB_NAME + "." + HiveTestUtil.TABLE_NAME);
|
||||
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
// Lets do the sync
|
||||
reSyncHiveTable();
|
||||
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 3,
|
||||
@@ -736,8 +736,8 @@ public class TestHiveSyncTool {
|
||||
reinitHiveSyncClient();
|
||||
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.of(instantTime));
|
||||
assertEquals(1, writtenPartitionsSince.size(), "We should have one partition written after 100 commit");
|
||||
List<Partition> hivePartitions = hiveClient.scanTablePartitions(HiveTestUtil.TABLE_NAME);
|
||||
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
|
||||
List<org.apache.hudi.sync.common.model.Partition> hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
|
||||
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, false);
|
||||
assertEquals(1, partitionEvents.size(), "There should be only one partition event");
|
||||
assertEquals(PartitionEventType.ADD, partitionEvents.iterator().next().eventType, "The one partition event must of type ADD");
|
||||
|
||||
@@ -755,7 +755,7 @@ public class TestHiveSyncTool {
|
||||
|
||||
reinitHiveSyncClient();
|
||||
reSyncHiveTable();
|
||||
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 3,
|
||||
@@ -776,12 +776,12 @@ public class TestHiveSyncTool {
|
||||
HiveTestUtil.createCOWTable(instantTime, 1, true);
|
||||
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
// Lets do the sync
|
||||
reSyncHiveTable();
|
||||
|
||||
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 1,
|
||||
@@ -820,11 +820,11 @@ public class TestHiveSyncTool {
|
||||
HiveTestUtil.createCOWTable(instantTime, 1, true);
|
||||
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
// Lets do the sync
|
||||
reSyncHiveTable();
|
||||
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 1,
|
||||
@@ -860,11 +860,11 @@ public class TestHiveSyncTool {
|
||||
HiveTestUtil.getCreatedTablesSet().add(HiveTestUtil.DB_NAME + "." + HiveTestUtil.TABLE_NAME);
|
||||
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
// Lets do the sync
|
||||
reSyncHiveTable();
|
||||
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
|
||||
hiveClient.getDataSchema().getColumns().size(),
|
||||
@@ -882,13 +882,13 @@ public class TestHiveSyncTool {
|
||||
HiveTestUtil.createMORTable(commitTime, "", 5, false, true);
|
||||
reinitHiveSyncClient();
|
||||
|
||||
assertFalse(hiveClient.doesTableExist(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
assertFalse(hiveClient.tableExists(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should not exist initially");
|
||||
|
||||
// Lets do the sync
|
||||
reSyncHiveTable();
|
||||
|
||||
assertTrue(hiveClient.doesTableExist(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
assertTrue(hiveClient.tableExists(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should exist after sync completes");
|
||||
|
||||
// Schema being read from compacted base files
|
||||
@@ -925,7 +925,7 @@ public class TestHiveSyncTool {
|
||||
HiveTestUtil.createCOWTable(instantTime, 5, false);
|
||||
reinitHiveSyncClient();
|
||||
HoodieHiveClient prevHiveClient = hiveClient;
|
||||
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
|
||||
// Lets do the sync
|
||||
@@ -936,12 +936,12 @@ public class TestHiveSyncTool {
|
||||
reSyncHiveTable();
|
||||
|
||||
assertNull(hiveClient);
|
||||
assertFalse(prevHiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
|
||||
assertFalse(prevHiveClient.tableExists(HiveTestUtil.TABLE_NAME),
|
||||
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
}
|
||||
|
||||
private void verifyOldParquetFileTest(HoodieHiveClient hiveClient, String emptyCommitTime) throws Exception {
|
||||
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
|
||||
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 1,
|
||||
"Hive Schema should match the table schema + partition field");
|
||||
@@ -973,7 +973,7 @@ public class TestHiveSyncTool {
|
||||
final String emptyCommitTime = "200";
|
||||
HiveTestUtil.createCommitFileWithSchema(commitMetadata, emptyCommitTime, true);
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
|
||||
reinitHiveSyncClient();
|
||||
reSyncHiveTable();
|
||||
@@ -1000,7 +1000,7 @@ public class TestHiveSyncTool {
|
||||
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(
|
||||
hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
|
||||
HiveSyncTool tool = new HiveSyncTool(hiveSyncProps, getHiveConf(), fileSystem);
|
||||
// now delete the evolved commit instant
|
||||
@@ -1017,7 +1017,7 @@ public class TestHiveSyncTool {
|
||||
}
|
||||
|
||||
// table should not be synced yet
|
||||
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist at all");
|
||||
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist at all");
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -1033,7 +1033,7 @@ public class TestHiveSyncTool {
|
||||
//HiveTestUtil.createCommitFile(commitMetadata, emptyCommitTime);
|
||||
reinitHiveSyncClient();
|
||||
assertFalse(
|
||||
hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
|
||||
|
||||
reSyncHiveTable();
|
||||
|
||||
@@ -1120,7 +1120,7 @@ public class TestHiveSyncTool {
|
||||
reinitHiveSyncClient();
|
||||
reSyncHiveTable();
|
||||
|
||||
assertTrue(hiveClient.doesTableExist(tableName));
|
||||
assertTrue(hiveClient.tableExists(tableName));
|
||||
assertEquals(commitTime1, hiveClient.getLastCommitTimeSynced(tableName).get());
|
||||
|
||||
HiveTestUtil.addMORPartitions(0, true, true, true, ZonedDateTime.now().plusDays(2), commitTime1, commitTime2);
|
||||
@@ -1138,7 +1138,7 @@ public class TestHiveSyncTool {
|
||||
|
||||
private void reinitHiveSyncClient() {
|
||||
hiveSyncTool = new HiveSyncTool(hiveSyncProps, HiveTestUtil.getHiveConf(), fileSystem);
|
||||
hiveClient = hiveSyncTool.hoodieHiveClient;
|
||||
hiveClient = (HoodieHiveClient) hiveSyncTool.hoodieHiveClient;
|
||||
}
|
||||
|
||||
private int getPartitionFieldSize() {
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
|
||||
package org.apache.hudi.sync.common;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
@@ -31,6 +29,9 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.TimelineUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.parquet.schema.MessageType;
|
||||
@@ -43,10 +44,11 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
public abstract class AbstractSyncHoodieClient {
|
||||
public abstract class AbstractSyncHoodieClient implements AutoCloseable {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(AbstractSyncHoodieClient.class);
|
||||
|
||||
public static final String HOODIE_LAST_COMMIT_TIME_SYNC = "last_commit_time_sync";
|
||||
public static final TypeConverter TYPE_CONVERTOR = new TypeConverter() {};
|
||||
|
||||
protected final HoodieTableMetaClient metaClient;
|
||||
@@ -89,12 +91,24 @@ public abstract class AbstractSyncHoodieClient {
|
||||
String serdeClass, Map<String, String> serdeProperties,
|
||||
Map<String, String> tableProperties);
|
||||
|
||||
/**
|
||||
* @deprecated Use {@link #tableExists} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public abstract boolean doesTableExist(String tableName);
|
||||
|
||||
public abstract boolean tableExists(String tableName);
|
||||
|
||||
public abstract Option<String> getLastCommitTimeSynced(String tableName);
|
||||
|
||||
public abstract void updateLastCommitTimeSynced(String tableName);
|
||||
|
||||
public abstract Option<String> getLastReplicatedTime(String tableName);
|
||||
|
||||
public abstract void updateLastReplicatedTimeStamp(String tableName, String timeStamp);
|
||||
|
||||
public abstract void deleteLastReplicatedTimeStamp(String tableName);
|
||||
|
||||
public abstract void addPartitionsToTable(String tableName, List<String> partitionsToAdd);
|
||||
|
||||
public abstract void updatePartitionsToTable(String tableName, List<String> changedPartitions);
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.sync.common.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Read-only holder that pairs a partition's list of values with its storage location.
 *
 * <p>Both constructor arguments are stored by reference as given; no copying or validation is
 * performed.
 */
public class Partition {

  /** Storage location supplied at construction time. */
  private final String storageLocation;

  /** Partition values supplied at construction time. */
  private final List<String> values;

  /**
   * @param values          the partition values
   * @param storageLocation the storage location of the partition
   */
  public Partition(List<String> values, String storageLocation) {
    this.storageLocation = storageLocation;
    this.values = values;
  }

  /**
   * @return the storage location given to the constructor
   */
  public String getStorageLocation() {
    return this.storageLocation;
  }

  /**
   * @return the same list instance that was given to the constructor
   */
  public List<String> getValues() {
    return this.values;
  }
}
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.sync.common.util;
|
||||
|
||||
/**
 * Static helpers for building table identifiers.
 */
public final class TableUtils {

  private TableUtils() {
    // Static-only utility class; the private constructor blocks instantiation.
  }

  /**
   * Joins a database name and a table name into a single dotted identifier.
   *
   * @param database the database name
   * @param table    the table name
   * @return {@code database} and {@code table} joined by a dot, e.g. {@code "db.tbl"}
   */
  public static String tableId(String database, String table) {
    return String.format("%s.%s", database, table);
  }
}
|
||||
Reference in New Issue
Block a user