1
0

[HUDI-2757] Implement Hudi AWS Glue sync (#5076)

This commit is contained in:
Raymond Xu
2022-03-28 11:54:59 -07:00
committed by GitHub
parent 4ed84b216d
commit 6ccbae4d2a
25 changed files with 1151 additions and 204 deletions

View File

@@ -114,7 +114,7 @@ public class DLASyncTool extends AbstractSyncTool {
LOG.info("Trying to sync hoodie table " + tableName + " with base path " + hoodieDLAClient.getBasePath()
+ " of type " + hoodieDLAClient.getTableType());
// Check if the necessary table exists
boolean tableExists = hoodieDLAClient.doesTableExist(tableName);
boolean tableExists = hoodieDLAClient.tableExists(tableName);
// Get the parquet schema for this table looking at the latest commit
MessageType schema = hoodieDLAClient.getDataSchema();
// Sync schema if needed

View File

@@ -18,8 +18,6 @@
package org.apache.hudi.dla;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
@@ -31,14 +29,17 @@ import org.apache.hudi.hive.PartitionValueExtractor;
import org.apache.hudi.hive.SchemaDifference;
import org.apache.hudi.hive.util.HiveSchemaUtil;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.schema.MessageType;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
@@ -115,7 +116,7 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
}
public Map<String, String> getTableSchema(String tableName) {
if (!doesTableExist(tableName)) {
if (!tableExists(tableName)) {
throw new IllegalArgumentException(
"Failed to get schema for table " + tableName + " does not exist");
}
@@ -222,6 +223,11 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
@Override
public boolean doesTableExist(String tableName) {
return tableExists(tableName);
}
@Override
public boolean tableExists(String tableName) {
String sql = consutructShowCreateTableSQL(tableName);
Statement stmt = null;
ResultSet rs = null;
@@ -274,6 +280,22 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
// TODO : dla do not support update tblproperties, so do nothing.
}
@Override
public Option<String> getLastReplicatedTime(String tableName) {
// no op; unsupported
return Option.empty();
}
@Override
public void updateLastReplicatedTimeStamp(String tableName, String timeStamp) {
// no op; unsupported
}
@Override
public void deleteLastReplicatedTimeStamp(String tableName) {
// no op; unsupported
}
@Override
public void updatePartitionsToTable(String tableName, List<String> changedPartitions) {
if (changedPartitions.isEmpty()) {
@@ -370,6 +392,7 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
}
}
@Override
public void close() {
try {
if (connection != null) {

View File

@@ -0,0 +1,142 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.hive;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient;
import org.apache.hudi.sync.common.HoodieSyncException;
import org.apache.hudi.sync.common.model.Partition;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.parquet.schema.MessageType;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Base class to sync Hudi tables with Hive based metastores, such as Hive server, HMS or managed Hive services.
 */
public abstract class AbstractHiveSyncHoodieClient extends AbstractSyncHoodieClient {

  // Completed commit timeline of the table being synced; resolved once at construction.
  protected final HoodieTimeline activeTimeline;
  protected final HiveSyncConfig syncConfig;
  protected final Configuration hadoopConf;
  // Extracts partition values from a relative partition path, per the configured extractor class.
  protected final PartitionValueExtractor partitionValueExtractor;

  public AbstractHiveSyncHoodieClient(HiveSyncConfig syncConfig, Configuration hadoopConf, FileSystem fs) {
    super(syncConfig.basePath, syncConfig.assumeDatePartitioning, syncConfig.useFileListingFromMetadata, syncConfig.withOperationField, fs);
    this.syncConfig = syncConfig;
    this.hadoopConf = hadoopConf;
    this.partitionValueExtractor = ReflectionUtils.loadClass(syncConfig.partitionValueExtractorClass);
    this.activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
  }

  public HoodieTimeline getActiveTimeline() {
    return activeTimeline;
  }

  /**
   * Compares the partitions present on storage with those registered in the metastore and
   * produces the {@link PartitionEvent}s (ADD / UPDATE / DROP) needed to reconcile them.
   *
   * @param tablePartitions           partitions currently registered in the metastore
   * @param partitionStoragePartitions relative partition paths found on storage
   * @param isDropPartition           when true, every storage partition becomes a DROP event
   * @return the list of partition changes to apply to the metastore
   */
  protected List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions, List<String> partitionStoragePartitions, boolean isDropPartition) {
    // Index metastore partitions by their comma-joined partition values so the
    // storage-side partitions can be matched against them below.
    Map<String, String> metastoreLocations = new HashMap<>();
    for (Partition metastorePartition : tablePartitions) {
      String registeredLocation =
          Path.getPathWithoutSchemeAndAuthority(new Path(metastorePartition.getStorageLocation())).toUri().getPath();
      metastoreLocations.put(String.join(", ", metastorePartition.getValues()), registeredLocation);
    }

    List<PartitionEvent> events = new ArrayList<>();
    for (String storagePartition : partitionStoragePartitions) {
      if (isDropPartition) {
        // Drop mode: each written partition translates directly to a drop event.
        events.add(PartitionEvent.newPartitionDropEvent(storagePartition));
        continue;
      }
      List<String> storagePartitionValues = partitionValueExtractor.extractPartitionValuesInPath(storagePartition);
      if (storagePartitionValues.isEmpty()) {
        continue;
      }
      Path storagePartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, storagePartition);
      String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath();
      String valuesKey = String.join(", ", storagePartitionValues);
      if (!metastoreLocations.containsKey(valuesKey)) {
        // Present on storage but unknown to the metastore.
        events.add(PartitionEvent.newPartitionAddEvent(storagePartition));
      } else if (!metastoreLocations.get(valuesKey).equals(fullStoragePartitionPath)) {
        // Known partition whose registered location has drifted from storage.
        events.add(PartitionEvent.newPartitionUpdateEvent(storagePartition));
      }
    }
    return events;
  }

  /**
   * Get all partitions for the table in the metastore.
   */
  public abstract List<Partition> getAllPartitions(String tableName);

  /**
   * Check if a database already exists in the metastore.
   */
  public abstract boolean databaseExists(String databaseName);

  /**
   * Create a database in the metastore.
   */
  public abstract void createDatabase(String databaseName);

  /**
   * Update schema for the table in the metastore.
   */
  public abstract void updateTableDefinition(String tableName, MessageType newSchema);

  /*
   * APIs below need to be re-worked by modeling field comment in hudi-sync-common,
   * instead of relying on Avro or Hive schema class.
   */

  /**
   * Resolves the table's Avro schema with Hudi metadata fields stripped.
   *
   * @throws HoodieSyncException if the schema cannot be read from the table
   */
  public Schema getAvroSchemaWithoutMetadataFields() {
    try {
      return new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields();
    } catch (Exception e) {
      throw new HoodieSyncException("Failed to read avro schema", e);
    }
  }

  public abstract List<FieldSchema> getTableCommentUsingMetastoreClient(String tableName);

  public abstract void updateTableComments(String tableName, List<FieldSchema> oldSchema, List<Schema.Field> newSchema);

  public abstract void updateTableComments(String tableName, List<FieldSchema> oldSchema, Map<String, String> newComments);

  /*
   * APIs above need to be re-worked by modeling field comment in hudi-sync-common,
   * instead of relying on Avro or Hive schema class.
   */
}

View File

@@ -18,14 +18,6 @@
package org.apache.hudi.hive;
import com.beust.jcommander.JCommander;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieFileFormat;
@@ -41,7 +33,14 @@ import org.apache.hudi.hive.util.Parquet2SparkSchemaUtils;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient.PartitionEvent;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient.PartitionEvent.PartitionEventType;
import org.apache.hudi.sync.common.AbstractSyncTool;
import org.apache.hudi.sync.common.model.Partition;
import com.beust.jcommander.JCommander;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.schema.GroupType;
@@ -66,35 +65,33 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
* partitions incrementally (all the partitions modified since the last commit)
*/
@SuppressWarnings("WeakerAccess")
public class HiveSyncTool extends AbstractSyncTool {
public class HiveSyncTool extends AbstractSyncTool implements AutoCloseable {
private static final Logger LOG = LogManager.getLogger(HiveSyncTool.class);
public static final String SUFFIX_SNAPSHOT_TABLE = "_rt";
public static final String SUFFIX_READ_OPTIMIZED_TABLE = "_ro";
protected final HiveSyncConfig hiveSyncConfig;
protected HoodieHiveClient hoodieHiveClient = null;
protected HiveSyncConfig hiveSyncConfig;
protected AbstractHiveSyncHoodieClient hoodieHiveClient;
protected String snapshotTableName = null;
protected Option<String> roTableName = null;
public HiveSyncTool(TypedProperties props, Configuration conf, FileSystem fs) {
super(props, conf, fs);
this.hiveSyncConfig = new HiveSyncConfig(props);
init(hiveSyncConfig, new HiveConf(conf, HiveConf.class));
this(new HiveSyncConfig(props), new HiveConf(conf, HiveConf.class), fs);
}
@Deprecated
public HiveSyncTool(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf, FileSystem fs) {
super(hiveSyncConfig.getProps(), hiveConf, fs);
this.hiveSyncConfig = hiveSyncConfig;
init(hiveSyncConfig, hiveConf);
// TODO: reconcile the way to set METASTOREURIS
if (StringUtils.isNullOrEmpty(hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname))) {
hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, hiveSyncConfig.metastoreUris);
}
initClient(hiveSyncConfig, hiveConf);
initConfig(hiveSyncConfig);
}
private void init(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf) {
protected void initClient(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf) {
try {
if (StringUtils.isNullOrEmpty(hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname))) {
hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, hiveSyncConfig.metastoreUris);
}
this.hoodieHiveClient = new HoodieHiveClient(hiveSyncConfig, hiveConf, fs);
} catch (RuntimeException e) {
if (hiveSyncConfig.ignoreExceptions) {
@@ -103,12 +100,16 @@ public class HiveSyncTool extends AbstractSyncTool {
throw new HoodieHiveSyncException("Got runtime exception when hive syncing", e);
}
}
}
private void initConfig(HiveSyncConfig hiveSyncConfig) {
// Set partitionFields to empty, when the NonPartitionedExtractor is used
// TODO: HiveSyncConfig should be responsible for inferring config value
if (NonPartitionedExtractor.class.getName().equals(hiveSyncConfig.partitionValueExtractorClass)) {
LOG.warn("Set partitionFields to empty, since the NonPartitionedExtractor is used");
hiveSyncConfig.partitionFields = new ArrayList<>();
}
this.hiveSyncConfig = hiveSyncConfig;
if (hoodieHiveClient != null) {
switch (hoodieHiveClient.getTableType()) {
case COPY_ON_WRITE:
@@ -139,9 +140,7 @@ public class HiveSyncTool extends AbstractSyncTool {
} catch (RuntimeException re) {
throw new HoodieException("Got runtime exception when hive syncing " + hiveSyncConfig.tableName, re);
} finally {
if (hoodieHiveClient != null) {
hoodieHiveClient.close();
}
close();
}
}
@@ -162,6 +161,17 @@ public class HiveSyncTool extends AbstractSyncTool {
}
}
@Override
public void close() {
if (hoodieHiveClient != null) {
try {
hoodieHiveClient.close();
} catch (Exception e) {
throw new HoodieHiveSyncException("Fail to close sync client.", e);
}
}
}
protected void syncHoodieTable(String tableName, boolean useRealtimeInputFormat,
boolean readAsOptimized) {
LOG.info("Trying to sync hoodie table " + tableName + " with base path " + hoodieHiveClient.getBasePath()
@@ -170,7 +180,7 @@ public class HiveSyncTool extends AbstractSyncTool {
// check if the database exists else create it
if (hiveSyncConfig.autoCreateDatabase) {
try {
if (!hoodieHiveClient.doesDataBaseExist(hiveSyncConfig.databaseName)) {
if (!hoodieHiveClient.databaseExists(hiveSyncConfig.databaseName)) {
hoodieHiveClient.createDatabase(hiveSyncConfig.databaseName);
}
} catch (Exception e) {
@@ -178,14 +188,14 @@ public class HiveSyncTool extends AbstractSyncTool {
LOG.warn("Unable to create database", e);
}
} else {
if (!hoodieHiveClient.doesDataBaseExist(hiveSyncConfig.databaseName)) {
if (!hoodieHiveClient.databaseExists(hiveSyncConfig.databaseName)) {
LOG.error("Hive database does not exist " + hiveSyncConfig.databaseName);
throw new HoodieHiveSyncException("hive database does not exist " + hiveSyncConfig.databaseName);
}
}
// Check if the necessary table exists
boolean tableExists = hoodieHiveClient.doesTableExist(tableName);
boolean tableExists = hoodieHiveClient.tableExists(tableName);
// check if isDropPartition
boolean isDropPartition = hoodieHiveClient.isDropPartition();
@@ -375,7 +385,7 @@ public class HiveSyncTool extends AbstractSyncTool {
private boolean syncPartitions(String tableName, List<String> writtenPartitionsSince, boolean isDropPartition) {
boolean partitionsChanged;
try {
List<Partition> hivePartitions = hoodieHiveClient.scanTablePartitions(tableName);
List<Partition> hivePartitions = hoodieHiveClient.getAllPartitions(tableName);
List<PartitionEvent> partitionEvents =
hoodieHiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, isDropPartition);

View File

@@ -18,10 +18,7 @@
package org.apache.hudi.hive;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.ImmutablePair;
@@ -30,18 +27,14 @@ import org.apache.hudi.hive.ddl.HMSDDLExecutor;
import org.apache.hudi.hive.ddl.HiveQueryDDLExecutor;
import org.apache.hudi.hive.ddl.HiveSyncMode;
import org.apache.hudi.hive.ddl.JDBCExecutor;
import org.apache.hudi.hive.util.HiveSchemaUtil;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient;
import org.apache.hudi.sync.common.HoodieSyncException;
import org.apache.hudi.sync.common.model.Partition;
import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.log4j.LogManager;
@@ -49,7 +42,6 @@ import org.apache.log4j.Logger;
import org.apache.parquet.schema.MessageType;
import org.apache.thrift.TException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
@@ -57,22 +49,19 @@ import java.util.Map;
import java.util.stream.Collectors;
import static org.apache.hudi.hadoop.utils.HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP;
import static org.apache.hudi.sync.common.util.TableUtils.tableId;
public class HoodieHiveClient extends AbstractSyncHoodieClient {
private static final String HOODIE_LAST_COMMIT_TIME_SYNC = "last_commit_time_sync";
private static final String HIVE_ESCAPE_CHARACTER = HiveSchemaUtil.HIVE_ESCAPE_CHARACTER;
/**
* This class implements logic to sync a Hudi table with either the Hive server or the Hive Metastore.
*/
public class HoodieHiveClient extends AbstractHiveSyncHoodieClient {
private static final Logger LOG = LogManager.getLogger(HoodieHiveClient.class);
private final PartitionValueExtractor partitionValueExtractor;
private final HoodieTimeline activeTimeline;
DDLExecutor ddlExecutor;
private IMetaStoreClient client;
private final HiveSyncConfig syncConfig;
public HoodieHiveClient(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
super(cfg.basePath, cfg.assumeDatePartitioning, cfg.useFileListingFromMetadata, cfg.withOperationField, fs);
this.syncConfig = cfg;
super(cfg, configuration, fs);
// Support JDBC, HiveQL and metastore based implementations for backwards compatibility. Future users should
// disable jdbc and depend on metastore client for all hive registrations
@@ -99,20 +88,6 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
} catch (Exception e) {
throw new HoodieHiveSyncException("Failed to create HiveMetaStoreClient", e);
}
try {
this.partitionValueExtractor =
(PartitionValueExtractor) Class.forName(cfg.partitionValueExtractorClass).newInstance();
} catch (Exception e) {
throw new HoodieHiveSyncException(
"Failed to initialize PartitionValueExtractor class " + cfg.partitionValueExtractorClass, e);
}
activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
}
public HoodieTimeline getActiveTimeline() {
return activeTimeline;
}
/**
@@ -159,61 +134,33 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
}
}
/**
* Iterate over the storage partitions and find if there are any new partitions that need to be added or updated.
* Generate a list of PartitionEvent based on the changes required.
*/
List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions, List<String> partitionStoragePartitions) {
return getPartitionEvents(tablePartitions, partitionStoragePartitions, false);
}
/**
* Iterate over the storage partitions and find if there are any new partitions that need to be added or updated.
* Generate a list of PartitionEvent based on the changes required.
*/
List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions, List<String> partitionStoragePartitions, boolean isDropPartition) {
Map<String, String> paths = new HashMap<>();
for (Partition tablePartition : tablePartitions) {
List<String> hivePartitionValues = tablePartition.getValues();
String fullTablePartitionPath =
Path.getPathWithoutSchemeAndAuthority(new Path(tablePartition.getSd().getLocation())).toUri().getPath();
paths.put(String.join(", ", hivePartitionValues), fullTablePartitionPath);
}
List<PartitionEvent> events = new ArrayList<>();
for (String storagePartition : partitionStoragePartitions) {
Path storagePartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, storagePartition);
String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath();
// Check if the partition values or if hdfs path is the same
List<String> storagePartitionValues = partitionValueExtractor.extractPartitionValuesInPath(storagePartition);
if (isDropPartition) {
events.add(PartitionEvent.newPartitionDropEvent(storagePartition));
} else {
if (!storagePartitionValues.isEmpty()) {
String storageValue = String.join(", ", storagePartitionValues);
if (!paths.containsKey(storageValue)) {
events.add(PartitionEvent.newPartitionAddEvent(storagePartition));
} else if (!paths.get(storageValue).equals(fullStoragePartitionPath)) {
events.add(PartitionEvent.newPartitionUpdateEvent(storagePartition));
}
}
}
}
return events;
}
/**
* Scan table partitions.
*
* @deprecated Use {@link #getAllPartitions} instead.
*/
public List<Partition> scanTablePartitions(String tableName) throws TException {
@Deprecated
public List<org.apache.hadoop.hive.metastore.api.Partition> scanTablePartitions(String tableName) throws TException {
return client.listPartitions(syncConfig.databaseName, tableName, (short) -1);
}
void updateTableDefinition(String tableName, MessageType newSchema) {
@Override
public void updateTableDefinition(String tableName, MessageType newSchema) {
ddlExecutor.updateTableDefinition(tableName, newSchema);
}
@Override
public List<Partition> getAllPartitions(String tableName) {
try {
return client.listPartitions(syncConfig.databaseName, tableName, (short) -1)
.stream()
.map(p -> new Partition(p.getValues(), p.getSd().getLocation()))
.collect(Collectors.toList());
} catch (TException e) {
throw new HoodieHiveSyncException("Failed to get all partitions for table " + tableId(syncConfig.databaseName, tableName), e);
}
}
@Override
public void createTable(String tableName, MessageType storageSchema, String inputFormatClass,
String outputFormatClass, String serdeClass,
@@ -226,18 +173,21 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
*/
@Override
public Map<String, String> getTableSchema(String tableName) {
if (!doesTableExist(tableName)) {
if (!tableExists(tableName)) {
throw new IllegalArgumentException(
"Failed to get schema for table " + tableName + " does not exist");
}
return ddlExecutor.getTableSchema(tableName);
}
/**
* @return true if the configured table exists
*/
@Deprecated
@Override
public boolean doesTableExist(String tableName) {
return tableExists(tableName);
}
@Override
public boolean tableExists(String tableName) {
try {
return client.tableExists(syncConfig.databaseName, tableName);
} catch (TException e) {
@@ -245,11 +195,13 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
}
}
/**
* @param databaseName
* @return true if the configured database exists
*/
@Deprecated
public boolean doesDataBaseExist(String databaseName) {
return databaseExists(databaseName);
}
@Override
public boolean databaseExists(String databaseName) {
try {
client.getDatabase(databaseName);
return true;
@@ -261,6 +213,7 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
}
}
@Override
public void createDatabase(String databaseName) {
ddlExecutor.createDatabase(databaseName);
}
@@ -321,6 +274,7 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
}
}
@Override
public void close() {
try {
ddlExecutor.close();
@@ -333,10 +287,6 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
}
}
List<String> getAllTables(String db) throws Exception {
return client.getAllTables(db);
}
@Override
public void updateLastCommitTimeSynced(String tableName) {
// Set the last commit time from the TBLproperties
@@ -352,14 +302,7 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
}
}
public Schema getAvroSchemaWithoutMetadataFields() {
try {
return new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields();
} catch (Exception e) {
throw new HoodieSyncException("Failed to read avro schema", e);
}
}
@Override
public List<FieldSchema> getTableCommentUsingMetastoreClient(String tableName) {
try {
return client.getSchema(syncConfig.databaseName, tableName);
@@ -368,11 +311,13 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
}
}
@Override
public void updateTableComments(String tableName, List<FieldSchema> oldSchema, List<Schema.Field> newSchema) {
Map<String,String> newComments = newSchema.stream().collect(Collectors.toMap(field -> field.name().toLowerCase(Locale.ROOT), field -> StringUtils.isNullOrEmpty(field.doc()) ? "" : field.doc()));
updateTableComments(tableName,oldSchema,newComments);
}
@Override
public void updateTableComments(String tableName, List<FieldSchema> oldSchema, Map<String,String> newComments) {
Map<String,String> oldComments = oldSchema.stream().collect(Collectors.toMap(fieldSchema -> fieldSchema.getName().toLowerCase(Locale.ROOT),
fieldSchema -> StringUtils.isNullOrEmpty(fieldSchema.getComment()) ? "" : fieldSchema.getComment()));

View File

@@ -20,10 +20,6 @@ package org.apache.hudi.hive;
public class HoodieHiveSyncException extends RuntimeException {
public HoodieHiveSyncException() {
super();
}
public HoodieHiveSyncException(String message) {
super(message);
}
@@ -32,11 +28,4 @@ public class HoodieHiveSyncException extends RuntimeException {
super(message, t);
}
public HoodieHiveSyncException(Throwable t) {
super(t);
}
protected static String format(String message, Object... args) {
return String.format(String.valueOf(message), (Object[]) args);
}
}

View File

@@ -30,11 +30,12 @@ import java.util.Map;
* There are two main implementations one is QueryBased other is based on HiveMetaStore
* QueryBasedDDLExecutor also has two implementations namely HiveQL based and other JDBC based.
*/
public interface DDLExecutor {
public interface DDLExecutor extends AutoCloseable {
/**
* @param databaseName name of database to be created.
*/
public void createDatabase(String databaseName);
void createDatabase(String databaseName);
/**
* Creates a table with the following properties.
@@ -47,9 +48,9 @@ public interface DDLExecutor {
* @param serdeProperties
* @param tableProperties
*/
public void createTable(String tableName, MessageType storageSchema, String inputFormatClass,
String outputFormatClass, String serdeClass,
Map<String, String> serdeProperties, Map<String, String> tableProperties);
void createTable(String tableName, MessageType storageSchema, String inputFormatClass,
String outputFormatClass, String serdeClass,
Map<String, String> serdeProperties, Map<String, String> tableProperties);
/**
* Updates the table with the newSchema.
@@ -57,7 +58,7 @@ public interface DDLExecutor {
* @param tableName
* @param newSchema
*/
public void updateTableDefinition(String tableName, MessageType newSchema);
void updateTableDefinition(String tableName, MessageType newSchema);
/**
* Fetches tableSchema for a table.
@@ -65,7 +66,7 @@ public interface DDLExecutor {
* @param tableName
* @return
*/
public Map<String, String> getTableSchema(String tableName);
Map<String, String> getTableSchema(String tableName);
/**
* Adds partition to table.
@@ -73,7 +74,7 @@ public interface DDLExecutor {
* @param tableName
* @param partitionsToAdd
*/
public void addPartitionsToTable(String tableName, List<String> partitionsToAdd);
void addPartitionsToTable(String tableName, List<String> partitionsToAdd);
/**
* Updates partitions for a given table.
@@ -81,7 +82,7 @@ public interface DDLExecutor {
* @param tableName
* @param changedPartitions
*/
public void updatePartitionsToTable(String tableName, List<String> changedPartitions);
void updatePartitionsToTable(String tableName, List<String> changedPartitions);
/**
* Drop partitions for a given table.
@@ -89,15 +90,13 @@ public interface DDLExecutor {
* @param tableName
* @param partitionsToDrop
*/
public void dropPartitionsToTable(String tableName, List<String> partitionsToDrop);
void dropPartitionsToTable(String tableName, List<String> partitionsToDrop);
/**
* update table comments
*
* @param tableName
* @param newSchema
* @param newSchema Map key: field name, Map value: [field type, field comment]
*/
public void updateTableComments(String tableName, Map<String, ImmutablePair<String,String>> newSchema);
public void close();
void updateTableComments(String tableName, Map<String, ImmutablePair<String, String>> newSchema);
}

View File

@@ -55,10 +55,6 @@ public class GlobalHiveSyncTool extends HiveSyncTool {
LOG.info("Sync complete for " + tableName);
}
public void close() {
hoodieHiveClient.close();
}
public Map<String, Option<String>> getLastReplicatedTimeStampMap() {
Map<String, Option<String>> timeStampMap = new HashMap<>();
Option<String> timeStamp = hoodieHiveClient.getLastReplicatedTime(snapshotTableName);

View File

@@ -138,12 +138,12 @@ public class TestHiveSyncTool {
HiveTestUtil.createCOWTable(instantTime, 5, useSchemaFromCommitMetadata);
reinitHiveSyncClient();
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
// Lets do the sync
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
hiveClient.getDataSchema().getColumns().size() + 1,
@@ -176,9 +176,9 @@ public class TestHiveSyncTool {
ddlExecutor.runSQL("ALTER TABLE `" + HiveTestUtil.TABLE_NAME
+ "` PARTITION (`datestr`='2050-01-01') SET LOCATION '/some/new/location'");
List<Partition> hivePartitions = hiveClient.scanTablePartitions(HiveTestUtil.TABLE_NAME);
List<org.apache.hudi.sync.common.model.Partition> hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.empty());
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, false);
assertEquals(1, partitionEvents.size(), "There should be only one partition event");
assertEquals(PartitionEventType.UPDATE, partitionEvents.iterator().next().eventType,
"The one partition event must of type UPDATE");
@@ -211,20 +211,20 @@ public class TestHiveSyncTool {
hiveSyncProps.setProperty(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key(), "true");
reinitHiveSyncClient();
assertDoesNotThrow((this::reSyncHiveTable));
assertTrue(hiveClient.doesDataBaseExist(HiveTestUtil.DB_NAME),
assertTrue(hiveClient.databaseExists(HiveTestUtil.DB_NAME),
"DataBases " + HiveTestUtil.DB_NAME + " should exist after sync completes");
// while autoCreateDatabase is false and database exists;
hiveSyncProps.setProperty(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key(), "false");
reinitHiveSyncClient();
assertDoesNotThrow((this::reSyncHiveTable));
assertTrue(hiveClient.doesDataBaseExist(HiveTestUtil.DB_NAME),
assertTrue(hiveClient.databaseExists(HiveTestUtil.DB_NAME),
"DataBases " + HiveTestUtil.DB_NAME + " should exist after sync completes");
// while autoCreateDatabase is true and database exists;
hiveSyncProps.setProperty(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key(), "true");
assertDoesNotThrow((this::reSyncHiveTable));
assertTrue(hiveClient.doesDataBaseExist(HiveTestUtil.DB_NAME),
assertTrue(hiveClient.databaseExists(HiveTestUtil.DB_NAME),
"DataBases " + HiveTestUtil.DB_NAME + " should exist after sync completes");
}
@@ -457,8 +457,8 @@ public class TestHiveSyncTool {
reSyncHiveTable();
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.of(commitTime1));
assertEquals(1, writtenPartitionsSince.size(), "We should have one partition written after 100 commit");
List<Partition> hivePartitions = hiveClient.scanTablePartitions(HiveTestUtil.TABLE_NAME);
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
List<org.apache.hudi.sync.common.model.Partition> hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, false);
assertEquals(1, partitionEvents.size(), "There should be only one partition event");
assertEquals(PartitionEventType.ADD, partitionEvents.iterator().next().eventType, "The one partition event must of type ADD");
@@ -581,11 +581,11 @@ public class TestHiveSyncTool {
String roTableName = HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE;
reinitHiveSyncClient();
assertFalse(hiveClient.doesTableExist(roTableName), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
assertFalse(hiveClient.tableExists(roTableName), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
// Lets do the sync
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(roTableName), "Table " + roTableName + " should exist after sync completes");
assertTrue(hiveClient.tableExists(roTableName), "Table " + roTableName + " should exist after sync completes");
if (useSchemaFromCommitMetadata) {
assertEquals(hiveClient.getTableSchema(roTableName).size(),
@@ -643,14 +643,14 @@ public class TestHiveSyncTool {
String snapshotTableName = HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
HiveTestUtil.createMORTable(instantTime, deltaCommitTime, 5, true, useSchemaFromCommitMetadata);
reinitHiveSyncClient();
assertFalse(hiveClient.doesTableExist(snapshotTableName),
assertFalse(hiveClient.tableExists(snapshotTableName),
"Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should not exist initially");
// Lets do the sync
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(snapshotTableName),
assertTrue(hiveClient.tableExists(snapshotTableName),
"Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should exist after sync completes");
@@ -713,11 +713,11 @@ public class TestHiveSyncTool {
HiveTestUtil.getCreatedTablesSet().add(HiveTestUtil.DB_NAME + "." + HiveTestUtil.TABLE_NAME);
reinitHiveSyncClient();
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
// Lets do the sync
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
hiveClient.getDataSchema().getColumns().size() + 3,
@@ -736,8 +736,8 @@ public class TestHiveSyncTool {
reinitHiveSyncClient();
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.of(instantTime));
assertEquals(1, writtenPartitionsSince.size(), "We should have one partition written after 100 commit");
List<Partition> hivePartitions = hiveClient.scanTablePartitions(HiveTestUtil.TABLE_NAME);
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
List<org.apache.hudi.sync.common.model.Partition> hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, false);
assertEquals(1, partitionEvents.size(), "There should be only one partition event");
assertEquals(PartitionEventType.ADD, partitionEvents.iterator().next().eventType, "The one partition event must of type ADD");
@@ -755,7 +755,7 @@ public class TestHiveSyncTool {
reinitHiveSyncClient();
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
hiveClient.getDataSchema().getColumns().size() + 3,
@@ -776,12 +776,12 @@ public class TestHiveSyncTool {
HiveTestUtil.createCOWTable(instantTime, 1, true);
reinitHiveSyncClient();
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
// Lets do the sync
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
hiveClient.getDataSchema().getColumns().size() + 1,
@@ -820,11 +820,11 @@ public class TestHiveSyncTool {
HiveTestUtil.createCOWTable(instantTime, 1, true);
reinitHiveSyncClient();
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
// Lets do the sync
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
hiveClient.getDataSchema().getColumns().size() + 1,
@@ -860,11 +860,11 @@ public class TestHiveSyncTool {
HiveTestUtil.getCreatedTablesSet().add(HiveTestUtil.DB_NAME + "." + HiveTestUtil.TABLE_NAME);
reinitHiveSyncClient();
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
// Lets do the sync
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
hiveClient.getDataSchema().getColumns().size(),
@@ -882,13 +882,13 @@ public class TestHiveSyncTool {
HiveTestUtil.createMORTable(commitTime, "", 5, false, true);
reinitHiveSyncClient();
assertFalse(hiveClient.doesTableExist(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
assertFalse(hiveClient.tableExists(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should not exist initially");
// Lets do the sync
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
assertTrue(hiveClient.tableExists(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should exist after sync completes");
// Schema being read from compacted base files
@@ -925,7 +925,7 @@ public class TestHiveSyncTool {
HiveTestUtil.createCOWTable(instantTime, 5, false);
reinitHiveSyncClient();
HoodieHiveClient prevHiveClient = hiveClient;
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
// Lets do the sync
@@ -936,12 +936,12 @@ public class TestHiveSyncTool {
reSyncHiveTable();
assertNull(hiveClient);
assertFalse(prevHiveClient.doesTableExist(HiveTestUtil.TABLE_NAME),
assertFalse(prevHiveClient.tableExists(HiveTestUtil.TABLE_NAME),
"Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
}
private void verifyOldParquetFileTest(HoodieHiveClient hiveClient, String emptyCommitTime) throws Exception {
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(),
hiveClient.getDataSchema().getColumns().size() + 1,
"Hive Schema should match the table schema + partition field");
@@ -973,7 +973,7 @@ public class TestHiveSyncTool {
final String emptyCommitTime = "200";
HiveTestUtil.createCommitFileWithSchema(commitMetadata, emptyCommitTime, true);
reinitHiveSyncClient();
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
reinitHiveSyncClient();
reSyncHiveTable();
@@ -1000,7 +1000,7 @@ public class TestHiveSyncTool {
reinitHiveSyncClient();
assertFalse(
hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
HiveSyncTool tool = new HiveSyncTool(hiveSyncProps, getHiveConf(), fileSystem);
// now delete the evolved commit instant
@@ -1017,7 +1017,7 @@ public class TestHiveSyncTool {
}
// table should not be synced yet
assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist at all");
assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist at all");
}
@ParameterizedTest
@@ -1033,7 +1033,7 @@ public class TestHiveSyncTool {
//HiveTestUtil.createCommitFile(commitMetadata, emptyCommitTime);
reinitHiveSyncClient();
assertFalse(
hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially");
reSyncHiveTable();
@@ -1120,7 +1120,7 @@ public class TestHiveSyncTool {
reinitHiveSyncClient();
reSyncHiveTable();
assertTrue(hiveClient.doesTableExist(tableName));
assertTrue(hiveClient.tableExists(tableName));
assertEquals(commitTime1, hiveClient.getLastCommitTimeSynced(tableName).get());
HiveTestUtil.addMORPartitions(0, true, true, true, ZonedDateTime.now().plusDays(2), commitTime1, commitTime2);
@@ -1138,7 +1138,7 @@ public class TestHiveSyncTool {
private void reinitHiveSyncClient() {
hiveSyncTool = new HiveSyncTool(hiveSyncProps, HiveTestUtil.getHiveConf(), fileSystem);
hiveClient = hiveSyncTool.hoodieHiveClient;
hiveClient = (HoodieHiveClient) hiveSyncTool.hoodieHiveClient;
}
private int getPartitionFieldSize() {

View File

@@ -18,8 +18,6 @@
package org.apache.hudi.sync.common;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieCommitMetadata;
@@ -31,6 +29,9 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.schema.MessageType;
@@ -43,10 +44,11 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
public abstract class AbstractSyncHoodieClient {
public abstract class AbstractSyncHoodieClient implements AutoCloseable {
private static final Logger LOG = LogManager.getLogger(AbstractSyncHoodieClient.class);
public static final String HOODIE_LAST_COMMIT_TIME_SYNC = "last_commit_time_sync";
public static final TypeConverter TYPE_CONVERTOR = new TypeConverter() {};
protected final HoodieTableMetaClient metaClient;
@@ -89,12 +91,24 @@ public abstract class AbstractSyncHoodieClient {
String serdeClass, Map<String, String> serdeProperties,
Map<String, String> tableProperties);
/**
* @deprecated Use {@link #tableExists} instead.
*/
@Deprecated
public abstract boolean doesTableExist(String tableName);
public abstract boolean tableExists(String tableName);
public abstract Option<String> getLastCommitTimeSynced(String tableName);
public abstract void updateLastCommitTimeSynced(String tableName);
public abstract Option<String> getLastReplicatedTime(String tableName);
public abstract void updateLastReplicatedTimeStamp(String tableName, String timeStamp);
public abstract void deleteLastReplicatedTimeStamp(String tableName);
public abstract void addPartitionsToTable(String tableName, List<String> partitionsToAdd);
public abstract void updatePartitionsToTable(String tableName, List<String> changedPartitions);

View File

@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.sync.common.model;
import java.util.List;
import java.util.Objects;
public class Partition {
private final List<String> values;
private final String storageLocation;
public Partition(List<String> values, String storageLocation) {
this.values = values;
this.storageLocation = storageLocation;
}
public List<String> getValues() {
return values;
}
public String getStorageLocation() {
return storageLocation;
}
}

View File

@@ -0,0 +1,27 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.sync.common.util;
/**
 * Static helpers for composing table identifiers used by sync clients.
 */
public final class TableUtils {

  private TableUtils() {
    // Utility class: prevent instantiation.
  }

  /**
   * Builds the fully qualified identifier of a table, i.e. {@code database.table}.
   *
   * @param database database name
   * @param table    table name
   * @return the dot-joined {@code database.table} identifier
   */
  public static String tableId(String database, String table) {
    return String.format("%s.%s", database, table);
  }
}