1
0

[HUDI-2757] Implement Hudi AWS Glue sync (#5076)

This commit is contained in:
Raymond Xu
2022-03-28 11:54:59 -07:00
committed by GitHub
parent 4ed84b216d
commit 6ccbae4d2a
25 changed files with 1151 additions and 204 deletions

View File

@@ -114,7 +114,7 @@ public class DLASyncTool extends AbstractSyncTool {
LOG.info("Trying to sync hoodie table " + tableName + " with base path " + hoodieDLAClient.getBasePath()
+ " of type " + hoodieDLAClient.getTableType());
// Check if the necessary table exists
boolean tableExists = hoodieDLAClient.doesTableExist(tableName);
boolean tableExists = hoodieDLAClient.tableExists(tableName);
// Get the parquet schema for this table looking at the latest commit
MessageType schema = hoodieDLAClient.getDataSchema();
// Sync schema if needed

View File

@@ -18,8 +18,6 @@
package org.apache.hudi.dla;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
@@ -31,14 +29,17 @@ import org.apache.hudi.hive.PartitionValueExtractor;
import org.apache.hudi.hive.SchemaDifference;
import org.apache.hudi.hive.util.HiveSchemaUtil;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.schema.MessageType;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
@@ -115,7 +116,7 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
}
public Map<String, String> getTableSchema(String tableName) {
if (!doesTableExist(tableName)) {
if (!tableExists(tableName)) {
throw new IllegalArgumentException(
"Failed to get schema for table " + tableName + " does not exist");
}
@@ -222,6 +223,11 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
/**
 * Reports whether the given table exists.
 *
 * <p>This is a thin alias that forwards to {@link #tableExists(String)} and
 * returns its answer unchanged.
 *
 * @param tableName name of the table to look up
 * @return whatever {@code tableExists(tableName)} returns
 */
@Override
public boolean doesTableExist(String tableName) {
  final boolean exists = tableExists(tableName);
  return exists;
}
@Override
public boolean tableExists(String tableName) {
String sql = consutructShowCreateTableSQL(tableName);
Statement stmt = null;
ResultSet rs = null;
@@ -274,6 +280,22 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
// TODO: DLA does not support updating tblproperties, so do nothing.
}
/**
 * Unsupported by this client: no lookup is performed and an empty
 * {@code Option} is always returned.
 *
 * @param tableName name of the table; ignored
 * @return always {@code Option.empty()}
 */
@Override
public Option<String> getLastReplicatedTime(String tableName) {
  // Unsupported here, so there is never a value to report.
  final Option<String> noReplicatedTime = Option.empty();
  return noReplicatedTime;
}
/**
 * Unsupported by this client: the method body is intentionally empty, so
 * calling it does nothing.
 *
 * @param tableName name of the table; ignored
 * @param timeStamp timestamp value; ignored
 */
@Override
public void updateLastReplicatedTimeStamp(String tableName, String timeStamp) {
// Intentionally a no-op; this operation is unsupported here.
}
/**
 * Unsupported by this client: the method body is intentionally empty, so
 * calling it does nothing.
 *
 * @param tableName name of the table; ignored
 */
@Override
public void deleteLastReplicatedTimeStamp(String tableName) {
// Intentionally a no-op; this operation is unsupported here.
}
@Override
public void updatePartitionsToTable(String tableName, List<String> changedPartitions) {
if (changedPartitions.isEmpty()) {
@@ -370,6 +392,7 @@ public class HoodieDLAClient extends AbstractSyncHoodieClient {
}
}
@Override
public void close() {
try {
if (connection != null) {