From d6e83e8f49828940159cd34711cc88ee7b42dc1c Mon Sep 17 00:00:00 2001 From: wenningd Date: Tue, 26 Nov 2019 21:18:39 -0800 Subject: [PATCH] [HUDI-325] Fix Hive partition error for updated HDFS Hudi table (#1001) --- .../main/java/org/apache/hudi/common/util/FSUtils.java | 7 +++++++ .../main/java/org/apache/hudi/hive/HoodieHiveClient.java | 8 ++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/FSUtils.java index 86e3e87ec..60d85382d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/FSUtils.java @@ -530,6 +530,13 @@ public class FSUtils { return ((partitionPath == null) || (partitionPath.isEmpty())) ? basePath : new Path(basePath, partitionPath); } + /** + * Get DFS full partition path, i.e. the filesystem URI prefixed onto the partition path (e.g. hdfs://ip-address:8020/<partition path>) + */ + public static String getDFSFullPartitionPath(FileSystem fs, Path partitionPath) { + return fs.getUri() + partitionPath.toUri().getRawPath(); + } + /** + * This is due to HUDI-140 GCS has a different behavior for detecting EOF during seek(). 
* diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java b/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java index ed319ce9c..0b9e261dd 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java @@ -51,6 +51,7 @@ import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.storage.StorageSchemes; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -194,7 +195,9 @@ public class HoodieHiveClient { String alterTable = "ALTER TABLE " + syncConfig.tableName; for (String partition : partitions) { String partitionClause = getPartitionClause(partition); - String fullPartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition).toString(); + Path partitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition); + String fullPartitionPath = partitionPath.toUri().getScheme().equals(StorageSchemes.HDFS.getScheme()) + ? 
FSUtils.getDFSFullPartitionPath(fs, partitionPath) : partitionPath.toString(); String changePartition = alterTable + " PARTITION (" + partitionClause + ") SET LOCATION '" + fullPartitionPath + "'"; changePartitions.add(changePartition); @@ -218,7 +221,8 @@ public class HoodieHiveClient { List<PartitionEvent> events = Lists.newArrayList(); for (String storagePartition : partitionStoragePartitions) { - String fullStoragePartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, storagePartition).toString(); + Path storagePartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, storagePartition); + String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath(); // Check if the partition values or if hdfs path is the same List<String> storagePartitionValues = partitionValueExtractor.extractPartitionValuesInPath(storagePartition); Collections.sort(storagePartitionValues);