1
0

[HUDI-1484] Escape the partition value in HiveSyncTool (#2363)

This commit is contained in:
pengzhiwei
2020-12-29 12:02:36 +08:00
committed by GitHub
parent 4c17528de0
commit b83d1d3e61
4 changed files with 44 additions and 6 deletions

View File

@@ -84,6 +84,9 @@ public class HiveSyncConfig implements Serializable {
+ "Disabled by default for backward compatibility.")
public Boolean supportTimestamp = false;
@Parameter(names = {"--decode-partition"}, description = "Decode the partition value if the partition has encoded during writing")
public Boolean decodePartition = false;
public static HiveSyncConfig copy(HiveSyncConfig cfg) {
HiveSyncConfig newConfig = new HiveSyncConfig();
newConfig.basePath = cfg.basePath;
@@ -97,15 +100,30 @@ public class HiveSyncConfig implements Serializable {
newConfig.tableName = cfg.tableName;
newConfig.usePreApacheInputFormat = cfg.usePreApacheInputFormat;
newConfig.supportTimestamp = cfg.supportTimestamp;
newConfig.decodePartition = cfg.decodePartition;
return newConfig;
}
/**
 * Returns a human-readable dump of every sync setting for logging/debugging.
 *
 * <p>NOTE(review): {@code hivePass} is emitted in clear text here — confirm this
 * is never logged at production-visible levels, or mask it.
 *
 * <p>The diff extraction had left the pre-commit return statement in front of the
 * post-commit one, making the second {@code return} unreachable (a Java compile
 * error); only the final (post-commit) implementation is kept.
 */
@Override
public String toString() {
  return "HiveSyncConfig{"
      + "databaseName='" + databaseName + '\''
      + ", tableName='" + tableName + '\''
      + ", baseFileFormat='" + baseFileFormat + '\''
      + ", hiveUser='" + hiveUser + '\''
      + ", hivePass='" + hivePass + '\''
      + ", jdbcUrl='" + jdbcUrl + '\''
      + ", basePath='" + basePath + '\''
      + ", partitionFields=" + partitionFields
      + ", partitionValueExtractorClass='" + partitionValueExtractorClass + '\''
      + ", assumeDatePartitioning=" + assumeDatePartitioning
      + ", usePreApacheInputFormat=" + usePreApacheInputFormat
      + ", useJdbc=" + useJdbc
      + ", autoCreateDatabase=" + autoCreateDatabase
      + ", skipROSuffix=" + skipROSuffix
      + ", help=" + help
      + ", supportTimestamp=" + supportTimestamp
      + ", decodePartition=" + decodePartition
      + '}';
}
}

View File

@@ -18,6 +18,9 @@
package org.apache.hudi.hive;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
@@ -162,7 +165,17 @@ public class HoodieHiveClient extends AbstractSyncHoodieClient {
+ ". Check partition strategy. ");
List<String> partBuilder = new ArrayList<>();
for (int i = 0; i < syncConfig.partitionFields.size(); i++) {
partBuilder.add("`" + syncConfig.partitionFields.get(i) + "`='" + partitionValues.get(i) + "'");
String partitionValue = partitionValues.get(i);
// decode the partition before sync to hive to prevent multiple escapes of HIVE
if (syncConfig.decodePartition) {
try {
// This is a decode operator for encode in KeyGenUtils#getRecordPartitionPath
partitionValue = URLDecoder.decode(partitionValue, StandardCharsets.UTF_8.toString());
} catch (UnsupportedEncodingException e) {
throw new HoodieHiveSyncException("error in decode partition: " + partitionValue, e);
}
}
partBuilder.add("`" + syncConfig.partitionFields.get(i) + "`='" + partitionValue + "'");
}
return String.join(",", partBuilder);
}