From 80783c27f5211ae5cd01d692223a106df5dff4a2 Mon Sep 17 00:00:00 2001
From: Nicolas Paris
Date: Sat, 4 Jun 2022 11:16:52 +0200
Subject: [PATCH] [HUDI-4187] Fix partition order in aws glue sync (#5731)

---
 .../hudi/aws/sync/AWSGlueCatalogSyncClient.java | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
index e5a23a9a5..f970423b3 100644
--- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
+++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
@@ -271,19 +271,22 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
     try {
       Map mapSchema = parquetSchemaToMapSchema(storageSchema, syncConfig.supportTimestamp, false);
 
-      List schemaPartitionKeys = new ArrayList<>();
       List schemaWithoutPartitionKeys = new ArrayList<>();
       for (String key : mapSchema.keySet()) {
         String keyType = getPartitionKeyType(mapSchema, key);
         Column column = new Column().withName(key).withType(keyType.toLowerCase()).withComment("");
         // In Glue, the full schema should exclude the partition keys
-        if (syncConfig.partitionFields.contains(key)) {
-          schemaPartitionKeys.add(column);
-        } else {
+        if (!syncConfig.partitionFields.contains(key)) {
           schemaWithoutPartitionKeys.add(column);
         }
       }
 
+      // now create the schema partition
+      List schemaPartitionKeys = syncConfig.partitionFields.stream().map(partitionKey -> {
+        String keyType = getPartitionKeyType(mapSchema, partitionKey);
+        return new Column().withName(partitionKey).withType(keyType.toLowerCase()).withComment("");
+      }).collect(Collectors.toList());
+
       StorageDescriptor storageDescriptor = new StorageDescriptor();
       serdeProperties.put("serialization.format", "1");
       storageDescriptor