1
0

[HUDI-4187] Fix partition order in AWS Glue sync (#5731)

This commit is contained in:
Nicolas Paris
2022-06-04 11:16:52 +02:00
committed by GitHub
parent 3759a38b99
commit 80783c27f5

View File

@@ -271,19 +271,22 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
try { try {
Map<String, String> mapSchema = parquetSchemaToMapSchema(storageSchema, syncConfig.supportTimestamp, false); Map<String, String> mapSchema = parquetSchemaToMapSchema(storageSchema, syncConfig.supportTimestamp, false);
List<Column> schemaPartitionKeys = new ArrayList<>();
List<Column> schemaWithoutPartitionKeys = new ArrayList<>(); List<Column> schemaWithoutPartitionKeys = new ArrayList<>();
for (String key : mapSchema.keySet()) { for (String key : mapSchema.keySet()) {
String keyType = getPartitionKeyType(mapSchema, key); String keyType = getPartitionKeyType(mapSchema, key);
Column column = new Column().withName(key).withType(keyType.toLowerCase()).withComment(""); Column column = new Column().withName(key).withType(keyType.toLowerCase()).withComment("");
// In Glue, the full schema should exclude the partition keys // In Glue, the full schema should exclude the partition keys
if (syncConfig.partitionFields.contains(key)) { if (!syncConfig.partitionFields.contains(key)) {
schemaPartitionKeys.add(column);
} else {
schemaWithoutPartitionKeys.add(column); schemaWithoutPartitionKeys.add(column);
} }
} }
// now create the schema partition
List<Column> schemaPartitionKeys = syncConfig.partitionFields.stream().map(partitionKey -> {
String keyType = getPartitionKeyType(mapSchema, partitionKey);
return new Column().withName(partitionKey).withType(keyType.toLowerCase()).withComment("");
}).collect(Collectors.toList());
StorageDescriptor storageDescriptor = new StorageDescriptor(); StorageDescriptor storageDescriptor = new StorageDescriptor();
serdeProperties.put("serialization.format", "1"); serdeProperties.put("serialization.format", "1");
storageDescriptor storageDescriptor