[HUDI-3978] Fix use of partition path field as hive partition field in flink (#5434)
* Fix partition path fields as hive sync partition fields error
This commit is contained in:
@@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
package org.apache.hudi.sink.utils;
|
package org.apache.hudi.sink.utils;
|
||||||
|
|
||||||
|
import org.apache.flink.annotation.VisibleForTesting;
|
||||||
import org.apache.hudi.aws.sync.AwsGlueCatalogSyncTool;
|
import org.apache.hudi.aws.sync.AwsGlueCatalogSyncTool;
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
import org.apache.hudi.configuration.FlinkOptions;
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
@@ -70,7 +71,8 @@ public class HiveSyncContext {
|
|||||||
return new HiveSyncContext(syncConfig, hiveConf, fs);
|
return new HiveSyncContext(syncConfig, hiveConf, fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static HiveSyncConfig buildSyncConfig(Configuration conf) {
|
@VisibleForTesting
|
||||||
|
public static HiveSyncConfig buildSyncConfig(Configuration conf) {
|
||||||
HiveSyncConfig hiveSyncConfig = new HiveSyncConfig();
|
HiveSyncConfig hiveSyncConfig = new HiveSyncConfig();
|
||||||
hiveSyncConfig.basePath = conf.getString(FlinkOptions.PATH);
|
hiveSyncConfig.basePath = conf.getString(FlinkOptions.PATH);
|
||||||
hiveSyncConfig.baseFileFormat = conf.getString(FlinkOptions.HIVE_SYNC_FILE_FORMAT);
|
hiveSyncConfig.baseFileFormat = conf.getString(FlinkOptions.HIVE_SYNC_FILE_FORMAT);
|
||||||
@@ -83,7 +85,7 @@ public class HiveSyncContext {
|
|||||||
hiveSyncConfig.tableProperties = conf.getString(FlinkOptions.HIVE_SYNC_TABLE_PROPERTIES);
|
hiveSyncConfig.tableProperties = conf.getString(FlinkOptions.HIVE_SYNC_TABLE_PROPERTIES);
|
||||||
hiveSyncConfig.serdeProperties = conf.getString(FlinkOptions.HIVE_SYNC_TABLE_SERDE_PROPERTIES);
|
hiveSyncConfig.serdeProperties = conf.getString(FlinkOptions.HIVE_SYNC_TABLE_SERDE_PROPERTIES);
|
||||||
hiveSyncConfig.jdbcUrl = conf.getString(FlinkOptions.HIVE_SYNC_JDBC_URL);
|
hiveSyncConfig.jdbcUrl = conf.getString(FlinkOptions.HIVE_SYNC_JDBC_URL);
|
||||||
hiveSyncConfig.partitionFields = Arrays.asList(FilePathUtils.extractPartitionKeys(conf));
|
hiveSyncConfig.partitionFields = Arrays.asList(FilePathUtils.extractHivePartitionFields(conf));
|
||||||
hiveSyncConfig.partitionValueExtractorClass = conf.getString(FlinkOptions.HIVE_SYNC_PARTITION_EXTRACTOR_CLASS_NAME);
|
hiveSyncConfig.partitionValueExtractorClass = conf.getString(FlinkOptions.HIVE_SYNC_PARTITION_EXTRACTOR_CLASS_NAME);
|
||||||
hiveSyncConfig.useJdbc = conf.getBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC);
|
hiveSyncConfig.useJdbc = conf.getBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC);
|
||||||
hiveSyncConfig.useFileListingFromMetadata = conf.getBoolean(FlinkOptions.METADATA_ENABLED);
|
hiveSyncConfig.useFileListingFromMetadata = conf.getBoolean(FlinkOptions.METADATA_ENABLED);
|
||||||
|
|||||||
@@ -425,4 +425,17 @@ public class FilePathUtils {
|
|||||||
}
|
}
|
||||||
return conf.getString(FlinkOptions.PARTITION_PATH_FIELD).split(",");
|
return conf.getString(FlinkOptions.PARTITION_PATH_FIELD).split(",");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts the hive sync partition fields with given configuration.
|
||||||
|
*
|
||||||
|
* @param conf The flink configuration
|
||||||
|
* @return array of the hive partition fields
|
||||||
|
*/
|
||||||
|
public static String[] extractHivePartitionFields(org.apache.flink.configuration.Configuration conf) {
|
||||||
|
if (FlinkOptions.isDefaultValueDefined(conf, FlinkOptions.HIVE_SYNC_PARTITION_FIELDS)) {
|
||||||
|
return extractPartitionKeys(conf);
|
||||||
|
}
|
||||||
|
return conf.getString(FlinkOptions.HIVE_SYNC_PARTITION_FIELDS).split(",");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.sink.utils;
|
||||||
|
|
||||||
|
import org.apache.flink.configuration.Configuration;
|
||||||
|
|
||||||
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
|
import org.apache.hudi.hive.HiveSyncConfig;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test cases for {@link HiveSyncContext}.
|
||||||
|
*/
|
||||||
|
public class TestHiveSyncContext {
|
||||||
|
/**
|
||||||
|
* Test that the file ids generated by the task can finally shuffled to itself.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testBuildSyncConfig() throws Exception {
|
||||||
|
Configuration configuration1 = new Configuration();
|
||||||
|
Configuration configuration2 = new Configuration();
|
||||||
|
String hiveSyncPartitionField = "hiveSyncPartitionField";
|
||||||
|
String partitionPathField = "partitionPathField";
|
||||||
|
|
||||||
|
configuration1.setString(FlinkOptions.HIVE_SYNC_PARTITION_FIELDS, hiveSyncPartitionField);
|
||||||
|
configuration1.setString(FlinkOptions.PARTITION_PATH_FIELD, partitionPathField);
|
||||||
|
|
||||||
|
configuration2.setString(FlinkOptions.PARTITION_PATH_FIELD, partitionPathField);
|
||||||
|
|
||||||
|
Class<?> threadClazz = Class.forName("org.apache.hudi.sink.utils.HiveSyncContext");
|
||||||
|
Method buildSyncConfigMethod = threadClazz.getDeclaredMethod("buildSyncConfig", Configuration.class);
|
||||||
|
buildSyncConfigMethod.setAccessible(true);
|
||||||
|
|
||||||
|
HiveSyncConfig hiveSyncConfig1 = HiveSyncContext.buildSyncConfig(configuration1);
|
||||||
|
HiveSyncConfig hiveSyncConfig2 = HiveSyncContext.buildSyncConfig(configuration2);
|
||||||
|
|
||||||
|
assertTrue(hiveSyncConfig1.partitionFields.get(0).equals(hiveSyncPartitionField));
|
||||||
|
assertTrue(hiveSyncConfig2.partitionFields.get(0).equals(partitionPathField));
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user