[HUDI-3730] Improve meta sync class design and hierarchies (#5854)
* [HUDI-3730] Improve meta sync class design and hierarchies (#5754) * Implements class design proposed in RFC-55 Co-authored-by: jian.feng <fengjian428@gmial.com> Co-authored-by: jian.feng <jian.feng@shopee.com>
This commit is contained in:
@@ -50,6 +50,12 @@ import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings;
|
||||
import static org.apache.hudi.common.testutils.Transformations.randomSelectAsHoodieKeys;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_PASS;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_ENABLED;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
|
||||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
|
||||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
|
||||
|
||||
/**
|
||||
* Sample program that writes & reads hoodie tables via the Spark datasource.
|
||||
@@ -256,12 +262,12 @@ public class HoodieJavaApp {
|
||||
private DataFrameWriter<Row> updateHiveSyncConfig(DataFrameWriter<Row> writer) {
|
||||
if (enableHiveSync) {
|
||||
LOG.info("Enabling Hive sync to " + hiveJdbcUrl);
|
||||
writer = writer.option(HiveSyncConfig.META_SYNC_TABLE_NAME.key(), hiveTable)
|
||||
.option(HiveSyncConfig.META_SYNC_DATABASE_NAME.key(), hiveDB)
|
||||
.option(HiveSyncConfig.HIVE_URL.key(), hiveJdbcUrl)
|
||||
.option(HiveSyncConfig.HIVE_USER.key(), hiveUser)
|
||||
.option(HiveSyncConfig.HIVE_PASS.key(), hivePass)
|
||||
.option(HiveSyncConfig.HIVE_SYNC_ENABLED.key(), "true");
|
||||
writer = writer.option(META_SYNC_TABLE_NAME.key(), hiveTable)
|
||||
.option(META_SYNC_DATABASE_NAME.key(), hiveDB)
|
||||
.option(HIVE_URL.key(), hiveJdbcUrl)
|
||||
.option(HIVE_USER.key(), hiveUser)
|
||||
.option(HIVE_PASS.key(), hivePass)
|
||||
.option(HIVE_SYNC_ENABLED.key(), "true");
|
||||
if (nonPartitionedTable) {
|
||||
writer = writer
|
||||
.option(HiveSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key(),
|
||||
|
||||
@@ -46,6 +46,12 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_PASS;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_ENABLED;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
|
||||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
|
||||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
|
||||
|
||||
public class HoodieJavaGenerateApp {
|
||||
@Parameter(names = {"--table-path", "-p"}, description = "Path for Hoodie sample table")
|
||||
@@ -126,12 +132,12 @@ public class HoodieJavaGenerateApp {
|
||||
private DataFrameWriter<Row> updateHiveSyncConfig(DataFrameWriter<Row> writer) {
|
||||
if (enableHiveSync) {
|
||||
LOG.info("Enabling Hive sync to " + hiveJdbcUrl);
|
||||
writer = writer.option(HiveSyncConfig.META_SYNC_TABLE_NAME.key(), hiveTable)
|
||||
.option(HiveSyncConfig.META_SYNC_DATABASE_NAME.key(), hiveDB)
|
||||
.option(HiveSyncConfig.HIVE_URL.key(), hiveJdbcUrl)
|
||||
.option(HiveSyncConfig.HIVE_USER.key(), hiveUser)
|
||||
.option(HiveSyncConfig.HIVE_PASS.key(), hivePass)
|
||||
.option(HiveSyncConfig.HIVE_SYNC_ENABLED.key(), "true");
|
||||
writer = writer.option(META_SYNC_TABLE_NAME.key(), hiveTable)
|
||||
.option(META_SYNC_DATABASE_NAME.key(), hiveDB)
|
||||
.option(HIVE_URL.key(), hiveJdbcUrl)
|
||||
.option(HIVE_USER.key(), hiveUser)
|
||||
.option(HIVE_PASS.key(), hivePass)
|
||||
.option(HIVE_SYNC_ENABLED.key(), "true");
|
||||
if (nonPartitionedTable) {
|
||||
writer = writer
|
||||
.option(HiveSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key(),
|
||||
|
||||
@@ -54,6 +54,12 @@ import java.util.concurrent.Future;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_PASS;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_ENABLED;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
|
||||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
|
||||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
|
||||
|
||||
/**
|
||||
* Sample program that writes & reads hoodie tables via the Spark datasource streaming.
|
||||
@@ -383,12 +389,12 @@ public class HoodieJavaStreamingApp {
|
||||
private DataStreamWriter<Row> updateHiveSyncConfig(DataStreamWriter<Row> writer) {
|
||||
if (enableHiveSync) {
|
||||
LOG.info("Enabling Hive sync to " + hiveJdbcUrl);
|
||||
writer = writer.option(HiveSyncConfig.META_SYNC_TABLE_NAME.key(), hiveTable)
|
||||
.option(HiveSyncConfig.META_SYNC_DATABASE_NAME.key(), hiveDB)
|
||||
.option(HiveSyncConfig.HIVE_URL.key(), hiveJdbcUrl)
|
||||
.option(HiveSyncConfig.HIVE_USER.key(), hiveUser)
|
||||
.option(HiveSyncConfig.HIVE_PASS.key(), hivePass)
|
||||
.option(HiveSyncConfig.HIVE_SYNC_ENABLED.key(), "true");
|
||||
writer = writer.option(META_SYNC_TABLE_NAME.key(), hiveTable)
|
||||
.option(META_SYNC_DATABASE_NAME.key(), hiveDB)
|
||||
.option(HIVE_URL.key(), hiveJdbcUrl)
|
||||
.option(HIVE_USER.key(), hiveUser)
|
||||
.option(HIVE_PASS.key(), hivePass)
|
||||
.option(HIVE_SYNC_ENABLED.key(), "true");
|
||||
if (useMultiPartitionKeys) {
|
||||
writer = writer.option(HiveSyncConfig.META_SYNC_PARTITION_FIELDS.key(), "year,month,day").option(
|
||||
HiveSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key(),
|
||||
|
||||
@@ -18,12 +18,6 @@
|
||||
|
||||
package org.apache.hudi;
|
||||
|
||||
import org.apache.avro.Conversions;
|
||||
import org.apache.avro.LogicalTypes;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericFixed;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.client.SparkRDDWriteClient;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
@@ -37,8 +31,14 @@ import org.apache.hudi.config.HoodieStorageConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner;
|
||||
import org.apache.hudi.hive.HiveSyncConfig;
|
||||
import org.apache.hudi.table.BulkInsertPartitioner;
|
||||
|
||||
import org.apache.avro.Conversions;
|
||||
import org.apache.avro.LogicalTypes;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericFixed;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Row;
|
||||
@@ -54,7 +54,6 @@ import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.Arguments;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.Captor;
|
||||
import org.mockito.Mock;
|
||||
@@ -70,18 +69,13 @@ import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.apache.hudi.DataSourceUtils.tryOverrideParquetWriteLegacyFormatProperty;
|
||||
import static org.apache.hudi.common.model.HoodieFileFormat.PARQUET;
|
||||
import static org.apache.hudi.hive.ddl.HiveSyncMode.HMS;
|
||||
import static org.hamcrest.CoreMatchers.containsString;
|
||||
import static org.hamcrest.CoreMatchers.equalTo;
|
||||
import static org.hamcrest.CoreMatchers.instanceOf;
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.anyString;
|
||||
import static org.mockito.Mockito.times;
|
||||
@@ -252,29 +246,6 @@ public class TestDataSourceUtils {
|
||||
});
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(booleans = {true, false})
|
||||
public void testBuildHiveSyncConfig(boolean useSyncMode) {
|
||||
TypedProperties props = new TypedProperties();
|
||||
if (useSyncMode) {
|
||||
props.setProperty(DataSourceWriteOptions.HIVE_SYNC_MODE().key(), HMS.name());
|
||||
props.setProperty(DataSourceWriteOptions.HIVE_USE_JDBC().key(), String.valueOf(false));
|
||||
}
|
||||
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), HIVE_DATABASE);
|
||||
props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), HIVE_TABLE);
|
||||
HiveSyncConfig hiveSyncConfig = DataSourceUtils.buildHiveSyncConfig(props, config.getBasePath(), PARQUET.name());
|
||||
|
||||
if (useSyncMode) {
|
||||
assertFalse(hiveSyncConfig.useJdbc);
|
||||
assertEquals(HMS.name(), hiveSyncConfig.syncMode);
|
||||
} else {
|
||||
assertTrue(hiveSyncConfig.useJdbc);
|
||||
assertNull(hiveSyncConfig.syncMode);
|
||||
}
|
||||
assertEquals(HIVE_DATABASE, hiveSyncConfig.databaseName);
|
||||
assertEquals(HIVE_TABLE, hiveSyncConfig.tableName);
|
||||
}
|
||||
|
||||
private void setAndVerifyHoodieWriteClientWith(final String partitionerClassName) {
|
||||
config = HoodieWriteConfig.newBuilder().withPath(config.getBasePath())
|
||||
.withUserDefinedBulkInsertPartitionerClass(partitionerClassName)
|
||||
|
||||
Reference in New Issue
Block a user