[HUDI-3357] MVP implementation of BigQuerySyncTool (#5125)
Co-authored-by: Raymond Xu <2701446+xushiyan@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
c19f505b5a
commit
20964df770
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.gcp.bigquery;
|
||||
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING;
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_LOCATION;
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_NAME;
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PARTITION_FIELDS;
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PROJECT_ID;
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI;
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI_PREFIX;
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SYNC_BASE_PATH;
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_TABLE_NAME;
|
||||
import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestBigQuerySyncConfig {
|
||||
|
||||
BigQuerySyncConfig syncConfig;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
syncConfig = new BigQuerySyncConfig();
|
||||
syncConfig.projectId = "fooproject";
|
||||
syncConfig.datasetName = "foodataset";
|
||||
syncConfig.datasetLocation = "US";
|
||||
syncConfig.tableName = "footable";
|
||||
syncConfig.sourceUri = "gs://test-bucket/dwh/table_name/dt=*";
|
||||
syncConfig.sourceUriPrefix = "gs://test-bucket/dwh/table_name/";
|
||||
syncConfig.basePath = "gs://test-bucket/dwh/table_name";
|
||||
syncConfig.partitionFields = Arrays.asList("a", "b");
|
||||
syncConfig.useFileListingFromMetadata = true;
|
||||
syncConfig.assumeDatePartitioning = true;
|
||||
syncConfig.help = true;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCopy() {
|
||||
BigQuerySyncConfig copied = BigQuerySyncConfig.copy(syncConfig);
|
||||
assertEquals(copied.partitionFields, syncConfig.partitionFields);
|
||||
assertEquals(copied.basePath, syncConfig.basePath);
|
||||
assertEquals(copied.projectId, syncConfig.projectId);
|
||||
assertEquals(copied.datasetName, syncConfig.datasetName);
|
||||
assertEquals(copied.datasetLocation, syncConfig.datasetLocation);
|
||||
assertEquals(copied.tableName, syncConfig.tableName);
|
||||
assertEquals(copied.sourceUri, syncConfig.sourceUri);
|
||||
assertEquals(copied.sourceUriPrefix, syncConfig.sourceUriPrefix);
|
||||
assertEquals(copied.useFileListingFromMetadata, syncConfig.useFileListingFromMetadata);
|
||||
assertEquals(copied.assumeDatePartitioning, syncConfig.assumeDatePartitioning);
|
||||
assertEquals(copied.help, syncConfig.help);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToProps() {
|
||||
TypedProperties props = syncConfig.toProps();
|
||||
assertEquals("fooproject", props.getString(BIGQUERY_SYNC_PROJECT_ID));
|
||||
assertEquals("foodataset", props.getString(BIGQUERY_SYNC_DATASET_NAME));
|
||||
assertEquals("US", props.getString(BIGQUERY_SYNC_DATASET_LOCATION));
|
||||
assertEquals("footable", props.getString(BIGQUERY_SYNC_TABLE_NAME));
|
||||
assertEquals("gs://test-bucket/dwh/table_name/dt=*", props.getString(BIGQUERY_SYNC_SOURCE_URI));
|
||||
assertEquals("gs://test-bucket/dwh/table_name/", props.getString(BIGQUERY_SYNC_SOURCE_URI_PREFIX));
|
||||
assertEquals("gs://test-bucket/dwh/table_name", props.getString(BIGQUERY_SYNC_SYNC_BASE_PATH));
|
||||
assertEquals("a,b", props.getString(BIGQUERY_SYNC_PARTITION_FIELDS));
|
||||
assertEquals("true", props.getString(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA));
|
||||
assertEquals("true", props.getString(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void fromProps() {
|
||||
TypedProperties props = new TypedProperties();
|
||||
props.put(BIGQUERY_SYNC_PROJECT_ID, "fooproject");
|
||||
props.put(BIGQUERY_SYNC_DATASET_NAME, "foodataset");
|
||||
props.put(BIGQUERY_SYNC_DATASET_LOCATION, "US");
|
||||
props.put(BIGQUERY_SYNC_TABLE_NAME, "footable");
|
||||
props.put(BIGQUERY_SYNC_SOURCE_URI, "gs://test-bucket/dwh/table_name/dt=*");
|
||||
props.put(BIGQUERY_SYNC_SOURCE_URI_PREFIX, "gs://test-bucket/dwh/table_name/");
|
||||
props.put(BIGQUERY_SYNC_SYNC_BASE_PATH, "gs://test-bucket/dwh/table_name");
|
||||
props.put(BIGQUERY_SYNC_PARTITION_FIELDS, "a,b");
|
||||
props.put(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA, true);
|
||||
props.put(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING, true);
|
||||
BigQuerySyncConfig cfg = BigQuerySyncConfig.fromProps(props);
|
||||
|
||||
assertEquals(syncConfig.projectId, cfg.projectId);
|
||||
assertEquals(syncConfig.datasetName, cfg.datasetName);
|
||||
assertEquals(syncConfig.datasetLocation, cfg.datasetLocation);
|
||||
assertEquals(syncConfig.tableName, cfg.tableName);
|
||||
assertEquals(syncConfig.sourceUri, cfg.sourceUri);
|
||||
assertEquals(syncConfig.sourceUriPrefix, cfg.sourceUriPrefix);
|
||||
assertEquals(syncConfig.basePath, cfg.basePath);
|
||||
assertEquals(syncConfig.partitionFields, cfg.partitionFields);
|
||||
assertEquals(syncConfig.useFileListingFromMetadata, cfg.useFileListingFromMetadata);
|
||||
assertEquals(syncConfig.assumeDatePartitioning, cfg.assumeDatePartitioning);
|
||||
}
|
||||
}
|
||||
29
hudi-gcp/src/test/resources/log4j-surefire-quiet.properties
Normal file
29
hudi-gcp/src/test/resources/log4j-surefire-quiet.properties
Normal file
@@ -0,0 +1,29 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
log4j.rootLogger=ERROR, CONSOLE
|
||||
log4j.logger.org.apache.hudi=ERROR
|
||||
|
||||
# CONSOLE is set to be a ConsoleAppender.
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
# CONSOLE uses PatternLayout.
|
||||
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c %x - %m%n
|
||||
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
|
||||
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
|
||||
log4j.appender.CONSOLE.filter.a.LevelMin=WARN
|
||||
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
|
||||
29
hudi-gcp/src/test/resources/log4j-surefire.properties
Normal file
29
hudi-gcp/src/test/resources/log4j-surefire.properties
Normal file
@@ -0,0 +1,29 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
log4j.rootLogger=WARN, CONSOLE
|
||||
log4j.logger.org.apache.hudi=INFO
|
||||
|
||||
# CONSOLE is set to be a ConsoleAppender.
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
# CONSOLE uses PatternLayout.
|
||||
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
||||
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
|
||||
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
|
||||
log4j.appender.CONSOLE.filter.a.LevelMin=WARN
|
||||
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
|
||||
Reference in New Issue
Block a user