[HUDI-703] Add test for HoodieSyncCommand (#1774)
This commit is contained in:
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.integ;
|
||||
|
||||
import org.apache.hadoop.hbase.TableExistsException;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Base class to run cmd and generate data in hive.
|
||||
*/
|
||||
public class HoodieTestHiveBase extends ITTestBase {
|
||||
|
||||
protected enum PartitionType {
|
||||
SINGLE_KEY_PARTITIONED, MULTI_KEYS_PARTITIONED, NON_PARTITIONED,
|
||||
}
|
||||
|
||||
private final static int DEFAULT_TIME_WAIT = 5000;
|
||||
private final static String OVERWRITE_COMMIT_TYPE = "overwrite";
|
||||
|
||||
/**
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample Hoodie data-set and performs upserts on it.
|
||||
* Hive integration and upsert functionality is checked by running a count query in hive console. TODO: Add
|
||||
* spark-shell test-case
|
||||
*/
|
||||
public void generateDataByHoodieJavaApp(String hiveTableName, String tableType, PartitionType partitionType,
|
||||
String commitType, String hoodieTableName) throws Exception {
|
||||
|
||||
String hdfsPath = getHDFSPath(hiveTableName);
|
||||
String hdfsUrl = "hdfs://namenode" + hdfsPath;
|
||||
|
||||
Pair<String, String> stdOutErr;
|
||||
if (OVERWRITE_COMMIT_TYPE.equals(commitType)) {
|
||||
// Drop Table if it exists
|
||||
try {
|
||||
dropHiveTables(hiveTableName, tableType);
|
||||
} catch (AssertionError ex) {
|
||||
// In travis, sometimes, the hivemetastore is not ready even though we wait for the port to be up
|
||||
// Workaround to sleep for 5 secs and retry
|
||||
// Set sleep time by hoodie.hiveserver.time.wait
|
||||
Thread.sleep(getTimeWait());
|
||||
dropHiveTables(hiveTableName, tableType);
|
||||
}
|
||||
|
||||
// Ensure table does not exist
|
||||
stdOutErr = executeHiveCommand("show tables like '" + hiveTableName + "'");
|
||||
if (!stdOutErr.getLeft().isEmpty()) {
|
||||
throw new TableExistsException("Dropped table " + hiveTableName + " exists!");
|
||||
}
|
||||
}
|
||||
|
||||
// Run Hoodie Java App
|
||||
String cmd = String.format("%s %s --hive-sync --table-path %s --hive-url %s --table-type %s --hive-table %s" +
|
||||
" --commit-type %s --table-name %s", HOODIE_JAVA_APP, "HoodieJavaGenerateApp", hdfsUrl, HIVE_SERVER_JDBC_URL,
|
||||
tableType, hiveTableName, commitType, hoodieTableName);
|
||||
if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
|
||||
cmd = cmd + " --use-multi-partition-keys";
|
||||
} else if (partitionType == PartitionType.NON_PARTITIONED){
|
||||
cmd = cmd + " --non-partitioned";
|
||||
}
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, cmd, true);
|
||||
|
||||
String snapshotTableName = getSnapshotTableName(tableType, hiveTableName);
|
||||
|
||||
// Ensure table does exist
|
||||
stdOutErr = executeHiveCommand("show tables like '" + snapshotTableName + "'");
|
||||
assertEquals(snapshotTableName, stdOutErr.getLeft(), "Table exists");
|
||||
}
|
||||
|
||||
protected void dropHiveTables(String hiveTableName, String tableType) throws Exception {
|
||||
if (tableType.equals(HoodieTableType.MERGE_ON_READ.name())) {
|
||||
executeHiveCommand("drop table if exists " + hiveTableName + "_rt");
|
||||
executeHiveCommand("drop table if exists " + hiveTableName + "_ro");
|
||||
} else {
|
||||
executeHiveCommand("drop table if exists " + hiveTableName);
|
||||
}
|
||||
}
|
||||
|
||||
protected String getHDFSPath(String hiveTableName) {
|
||||
return "/" + hiveTableName;
|
||||
}
|
||||
|
||||
protected String getSnapshotTableName(String tableType, String hiveTableName) {
|
||||
return tableType.equals(HoodieTableType.MERGE_ON_READ.name())
|
||||
? hiveTableName + "_rt" : hiveTableName;
|
||||
}
|
||||
|
||||
private int getTimeWait() {
|
||||
try (InputStream stream = HoodieTestHiveBase.class.getClassLoader().getResourceAsStream("hoodie-docker.properties")) {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.load(stream);
|
||||
return properties.getInteger("hoodie.hiveserver.time.wait", DEFAULT_TIME_WAIT);
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Can not load property file, use default time wait for hiveserver.");
|
||||
return DEFAULT_TIME_WAIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -180,7 +180,7 @@ public abstract class ITTestBase {
|
||||
}
|
||||
}
|
||||
|
||||
TestExecStartResultCallback executeCommandStringInDocker(String containerName, String cmd, boolean expectedToSucceed)
|
||||
protected TestExecStartResultCallback executeCommandStringInDocker(String containerName, String cmd, boolean expectedToSucceed)
|
||||
throws Exception {
|
||||
LOG.info("\n\n#################################################################################################");
|
||||
LOG.info("Container : " + containerName + ", Running command :" + cmd);
|
||||
@@ -190,7 +190,7 @@ public abstract class ITTestBase {
|
||||
return executeCommandInDocker(containerName, cmdSplits, expectedToSucceed);
|
||||
}
|
||||
|
||||
Pair<String, String> executeHiveCommand(String hiveCommand) throws Exception {
|
||||
protected Pair<String, String> executeHiveCommand(String hiveCommand) throws Exception {
|
||||
|
||||
LOG.info("\n\n#################################################################################################");
|
||||
LOG.info("Running hive command :" + hiveCommand);
|
||||
|
||||
@@ -137,13 +137,13 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
// Run Hoodie Java App
|
||||
String cmd;
|
||||
if (partitionType == PartitionType.SINGLE_KEY_PARTITIONED) {
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
|
||||
cmd = HOODIE_JAVA_APP + " HoodieJavaApp --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
|
||||
+ " --table-type " + tableType + " --hive-table " + hiveTableName;
|
||||
} else if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
|
||||
cmd = HOODIE_JAVA_APP + " HoodieJavaApp --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
|
||||
+ " --table-type " + tableType + " --hive-table " + hiveTableName + " --use-multi-partition-keys";
|
||||
} else {
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
|
||||
cmd = HOODIE_JAVA_APP + " HoodieJavaApp --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
|
||||
+ " --table-type " + tableType + " --hive-table " + hiveTableName + " --non-partitioned";
|
||||
}
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, cmd, true);
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.integ.command;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieFileFormat;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
|
||||
import org.apache.hudi.integ.HoodieTestHiveBase;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Integration test class for HoodieSyncCommand in hudi-cli module.
|
||||
*/
|
||||
public class ITTestHoodieSyncCommand extends HoodieTestHiveBase {
|
||||
|
||||
private static final String HUDI_CLI_TOOL = HOODIE_WS_ROOT + "/hudi-cli/hudi-cli.sh";
|
||||
private static final String SYNC_VALIDATE_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sync-validate.commands";
|
||||
|
||||
@Test
|
||||
public void testValidateSync() throws Exception {
|
||||
String hiveTableName = "docker_hoodie_sync_valid_test";
|
||||
String hiveTableName2 = "docker_hoodie_sync_valid_test_2";
|
||||
|
||||
generateDataByHoodieJavaApp(
|
||||
hiveTableName, HoodieTableType.COPY_ON_WRITE.name(), PartitionType.SINGLE_KEY_PARTITIONED, "overwrite", hiveTableName);
|
||||
|
||||
syncHoodieTable(hiveTableName2, "INSERT");
|
||||
|
||||
generateDataByHoodieJavaApp(
|
||||
hiveTableName, HoodieTableType.COPY_ON_WRITE.name(), PartitionType.SINGLE_KEY_PARTITIONED, "append", hiveTableName);
|
||||
|
||||
TestExecStartResultCallback result =
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, HUDI_CLI_TOOL + " --cmdfile " + SYNC_VALIDATE_COMMANDS, true);
|
||||
|
||||
String expected = String.format("Count difference now is (count(%s) - count(%s) == %d. Catch up count is %d",
|
||||
hiveTableName, hiveTableName2, 100, 200);
|
||||
assertTrue(result.getStderr().toString().contains(expected));
|
||||
|
||||
dropHiveTables(hiveTableName, HoodieTableType.COPY_ON_WRITE.name());
|
||||
dropHiveTables(hiveTableName2, HoodieTableType.COPY_ON_WRITE.name());
|
||||
}
|
||||
|
||||
private void syncHoodieTable(String hiveTableName, String op) throws Exception {
|
||||
StringBuilder cmdBuilder = new StringBuilder("spark-submit --packages org.apache.spark:spark-avro_2.11:2.4.4 ")
|
||||
.append(" --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer ").append(HUDI_UTILITIES_BUNDLE)
|
||||
.append(" --table-type COPY_ON_WRITE ")
|
||||
.append(" --base-file-format ").append(HoodieFileFormat.PARQUET.toString())
|
||||
.append(" --source-class org.apache.hudi.utilities.sources.HoodieIncrSource --source-ordering-field timestamp ")
|
||||
.append(" --target-base-path ").append(getHDFSPath(hiveTableName))
|
||||
.append(" --target-table ").append(hiveTableName)
|
||||
.append(" --op ").append(op)
|
||||
.append(" --props file:///var/hoodie/ws/docker/demo/config/hoodie-incr.properties")
|
||||
.append(" --enable-hive-sync");
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, cmdBuilder.toString(), true);
|
||||
}
|
||||
}
|
||||
18
hudi-integ-test/src/test/resources/hoodie-docker.properties
Normal file
18
hudi-integ-test/src/test/resources/hoodie-docker.properties
Normal file
@@ -0,0 +1,18 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
hoodie.hiveserver.time.wait=5000
|
||||
Reference in New Issue
Block a user