1
0

[HUDI-296] Explore use of spotless to auto fix formatting errors (#945)

- Add spotless format fixing to project
- One-time reformatting of the existing code for conformity
- Build fails on formatting violations; running mvn spotless:apply auto-fixes them
This commit is contained in:
leesf
2019-10-10 20:19:40 +08:00
committed by vinoth chandar
parent 834c591955
commit b19bed442d
381 changed files with 7350 additions and 9064 deletions

View File

@@ -87,8 +87,7 @@ public abstract class ITTestBase {
}
private static String getHiveConsoleCommandFile(String commandFile, String additionalVar) {
StringBuilder builder = new StringBuilder()
.append("beeline -u " + HIVE_SERVER_JDBC_URL)
StringBuilder builder = new StringBuilder().append("beeline -u " + HIVE_SERVER_JDBC_URL)
.append(" --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat ")
.append(" --hiveconf hive.stats.autogather=false ")
.append(" --hivevar hudi.hadoop.bundle=" + HUDI_HADOOP_BUNDLE);
@@ -100,30 +99,23 @@ public abstract class ITTestBase {
}
static String getSparkShellCommand(String commandFile) {
return new StringBuilder()
.append("spark-shell --jars ").append(HUDI_SPARK_BUNDLE)
return new StringBuilder().append("spark-shell --jars ").append(HUDI_SPARK_BUNDLE)
.append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR)
.append(" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
.append(" --packages com.databricks:spark-avro_2.11:4.0.0 ")
.append(" -i ").append(commandFile)
.toString();
.append(
" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
.append(" --packages com.databricks:spark-avro_2.11:4.0.0 ").append(" -i ").append(commandFile).toString();
}
@Before
public void init() {
String dockerHost = (OVERRIDDEN_DOCKER_HOST != null) ? OVERRIDDEN_DOCKER_HOST : DEFAULT_DOCKER_HOST;
//Assuming insecure docker engine
DockerClientConfig config = DefaultDockerClientConfig.createDefaultConfigBuilder()
.withDockerHost(dockerHost)
.build();
// Assuming insecure docker engine
DockerClientConfig config =
DefaultDockerClientConfig.createDefaultConfigBuilder().withDockerHost(dockerHost).build();
// using jaxrs/jersey implementation here (netty impl is also available)
DockerCmdExecFactory dockerCmdExecFactory = new JerseyDockerCmdExecFactory()
.withConnectTimeout(1000)
.withMaxTotalConnections(100)
.withMaxPerRouteConnections(10);
dockerClient = DockerClientBuilder.getInstance(config)
.withDockerCmdExecFactory(dockerCmdExecFactory)
.build();
DockerCmdExecFactory dockerCmdExecFactory = new JerseyDockerCmdExecFactory().withConnectTimeout(1000)
.withMaxTotalConnections(100).withMaxPerRouteConnections(10);
dockerClient = DockerClientBuilder.getInstance(config).withDockerCmdExecFactory(dockerCmdExecFactory).build();
await().atMost(60, SECONDS).until(this::servicesUp);
}
@@ -131,8 +123,7 @@ public abstract class ITTestBase {
List<Container> containerList = dockerClient.listContainersCmd().exec();
for (Container c : containerList) {
if (!c.getState().equalsIgnoreCase("running")) {
LOG.info("Container : " + Arrays.toString(c.getNames())
+ "not in running state, Curr State :" + c.getState());
LOG.info("Container : " + Arrays.toString(c.getNames()) + "not in running state, Curr State :" + c.getState());
return false;
}
}
@@ -142,31 +133,31 @@ public abstract class ITTestBase {
}
private String singleSpace(String str) {
return str.replaceAll("[\\s]+"," ");
return str.replaceAll("[\\s]+", " ");
}
private TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
boolean expectedToSucceed) throws Exception {
Container sparkWorkerContainer = runningContainers.get(containerName);
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId())
.withCmd(command).withAttachStdout(true).withAttachStderr(true);
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId()).withCmd(command).withAttachStdout(true)
.withAttachStderr(true);
ExecCreateCmdResponse createCmdResponse = cmd.exec();
TestExecStartResultCallback callback = new TestExecStartResultCallback(new ByteArrayOutputStream(),
new ByteArrayOutputStream());
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false)
.exec(callback).awaitCompletion();
TestExecStartResultCallback callback =
new TestExecStartResultCallback(new ByteArrayOutputStream(), new ByteArrayOutputStream());
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false).exec(callback)
.awaitCompletion();
int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
LOG.info("Exit code for command : " + exitCode);
LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString());
LOG.error("\n\n ###### Stderr #######\n" + callback.getStderr().toString());
if (expectedToSucceed) {
Assert.assertTrue("Command (" + Arrays.toString(command)
+ ") expected to succeed. Exit (" + exitCode + ")", exitCode == 0);
Assert.assertTrue("Command (" + Arrays.toString(command) + ") expected to succeed. Exit (" + exitCode + ")",
exitCode == 0);
} else {
Assert.assertTrue("Command (" + Arrays.toString(command)
+ ") expected to fail. Exit (" + exitCode + ")", exitCode != 0);
Assert.assertTrue("Command (" + Arrays.toString(command) + ") expected to fail. Exit (" + exitCode + ")",
exitCode != 0);
}
cmd.close();
return callback;
@@ -178,8 +169,8 @@ public abstract class ITTestBase {
}
}
TestExecStartResultCallback executeCommandStringInDocker(String containerName, String cmd,
boolean expectedToSucceed) throws Exception {
TestExecStartResultCallback executeCommandStringInDocker(String containerName, String cmd, boolean expectedToSucceed)
throws Exception {
LOG.info("\n\n#################################################################################################");
LOG.info("Container : " + containerName + ", Running command :" + cmd);
LOG.info("\n#################################################################################################");
@@ -211,16 +202,16 @@ public abstract class ITTestBase {
Pair<String, String> executeSparkSQLCommand(String commandFile, boolean expectedToSucceed) throws Exception {
String sparkShellCmd = getSparkShellCommand(commandFile);
TestExecStartResultCallback callback = executeCommandStringInDocker(ADHOC_1_CONTAINER,
sparkShellCmd, expectedToSucceed);
TestExecStartResultCallback callback =
executeCommandStringInDocker(ADHOC_1_CONTAINER, sparkShellCmd, expectedToSucceed);
return Pair.of(callback.getStdout().toString(), callback.getStderr().toString());
}
private void saveUpLogs() {
try {
// save up the Hive log files for introspection
String hiveLogStr = executeCommandStringInDocker(HIVESERVER, "cat /tmp/root/hive.log", true)
.getStdout().toString();
String hiveLogStr =
executeCommandStringInDocker(HIVESERVER, "cat /tmp/root/hive.log", true).getStdout().toString();
String filePath = System.getProperty("java.io.tmpdir") + "/" + System.currentTimeMillis() + "-hive.log";
FileIOUtils.writeStringToFile(hiveLogStr, filePath);
LOG.info("Hive log saved up at : " + filePath);
@@ -240,10 +231,10 @@ public abstract class ITTestBase {
int lastIndex = 0;
int count = 0;
while(lastIndex != -1){
while (lastIndex != -1) {
lastIndex = stdOutSingleSpaced.indexOf(expectedOutput, lastIndex);
if(lastIndex != -1){
count ++;
if (lastIndex != -1) {
count++;
lastIndex += expectedOutput.length();
}
}

View File

@@ -35,10 +35,8 @@ public class ITTestHoodieDemo extends ITTestBase {
private static String HDFS_BATCH_PATH1 = HDFS_DATA_DIR + "/" + "batch_1.json";
private static String HDFS_BATCH_PATH2 = HDFS_DATA_DIR + "/" + "batch_2.json";
private static String INPUT_BATCH_PATH1 = HOODIE_WS_ROOT +
"/docker/demo/data/batch_1.json";
private static String INPUT_BATCH_PATH2 = HOODIE_WS_ROOT +
"/docker/demo/data/batch_2.json";
private static String INPUT_BATCH_PATH1 = HOODIE_WS_ROOT + "/docker/demo/data/batch_1.json";
private static String INPUT_BATCH_PATH2 = HOODIE_WS_ROOT + "/docker/demo/data/batch_2.json";
private static String COW_BASE_PATH = "/user/hive/warehouse/stock_ticks_cow";
private static String MOR_BASE_PATH = "/user/hive/warehouse/stock_ticks_mor";
@@ -58,13 +56,13 @@ public class ITTestHoodieDemo extends ITTestBase {
private static String HIVE_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-incremental.commands";
private static String HIVE_SYNC_CMD_FMT = " --enable-hive-sync "
+ " --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 "
+ " --hoodie-conf hoodie.datasource.hive_sync.username=hive "
+ " --hoodie-conf hoodie.datasource.hive_sync.password=hive "
+ " --hoodie-conf hoodie.datasource.hive_sync.partition_fields=%s "
+ " --hoodie-conf hoodie.datasource.hive_sync.database=default "
+ " --hoodie-conf hoodie.datasource.hive_sync.table=%s";
private static String HIVE_SYNC_CMD_FMT =
" --enable-hive-sync " + " --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 "
+ " --hoodie-conf hoodie.datasource.hive_sync.username=hive "
+ " --hoodie-conf hoodie.datasource.hive_sync.password=hive "
+ " --hoodie-conf hoodie.datasource.hive_sync.partition_fields=%s "
+ " --hoodie-conf hoodie.datasource.hive_sync.database=default "
+ " --hoodie-conf hoodie.datasource.hive_sync.table=%s";
@Test
@@ -90,32 +88,30 @@ public class ITTestHoodieDemo extends ITTestBase {
}
private void setupDemo() throws Exception {
List<String> cmds = new ImmutableList.Builder<String>()
.add("hdfs dfsadmin -safemode wait") // handle NN going into safe mode at times
List<String> cmds = new ImmutableList.Builder<String>().add("hdfs dfsadmin -safemode wait") // handle NN going into
// safe mode at times
.add("hdfs dfs -mkdir -p " + HDFS_DATA_DIR)
.add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH1 + " " + HDFS_BATCH_PATH1)
.add("/bin/bash " + DEMO_CONTAINER_SCRIPT)
.build();
.add("/bin/bash " + DEMO_CONTAINER_SCRIPT).build();
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
}
private void ingestFirstBatchAndHiveSync() throws Exception {
List<String> cmds = new ImmutableList.Builder<String>()
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
+ HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
+ " --props /var/demo/config/dfs-source.properties "
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
+ HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
+ " --props /var/demo/config/dfs-source.properties "
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ " --disable-compaction "
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
+ " --storage-type COPY_ON_WRITE "
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
+ " --props /var/demo/config/dfs-source.properties "
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
+ " --storage-type MERGE_ON_READ "
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
+ " --props /var/demo/config/dfs-source.properties "
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ " --disable-compaction " + String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
.build();
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
@@ -128,32 +124,25 @@ public class ITTestHoodieDemo extends ITTestBase {
assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_rt |");
assertStdOutContains(stdOutErrPair,
"| partition |\n"
+ "+----------------+\n"
+ "| dt=2018-08-31 |\n"
+ "+----------------+\n", 3);
"| partition |\n" + "+----------------+\n" + "| dt=2018-08-31 |\n" + "+----------------+\n", 3);
stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n"
+ "+---------+----------------------+\n"
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n"
+ "| GOOG | 2018-08-31 10:29:00 |\n", 3);
assertStdOutContains(stdOutErrPair,
"| symbol | ts | volume | open | close |\n"
+ "+---------+----------------------+---------+------------+-----------+\n"
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n", 3);
+ "+---------+----------------------+---------+------------+-----------+\n"
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n",
3);
}
private void testSparkSQLAfterFirstBatch() throws Exception {
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH1_COMMANDS, true);
assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow |false |\n"
+ "|default |stock_ticks_mor |false |\n" + "|default |stock_ticks_mor_rt |false |");
assertStdOutContains(stdOutErrPair,
"|default |stock_ticks_cow |false |\n"
+ "|default |stock_ticks_mor |false |\n"
+ "|default |stock_ticks_mor_rt |false |");
assertStdOutContains(stdOutErrPair,
"+------+-------------------+\n"
+ "|GOOG |2018-08-31 10:29:00|\n"
+ "+------+-------------------+", 3);
"+------+-------------------+\n" + "|GOOG |2018-08-31 10:29:00|\n" + "+------+-------------------+", 3);
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|", 3);
}
@@ -161,34 +150,29 @@ public class ITTestHoodieDemo extends ITTestBase {
private void ingestSecondBatchAndHiveSync() throws Exception {
List<String> cmds = new ImmutableList.Builder<String>()
.add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH2 + " " + HDFS_BATCH_PATH2)
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
+ HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
+ " --props /var/demo/config/dfs-source.properties "
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
+ HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
+ " --props /var/demo/config/dfs-source.properties "
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ " --disable-compaction "
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
+ " --storage-type COPY_ON_WRITE "
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
+ " --props /var/demo/config/dfs-source.properties "
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
+ " --storage-type MERGE_ON_READ "
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
+ " --props /var/demo/config/dfs-source.properties "
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ " --disable-compaction " + String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
.build();
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
}
private void testHiveAfterSecondBatch() throws Exception {
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
assertStdOutContains(stdOutErrPair,
"| symbol | _c1 |\n"
+ "+---------+----------------------+\n"
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n"
+ "| GOOG | 2018-08-31 10:29:00 |\n");
assertStdOutContains(stdOutErrPair,
"| symbol | _c1 |\n"
+ "+---------+----------------------+\n"
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n"
+ "| GOOG | 2018-08-31 10:59:00 |\n", 2);
assertStdOutContains(stdOutErrPair,
"| symbol | ts | volume | open | close |\n"
@@ -197,75 +181,66 @@ public class ITTestHoodieDemo extends ITTestBase {
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n");
assertStdOutContains(stdOutErrPair,
"| symbol | ts | volume | open | close |\n"
+ "+---------+----------------------+---------+------------+-----------+\n"
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |\n", 2);
+ "+---------+----------------------+---------+------------+-----------+\n"
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |\n",
2);
}
private void testHiveAfterSecondBatchAfterCompaction() throws Exception {
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH2_COMMANDS);
assertStdOutContains(stdOutErrPair,
"| symbol | _c1 |\n"
+ "+---------+----------------------+\n"
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n"
+ "| GOOG | 2018-08-31 10:59:00 |", 2);
assertStdOutContains(stdOutErrPair, "| symbol | ts | volume | open | close |\n"
+ "+---------+----------------------+---------+------------+-----------+\n"
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |", 2);
assertStdOutContains(stdOutErrPair,
"| symbol | ts | volume | open | close |\n"
+ "+---------+----------------------+---------+------------+-----------+\n"
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |",
2);
}
private void testSparkSQLAfterSecondBatch() throws Exception {
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH2_COMMANDS, true);
assertStdOutContains(stdOutErrPair,
"+------+-------------------+\n"
+ "|GOOG |2018-08-31 10:59:00|\n"
+ "+------+-------------------+", 2);
"+------+-------------------+\n" + "|GOOG |2018-08-31 10:59:00|\n" + "+------+-------------------+", 2);
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|", 2);
assertStdOutContains(stdOutErrPair,
"+------+-------------------+\n"
+ "|GOOG |2018-08-31 10:29:00|\n"
+ "+------+-------------------+");
"+------+-------------------+\n" + "|GOOG |2018-08-31 10:29:00|\n" + "+------+-------------------+");
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|");
}
private void testIncrementalHiveQuery() throws Exception {
String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
.getStdout().toString();
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
"min.commit.time=" + minCommitTime +"`");
String minCommitTime =
executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true).getStdout().toString();
Pair<String, String> stdOutErrPair =
executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS, "min.commit.time=" + minCommitTime + "`");
assertStdOutContains(stdOutErrPair, "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
}
private void testIncrementalHiveQueryAfterCompaction() throws Exception {
String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
.getStdout().toString();
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
"min.commit.time=" + minCommitTime +"`");
String minCommitTime =
executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true).getStdout().toString();
Pair<String, String> stdOutErrPair =
executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS, "min.commit.time=" + minCommitTime + "`");
assertStdOutContains(stdOutErrPair,
"| symbol | ts | volume | open | close |\n"
+ "+---------+----------------------+---------+------------+-----------+\n"
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
+ "+---------+----------------------+---------+------------+-----------+\n"
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
}
private void testIncrementalSparkSQLQuery() throws Exception {
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_INCREMENTAL_COMMANDS, true);
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|");
assertStdOutContains(stdOutErrPair,
"|default |stock_ticks_cow |false |\n"
+ "|default |stock_ticks_derived_mor |false |\n"
+ "|default |stock_ticks_derived_mor_rt|false |\n"
+ "|default |stock_ticks_mor |false |\n"
+ "|default |stock_ticks_mor_rt |false |\n"
assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow |false |\n"
+ "|default |stock_ticks_derived_mor |false |\n" + "|default |stock_ticks_derived_mor_rt|false |\n"
+ "|default |stock_ticks_mor |false |\n" + "|default |stock_ticks_mor_rt |false |\n"
+ "| |stock_ticks_cow_incr |true |");
assertStdOutContains(stdOutErrPair,
"|count(1)|\n"
+ "+--------+\n"
+ "|99 |", 2);
assertStdOutContains(stdOutErrPair, "|count(1)|\n" + "+--------+\n" + "|99 |", 2);
}
private void scheduleAndRunCompaction() throws Exception {
executeCommandStringInDocker(ADHOC_1_CONTAINER, HUDI_CLI_TOOL + " --cmdfile " + COMPACTION_COMMANDS, true);
}
}
}

View File

@@ -28,16 +28,14 @@ import org.junit.Test;
public class ITTestHoodieSanity extends ITTestBase {
enum PartitionType {
SINGLE_KEY_PARTITIONED,
MULTI_KEYS_PARTITIONED,
NON_PARTITIONED,
SINGLE_KEY_PARTITIONED, MULTI_KEYS_PARTITIONED, NON_PARTITIONED,
}
@Test
/**
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
* query in hive console.
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key data-set
* and performs upserts on it. Hive integration and upsert functionality is checked by running a count query in hive
* console.
*/
public void testRunHoodieJavaAppOnSinglePartitionKeyCOWTable() throws Exception {
String hiveTableName = "docker_hoodie_single_partition_key_cow_test";
@@ -48,8 +46,8 @@ public class ITTestHoodieSanity extends ITTestBase {
@Test
/**
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with multiple partition-keys
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
* query in hive console.
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count query
* in hive console.
*/
public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception {
String hiveTableName = "docker_hoodie_multi_partition_key_cow_test";
@@ -59,9 +57,9 @@ public class ITTestHoodieSanity extends ITTestBase {
@Test
/**
* A basic integration test that runs HoodieJavaApp to create a sample non-partitioned COW Hoodie
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
* query in hive console.
* A basic integration test that runs HoodieJavaApp to create a sample non-partitioned COW Hoodie data-set and
* performs upserts on it. Hive integration and upsert functionality is checked by running a count query in hive
* console.
*/
public void testRunHoodieJavaAppOnNonPartitionedCOWTable() throws Exception {
String hiveTableName = "docker_hoodie_non_partition_key_cow_test";
@@ -70,10 +68,9 @@ public class ITTestHoodieSanity extends ITTestBase {
}
/**
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
* query in hive console.
* TODO: Add spark-shell test-case
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie data-set and performs upserts on it.
* Hive integration and upsert functionality is checked by running a count query in hive console. TODO: Add
* spark-shell test-case
*/
public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, PartitionType partitionType) throws Exception {
@@ -98,16 +95,14 @@ public class ITTestHoodieSanity extends ITTestBase {
// Run Hoodie Java App
String cmd;
if (partitionType == PartitionType.SINGLE_KEY_PARTITIONED) {
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName;
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
+ " --hive-table " + hiveTableName;
} else if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
+ " --use-multi-partition-keys";
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
+ " --hive-table " + hiveTableName + " --use-multi-partition-keys";
} else {
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
+ " --non-partitioned";
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
+ " --hive-table " + hiveTableName + " --non-partitioned";
}
executeCommandStringInDocker(ADHOC_1_CONTAINER, cmd, true);