[HUDI-296] Explore use of spotless to auto fix formatting errors (#945)
- Add Spotless format fixing to the project
- One-time reformatting for conformity
- The build fails on formatting violations, and `mvn spotless:apply` auto-fixes them
This commit is contained in:
@@ -87,8 +87,7 @@ public abstract class ITTestBase {
|
||||
}
|
||||
|
||||
private static String getHiveConsoleCommandFile(String commandFile, String additionalVar) {
|
||||
StringBuilder builder = new StringBuilder()
|
||||
.append("beeline -u " + HIVE_SERVER_JDBC_URL)
|
||||
StringBuilder builder = new StringBuilder().append("beeline -u " + HIVE_SERVER_JDBC_URL)
|
||||
.append(" --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat ")
|
||||
.append(" --hiveconf hive.stats.autogather=false ")
|
||||
.append(" --hivevar hudi.hadoop.bundle=" + HUDI_HADOOP_BUNDLE);
|
||||
@@ -100,30 +99,23 @@ public abstract class ITTestBase {
|
||||
}
|
||||
|
||||
static String getSparkShellCommand(String commandFile) {
|
||||
return new StringBuilder()
|
||||
.append("spark-shell --jars ").append(HUDI_SPARK_BUNDLE)
|
||||
return new StringBuilder().append("spark-shell --jars ").append(HUDI_SPARK_BUNDLE)
|
||||
.append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR)
|
||||
.append(" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
|
||||
.append(" --packages com.databricks:spark-avro_2.11:4.0.0 ")
|
||||
.append(" -i ").append(commandFile)
|
||||
.toString();
|
||||
.append(
|
||||
" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
|
||||
.append(" --packages com.databricks:spark-avro_2.11:4.0.0 ").append(" -i ").append(commandFile).toString();
|
||||
}
|
||||
|
||||
@Before
|
||||
public void init() {
|
||||
String dockerHost = (OVERRIDDEN_DOCKER_HOST != null) ? OVERRIDDEN_DOCKER_HOST : DEFAULT_DOCKER_HOST;
|
||||
//Assuming insecure docker engine
|
||||
DockerClientConfig config = DefaultDockerClientConfig.createDefaultConfigBuilder()
|
||||
.withDockerHost(dockerHost)
|
||||
.build();
|
||||
// Assuming insecure docker engine
|
||||
DockerClientConfig config =
|
||||
DefaultDockerClientConfig.createDefaultConfigBuilder().withDockerHost(dockerHost).build();
|
||||
// using jaxrs/jersey implementation here (netty impl is also available)
|
||||
DockerCmdExecFactory dockerCmdExecFactory = new JerseyDockerCmdExecFactory()
|
||||
.withConnectTimeout(1000)
|
||||
.withMaxTotalConnections(100)
|
||||
.withMaxPerRouteConnections(10);
|
||||
dockerClient = DockerClientBuilder.getInstance(config)
|
||||
.withDockerCmdExecFactory(dockerCmdExecFactory)
|
||||
.build();
|
||||
DockerCmdExecFactory dockerCmdExecFactory = new JerseyDockerCmdExecFactory().withConnectTimeout(1000)
|
||||
.withMaxTotalConnections(100).withMaxPerRouteConnections(10);
|
||||
dockerClient = DockerClientBuilder.getInstance(config).withDockerCmdExecFactory(dockerCmdExecFactory).build();
|
||||
await().atMost(60, SECONDS).until(this::servicesUp);
|
||||
}
|
||||
|
||||
@@ -131,8 +123,7 @@ public abstract class ITTestBase {
|
||||
List<Container> containerList = dockerClient.listContainersCmd().exec();
|
||||
for (Container c : containerList) {
|
||||
if (!c.getState().equalsIgnoreCase("running")) {
|
||||
LOG.info("Container : " + Arrays.toString(c.getNames())
|
||||
+ "not in running state, Curr State :" + c.getState());
|
||||
LOG.info("Container : " + Arrays.toString(c.getNames()) + "not in running state, Curr State :" + c.getState());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -142,31 +133,31 @@ public abstract class ITTestBase {
|
||||
}
|
||||
|
||||
private String singleSpace(String str) {
|
||||
return str.replaceAll("[\\s]+"," ");
|
||||
return str.replaceAll("[\\s]+", " ");
|
||||
}
|
||||
|
||||
private TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
|
||||
boolean expectedToSucceed) throws Exception {
|
||||
Container sparkWorkerContainer = runningContainers.get(containerName);
|
||||
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId())
|
||||
.withCmd(command).withAttachStdout(true).withAttachStderr(true);
|
||||
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId()).withCmd(command).withAttachStdout(true)
|
||||
.withAttachStderr(true);
|
||||
|
||||
ExecCreateCmdResponse createCmdResponse = cmd.exec();
|
||||
TestExecStartResultCallback callback = new TestExecStartResultCallback(new ByteArrayOutputStream(),
|
||||
new ByteArrayOutputStream());
|
||||
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false)
|
||||
.exec(callback).awaitCompletion();
|
||||
TestExecStartResultCallback callback =
|
||||
new TestExecStartResultCallback(new ByteArrayOutputStream(), new ByteArrayOutputStream());
|
||||
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false).exec(callback)
|
||||
.awaitCompletion();
|
||||
int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
|
||||
LOG.info("Exit code for command : " + exitCode);
|
||||
LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString());
|
||||
LOG.error("\n\n ###### Stderr #######\n" + callback.getStderr().toString());
|
||||
|
||||
if (expectedToSucceed) {
|
||||
Assert.assertTrue("Command (" + Arrays.toString(command)
|
||||
+ ") expected to succeed. Exit (" + exitCode + ")", exitCode == 0);
|
||||
Assert.assertTrue("Command (" + Arrays.toString(command) + ") expected to succeed. Exit (" + exitCode + ")",
|
||||
exitCode == 0);
|
||||
} else {
|
||||
Assert.assertTrue("Command (" + Arrays.toString(command)
|
||||
+ ") expected to fail. Exit (" + exitCode + ")", exitCode != 0);
|
||||
Assert.assertTrue("Command (" + Arrays.toString(command) + ") expected to fail. Exit (" + exitCode + ")",
|
||||
exitCode != 0);
|
||||
}
|
||||
cmd.close();
|
||||
return callback;
|
||||
@@ -178,8 +169,8 @@ public abstract class ITTestBase {
|
||||
}
|
||||
}
|
||||
|
||||
TestExecStartResultCallback executeCommandStringInDocker(String containerName, String cmd,
|
||||
boolean expectedToSucceed) throws Exception {
|
||||
TestExecStartResultCallback executeCommandStringInDocker(String containerName, String cmd, boolean expectedToSucceed)
|
||||
throws Exception {
|
||||
LOG.info("\n\n#################################################################################################");
|
||||
LOG.info("Container : " + containerName + ", Running command :" + cmd);
|
||||
LOG.info("\n#################################################################################################");
|
||||
@@ -211,16 +202,16 @@ public abstract class ITTestBase {
|
||||
|
||||
Pair<String, String> executeSparkSQLCommand(String commandFile, boolean expectedToSucceed) throws Exception {
|
||||
String sparkShellCmd = getSparkShellCommand(commandFile);
|
||||
TestExecStartResultCallback callback = executeCommandStringInDocker(ADHOC_1_CONTAINER,
|
||||
sparkShellCmd, expectedToSucceed);
|
||||
TestExecStartResultCallback callback =
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, sparkShellCmd, expectedToSucceed);
|
||||
return Pair.of(callback.getStdout().toString(), callback.getStderr().toString());
|
||||
}
|
||||
|
||||
private void saveUpLogs() {
|
||||
try {
|
||||
// save up the Hive log files for introspection
|
||||
String hiveLogStr = executeCommandStringInDocker(HIVESERVER, "cat /tmp/root/hive.log", true)
|
||||
.getStdout().toString();
|
||||
String hiveLogStr =
|
||||
executeCommandStringInDocker(HIVESERVER, "cat /tmp/root/hive.log", true).getStdout().toString();
|
||||
String filePath = System.getProperty("java.io.tmpdir") + "/" + System.currentTimeMillis() + "-hive.log";
|
||||
FileIOUtils.writeStringToFile(hiveLogStr, filePath);
|
||||
LOG.info("Hive log saved up at : " + filePath);
|
||||
@@ -240,10 +231,10 @@ public abstract class ITTestBase {
|
||||
|
||||
int lastIndex = 0;
|
||||
int count = 0;
|
||||
while(lastIndex != -1){
|
||||
while (lastIndex != -1) {
|
||||
lastIndex = stdOutSingleSpaced.indexOf(expectedOutput, lastIndex);
|
||||
if(lastIndex != -1){
|
||||
count ++;
|
||||
if (lastIndex != -1) {
|
||||
count++;
|
||||
lastIndex += expectedOutput.length();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,10 +35,8 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
private static String HDFS_BATCH_PATH1 = HDFS_DATA_DIR + "/" + "batch_1.json";
|
||||
private static String HDFS_BATCH_PATH2 = HDFS_DATA_DIR + "/" + "batch_2.json";
|
||||
|
||||
private static String INPUT_BATCH_PATH1 = HOODIE_WS_ROOT +
|
||||
"/docker/demo/data/batch_1.json";
|
||||
private static String INPUT_BATCH_PATH2 = HOODIE_WS_ROOT +
|
||||
"/docker/demo/data/batch_2.json";
|
||||
private static String INPUT_BATCH_PATH1 = HOODIE_WS_ROOT + "/docker/demo/data/batch_1.json";
|
||||
private static String INPUT_BATCH_PATH2 = HOODIE_WS_ROOT + "/docker/demo/data/batch_2.json";
|
||||
|
||||
private static String COW_BASE_PATH = "/user/hive/warehouse/stock_ticks_cow";
|
||||
private static String MOR_BASE_PATH = "/user/hive/warehouse/stock_ticks_mor";
|
||||
@@ -58,13 +56,13 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
private static String HIVE_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-incremental.commands";
|
||||
|
||||
|
||||
private static String HIVE_SYNC_CMD_FMT = " --enable-hive-sync "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.username=hive "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.password=hive "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.partition_fields=%s "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.database=default "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.table=%s";
|
||||
private static String HIVE_SYNC_CMD_FMT =
|
||||
" --enable-hive-sync " + " --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.username=hive "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.password=hive "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.partition_fields=%s "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.database=default "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.table=%s";
|
||||
|
||||
|
||||
@Test
|
||||
@@ -90,32 +88,30 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
}
|
||||
|
||||
private void setupDemo() throws Exception {
|
||||
List<String> cmds = new ImmutableList.Builder<String>()
|
||||
.add("hdfs dfsadmin -safemode wait") // handle NN going into safe mode at times
|
||||
List<String> cmds = new ImmutableList.Builder<String>().add("hdfs dfsadmin -safemode wait") // handle NN going into
|
||||
// safe mode at times
|
||||
.add("hdfs dfs -mkdir -p " + HDFS_DATA_DIR)
|
||||
.add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH1 + " " + HDFS_BATCH_PATH1)
|
||||
.add("/bin/bash " + DEMO_CONTAINER_SCRIPT)
|
||||
.build();
|
||||
.add("/bin/bash " + DEMO_CONTAINER_SCRIPT).build();
|
||||
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
|
||||
}
|
||||
|
||||
private void ingestFirstBatchAndHiveSync() throws Exception {
|
||||
List<String> cmds = new ImmutableList.Builder<String>()
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||
+ HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||
+ HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ " --disable-compaction "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --storage-type COPY_ON_WRITE "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --storage-type MERGE_ON_READ "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ " --disable-compaction " + String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
|
||||
.build();
|
||||
|
||||
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
|
||||
@@ -128,32 +124,25 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_rt |");
|
||||
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| partition |\n"
|
||||
+ "+----------------+\n"
|
||||
+ "| dt=2018-08-31 |\n"
|
||||
+ "+----------------+\n", 3);
|
||||
"| partition |\n" + "+----------------+\n" + "| dt=2018-08-31 |\n" + "+----------------+\n", 3);
|
||||
|
||||
stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
|
||||
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n"
|
||||
+ "+---------+----------------------+\n"
|
||||
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:29:00 |\n", 3);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n", 3);
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n",
|
||||
3);
|
||||
}
|
||||
|
||||
private void testSparkSQLAfterFirstBatch() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH1_COMMANDS, true);
|
||||
assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow |false |\n"
|
||||
+ "|default |stock_ticks_mor |false |\n" + "|default |stock_ticks_mor_rt |false |");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"|default |stock_ticks_cow |false |\n"
|
||||
+ "|default |stock_ticks_mor |false |\n"
|
||||
+ "|default |stock_ticks_mor_rt |false |");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"+------+-------------------+\n"
|
||||
+ "|GOOG |2018-08-31 10:29:00|\n"
|
||||
+ "+------+-------------------+", 3);
|
||||
"+------+-------------------+\n" + "|GOOG |2018-08-31 10:29:00|\n" + "+------+-------------------+", 3);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|", 3);
|
||||
}
|
||||
@@ -161,34 +150,29 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
private void ingestSecondBatchAndHiveSync() throws Exception {
|
||||
List<String> cmds = new ImmutableList.Builder<String>()
|
||||
.add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH2 + " " + HDFS_BATCH_PATH2)
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||
+ HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||
+ HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ " --disable-compaction "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --storage-type COPY_ON_WRITE "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --storage-type MERGE_ON_READ "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ " --disable-compaction " + String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
|
||||
.build();
|
||||
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
|
||||
}
|
||||
|
||||
private void testHiveAfterSecondBatch() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | _c1 |\n"
|
||||
+ "+---------+----------------------+\n"
|
||||
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:29:00 |\n");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | _c1 |\n"
|
||||
+ "+---------+----------------------+\n"
|
||||
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 |\n", 2);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | ts | volume | open | close |\n"
|
||||
@@ -197,75 +181,66 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |\n", 2);
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |\n",
|
||||
2);
|
||||
}
|
||||
|
||||
private void testHiveAfterSecondBatchAfterCompaction() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH2_COMMANDS);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | _c1 |\n"
|
||||
+ "+---------+----------------------+\n"
|
||||
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 |", 2);
|
||||
assertStdOutContains(stdOutErrPair, "| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |", 2);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |",
|
||||
2);
|
||||
}
|
||||
|
||||
private void testSparkSQLAfterSecondBatch() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH2_COMMANDS, true);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"+------+-------------------+\n"
|
||||
+ "|GOOG |2018-08-31 10:59:00|\n"
|
||||
+ "+------+-------------------+", 2);
|
||||
"+------+-------------------+\n" + "|GOOG |2018-08-31 10:59:00|\n" + "+------+-------------------+", 2);
|
||||
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|", 2);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"+------+-------------------+\n"
|
||||
+ "|GOOG |2018-08-31 10:29:00|\n"
|
||||
+ "+------+-------------------+");
|
||||
"+------+-------------------+\n" + "|GOOG |2018-08-31 10:29:00|\n" + "+------+-------------------+");
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|");
|
||||
}
|
||||
|
||||
private void testIncrementalHiveQuery() throws Exception {
|
||||
String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
|
||||
.getStdout().toString();
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
|
||||
"min.commit.time=" + minCommitTime +"`");
|
||||
String minCommitTime =
|
||||
executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true).getStdout().toString();
|
||||
Pair<String, String> stdOutErrPair =
|
||||
executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS, "min.commit.time=" + minCommitTime + "`");
|
||||
assertStdOutContains(stdOutErrPair, "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
|
||||
}
|
||||
|
||||
private void testIncrementalHiveQueryAfterCompaction() throws Exception {
|
||||
String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
|
||||
.getStdout().toString();
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
|
||||
"min.commit.time=" + minCommitTime +"`");
|
||||
String minCommitTime =
|
||||
executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true).getStdout().toString();
|
||||
Pair<String, String> stdOutErrPair =
|
||||
executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS, "min.commit.time=" + minCommitTime + "`");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
|
||||
}
|
||||
|
||||
private void testIncrementalSparkSQLQuery() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_INCREMENTAL_COMMANDS, true);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"|default |stock_ticks_cow |false |\n"
|
||||
+ "|default |stock_ticks_derived_mor |false |\n"
|
||||
+ "|default |stock_ticks_derived_mor_rt|false |\n"
|
||||
+ "|default |stock_ticks_mor |false |\n"
|
||||
+ "|default |stock_ticks_mor_rt |false |\n"
|
||||
assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow |false |\n"
|
||||
+ "|default |stock_ticks_derived_mor |false |\n" + "|default |stock_ticks_derived_mor_rt|false |\n"
|
||||
+ "|default |stock_ticks_mor |false |\n" + "|default |stock_ticks_mor_rt |false |\n"
|
||||
+ "| |stock_ticks_cow_incr |true |");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"|count(1)|\n"
|
||||
+ "+--------+\n"
|
||||
+ "|99 |", 2);
|
||||
assertStdOutContains(stdOutErrPair, "|count(1)|\n" + "+--------+\n" + "|99 |", 2);
|
||||
}
|
||||
|
||||
private void scheduleAndRunCompaction() throws Exception {
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, HUDI_CLI_TOOL + " --cmdfile " + COMPACTION_COMMANDS, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,16 +28,14 @@ import org.junit.Test;
|
||||
public class ITTestHoodieSanity extends ITTestBase {
|
||||
|
||||
enum PartitionType {
|
||||
SINGLE_KEY_PARTITIONED,
|
||||
MULTI_KEYS_PARTITIONED,
|
||||
NON_PARTITIONED,
|
||||
SINGLE_KEY_PARTITIONED, MULTI_KEYS_PARTITIONED, NON_PARTITIONED,
|
||||
}
|
||||
|
||||
@Test
|
||||
/**
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key
|
||||
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
|
||||
* query in hive console.
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key data-set
|
||||
* and performs upserts on it. Hive integration and upsert functionality is checked by running a count query in hive
|
||||
* console.
|
||||
*/
|
||||
public void testRunHoodieJavaAppOnSinglePartitionKeyCOWTable() throws Exception {
|
||||
String hiveTableName = "docker_hoodie_single_partition_key_cow_test";
|
||||
@@ -48,8 +46,8 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
@Test
|
||||
/**
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with multiple partition-keys
|
||||
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
|
||||
* query in hive console.
|
||||
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count query
|
||||
* in hive console.
|
||||
*/
|
||||
public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception {
|
||||
String hiveTableName = "docker_hoodie_multi_partition_key_cow_test";
|
||||
@@ -59,9 +57,9 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
|
||||
@Test
|
||||
/**
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample non-partitioned COW Hoodie
|
||||
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
|
||||
* query in hive console.
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample non-partitioned COW Hoodie data-set and
|
||||
* performs upserts on it. Hive integration and upsert functionality is checked by running a count query in hive
|
||||
* console.
|
||||
*/
|
||||
public void testRunHoodieJavaAppOnNonPartitionedCOWTable() throws Exception {
|
||||
String hiveTableName = "docker_hoodie_non_partition_key_cow_test";
|
||||
@@ -70,10 +68,9 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
}
|
||||
|
||||
/**
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie
|
||||
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
|
||||
* query in hive console.
|
||||
* TODO: Add spark-shell test-case
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie data-set and performs upserts on it.
|
||||
* Hive integration and upsert functionality is checked by running a count query in hive console. TODO: Add
|
||||
* spark-shell test-case
|
||||
*/
|
||||
public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, PartitionType partitionType) throws Exception {
|
||||
|
||||
@@ -98,16 +95,14 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
// Run Hoodie Java App
|
||||
String cmd;
|
||||
if (partitionType == PartitionType.SINGLE_KEY_PARTITIONED) {
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
|
||||
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName;
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
|
||||
+ " --hive-table " + hiveTableName;
|
||||
} else if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
|
||||
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
|
||||
+ " --use-multi-partition-keys";
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
|
||||
+ " --hive-table " + hiveTableName + " --use-multi-partition-keys";
|
||||
} else {
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
|
||||
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
|
||||
+ " --non-partitioned";
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl + " --hive-url " + HIVE_SERVER_JDBC_URL
|
||||
+ " --hive-table " + hiveTableName + " --non-partitioned";
|
||||
}
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, cmd, true);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user