[HUDI-68] Pom cleanup & demo automation (#846)
- [HUDI-172] Cleanup Maven POM/Classpath - Fix ordering of dependencies in poms, to enable better resolution - Idea is to place more specific ones at the top - And place dependencies which use them below them - [HUDI-68] : Automate demo steps on docker setup - Move hive queries from hive cli to beeline - Standardize on taking query input from text command files - Deltastreamer ingest, also does hive sync in a single step - Spark Incremental Query materialized as a derived Hive table using datasource - Fix flakiness in HDFS spin up and output comparison - Code cleanup around streamlining and loc reduction - Also fixed pom to not shade some hive classes in spark, to enable hive sync
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
HUDI_JAR=`ls $DIR/target/hudi-cli-*-SNAPSHOT.jar | grep -v source | grep -v javadoc`
|
||||
HOODIE_JAR=`ls $DIR/target/hudi-cli-*-SNAPSHOT.jar | grep -v source | grep -v javadoc`
|
||||
if [ -z "$HADOOP_CONF_DIR" ]; then
|
||||
echo "setting hadoop conf dir"
|
||||
HADOOP_CONF_DIR="/etc/hadoop/conf"
|
||||
@@ -13,5 +13,4 @@ fi
|
||||
if [ -z "$CLIENT_JAR" ]; then
|
||||
echo "client jar location not set"
|
||||
fi
|
||||
echo "java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HUDI_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap"
|
||||
java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HUDI_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap
|
||||
java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HOODIE_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@
|
||||
|
||||
@@ -29,8 +29,6 @@
|
||||
<properties>
|
||||
<spring.shell.version>1.2.0.RELEASE</spring.shell.version>
|
||||
<jar.mainclass>org.springframework.shell.Bootstrap</jar.mainclass>
|
||||
<log4j.version>1.2.17</log4j.version>
|
||||
<junit.version>4.10</junit.version>
|
||||
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
||||
</properties>
|
||||
|
||||
@@ -61,7 +59,7 @@
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>3.2.1</version>
|
||||
<version>${scala-maven-plugin.version}</version>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
@@ -133,23 +131,42 @@
|
||||
|
||||
|
||||
<dependencies>
|
||||
|
||||
<!-- Scala -->
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-library</artifactId>
|
||||
<version>${scala.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.shell</groupId>
|
||||
<artifactId>spring-shell</artifactId>
|
||||
<version>${spring.shell.version}</version>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>de.vandermeer</groupId>
|
||||
<artifactId>asciitable</artifactId>
|
||||
<version>0.2.5</version>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-utilities</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
@@ -159,6 +176,24 @@
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.shell</groupId>
|
||||
<artifactId>spring-shell</artifactId>
|
||||
<version>${spring.shell.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>de.vandermeer</groupId>
|
||||
<artifactId>asciitable</artifactId>
|
||||
<version>0.2.5</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.jakewharton.fliptables</groupId>
|
||||
<artifactId>fliptables</artifactId>
|
||||
@@ -166,60 +201,25 @@
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>${log4j.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit-dep</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<version>${junit-dep.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
<version>2.9.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-utilities</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
||||
@@ -217,12 +217,23 @@ public class CompactionCommand implements CommandMarker {
|
||||
final String sparkMemory,
|
||||
@CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries")
|
||||
final String retry,
|
||||
@CliOption(key = "compactionInstant", mandatory = true, help = "Base path for the target hoodie dataset")
|
||||
final String compactionInstantTime) throws Exception {
|
||||
@CliOption(key = "compactionInstant", mandatory = false, help = "Base path for the target hoodie dataset")
|
||||
String compactionInstantTime) throws Exception {
|
||||
boolean initialized = HoodieCLI.initConf();
|
||||
HoodieCLI.initFS(initialized);
|
||||
|
||||
if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
|
||||
if (null == compactionInstantTime) {
|
||||
// pick outstanding one with lowest timestamp
|
||||
Option<String> firstPendingInstant = HoodieCLI.tableMetadata.reloadActiveTimeline()
|
||||
.filterCompletedAndCompactionInstants().filter(instant -> instant.getAction()
|
||||
.equals(HoodieTimeline.COMPACTION_ACTION)).firstInstant().map(HoodieInstant::getTimestamp);
|
||||
if (!firstPendingInstant.isPresent()) {
|
||||
return "NO PENDING COMPACTION TO RUN";
|
||||
}
|
||||
compactionInstantTime = firstPendingInstant.get();
|
||||
}
|
||||
|
||||
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
|
||||
scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
|
||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||
|
||||
Reference in New Issue
Block a user