1
0

Adding HiveSyncTool to sync hoodie dataset schema/partitions to Hive

- Designed to be run by your workflow manager after hoodie upsert
- Assumes JDBC connectivity via HiveServer2, which should work with all major distros
This commit is contained in:
Vinoth Chandar
2017-04-03 14:44:43 -07:00
committed by vinoth chandar
parent 2b6322318c
commit 542d622e49
11 changed files with 313 additions and 114 deletions

View File

@@ -24,12 +24,17 @@
<modelVersion>4.0.0</modelVersion>
<artifactId>hoodie-hive</artifactId>
<packaging>jar</packaging>
<dependencies>
<!-- Hadoop common library. No version element is declared on this dependency;
     presumably it is supplied by a parent POM's dependencyManagement (TODO confirm). -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
</dependency>
<!-- Hadoop client library. No version element is declared on this dependency;
     presumably it is supplied by a parent POM's dependencyManagement (TODO confirm). -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
@@ -63,6 +68,7 @@
<artifactId>libthrift</artifactId>
<version>0.9.2</version>
</dependency>
<!-- Apache commons -->
<dependency>
<groupId>commons-dbcp</groupId>
@@ -79,6 +85,11 @@
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<!-- JCommander (com.beust). No version or scope is declared on this dependency;
     presumably the version comes from a parent POM's dependencyManagement (TODO confirm). -->
<dependency>
  <groupId>com.beust</groupId>
  <artifactId>jcommander</artifactId>
</dependency>
<!-- Hadoop Testing -->
<dependency>
<groupId>junit</groupId>
@@ -136,7 +147,54 @@
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
</plugin>
<!-- Assembly plugin: runs its "single" goal during the package phase, driven by
     the descriptor file src/assembly/src.xml. -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-assembly-plugin</artifactId>
  <version>2.4.1</version>
  <configuration>
    <descriptors>
      <!-- Assembly descriptor that defines what the produced archive contains. -->
      <descriptor>src/assembly/src.xml</descriptor>
    </descriptors>
    <archive>
      <manifest>
        <!-- Main class recorded in the archive manifest.
             NOTE(review): this points at an example class; confirm it is the
             intended entry point for the assembled artifact. -->
        <mainClass>com.uber.hoodie.hive.example.HoodieHiveSyncExample</mainClass>
      </manifest>
    </archive>
  </configuration>
  <executions>
    <execution>
      <id>make-assembly</id>
      <!-- Attach this execution to the package phase of the build. -->
      <phase>package</phase>
      <goals>
        <goal>single</goal>
      </goals>
    </execution>
  </executions>
</plugin>
<!-- Dependency plugin: during the package phase, runs copy-dependencies with
     ${project.build.directory}/jars as the output directory. -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-dependency-plugin</artifactId>
  <version>2.4</version>
  <executions>
    <execution>
      <id>copy-dependencies</id>
      <phase>package</phase>
      <goals>
        <goal>copy-dependencies</goal>
      </goals>
      <configuration>
        <!-- Destination directory for the copied jars. -->
        <outputDirectory>${project.build.directory}/jars</outputDirectory>
        <!-- Overwrite settings: release and snapshot overwriting are both
             switched off, while overwrite-if-newer is switched on. -->
        <overWriteReleases>false</overWriteReleases>
        <overWriteSnapshots>false</overWriteSnapshots>
        <overWriteIfNewer>true</overWriteIfNewer>
      </configuration>
    </execution>
  </executions>
</plugin>
</plugins>
</build>
</project>