
[HUDI-68] Pom cleanup & demo automation (#846)

- [HUDI-172] Cleanup Maven POM/Classpath
  - Fix the ordering of dependencies in the POMs to enable better resolution
  - The idea is to place the more specific dependencies at the top
  - And place the dependencies that use them below (see the POM sketch after this list)
- [HUDI-68] Automate demo steps on docker setup
  - Move Hive queries from the Hive CLI to Beeline
  - Standardize on taking query input from text command files
  - DeltaStreamer ingest now also does Hive sync in a single step
  - Spark incremental query materialized as a derived Hive table using the datasource API (see the sketch after this list)
  - Fix flakiness in HDFS spin-up and output comparison
  - Code cleanup to streamline and reduce lines of code
  - Also fixed the POM to not shade some Hive classes in the Spark bundle, to enable Hive sync
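To make the ordering idea concrete, here is a minimal, hypothetical pom.xml fragment; the artifacts and versions are placeholders, not the actual entries this commit touched. Maven breaks version ties between equal-depth transitive dependencies by declaration order, and builds the classpath in declaration order, which is why the ordering matters:

```xml
<!-- Hypothetical fragment illustrating the ordering convention; these are
     placeholder artifacts, not the actual entries changed in this commit. -->
<dependencies>
  <!-- More specific dependency first: its version wins Maven's
       declaration-order tie-break among equal-depth candidates -->
  <dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-databind</artifactId>
    <version>2.6.7.1</version>
  </dependency>
  <!-- A broader dependency that itself uses jackson-databind goes below,
       so its transitive jackson version cannot shadow the pin above -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>${spark.version}</version>
  </dependency>
</dependencies>
```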
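And a hedged sketch of the "incremental query materialized as a derived Hive table" step, written against the 0.5.x-era Hudi datasource: the option keys reflect that era's read options, while the base path, begin instant, and table names are invented for illustration:

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class IncrementalQuerySketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("hudi-incremental-demo")
        .enableHiveSupport()
        .getOrCreate();

    // Incremental view: only records written after the given commit instant.
    Dataset<Row> increments = spark.read().format("org.apache.hudi")
        .option("hoodie.datasource.view.type", "incremental")
        .option("hoodie.datasource.read.begin.instanttime", "20190822000000") // invented instant
        .load("/user/hive/warehouse/stock_ticks_cow");                        // invented base path

    // Materialize the incremental result as a derived Hive table.
    increments.createOrReplaceTempView("stock_ticks_increments");
    spark.sql("CREATE TABLE IF NOT EXISTS stock_ticks_derived AS "
        + "SELECT * FROM stock_ticks_increments");
  }
}
```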
vinoth chandar authored on 2019-08-22 20:18:50 -07:00, committed by GitHub
parent 92eed6aca8
commit 6edf0b9def
34 changed files with 2382 additions and 1681 deletions


```diff
@@ -217,12 +217,23 @@ public class CompactionCommand implements CommandMarker {
       final String sparkMemory,
       @CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries")
       final String retry,
-      @CliOption(key = "compactionInstant", mandatory = true, help = "Base path for the target hoodie dataset")
-      final String compactionInstantTime) throws Exception {
+      @CliOption(key = "compactionInstant", mandatory = false, help = "Base path for the target hoodie dataset")
+      String compactionInstantTime) throws Exception {
     boolean initialized = HoodieCLI.initConf();
     HoodieCLI.initFS(initialized);
     if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
+      if (null == compactionInstantTime) {
+        // pick outstanding one with lowest timestamp
+        Option<String> firstPendingInstant = HoodieCLI.tableMetadata.reloadActiveTimeline()
+            .filterCompletedAndCompactionInstants().filter(instant -> instant.getAction()
+                .equals(HoodieTimeline.COMPACTION_ACTION)).firstInstant().map(HoodieInstant::getTimestamp);
+        if (!firstPendingInstant.isPresent()) {
+          return "NO PENDING COMPACTION TO RUN";
+        }
+        compactionInstantTime = firstPendingInstant.get();
+      }
       String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
           scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
       SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
```
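The net effect of the hunk: `compactionInstant` becomes optional, and for MERGE_ON_READ tables the command falls back to the oldest pending compaction on the timeline, returning early if none exists. Restated below as a standalone helper for readability; the class and method names are invented, the imports assume the 0.5.x package layout, and the calls simply mirror the diff above:

```java
import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;

public class CompactionInstantFallback {

  // Hypothetical helper: earliest pending compaction instant on the timeline, if any.
  static Option<String> earliestPendingCompaction(HoodieTimeline activeTimeline) {
    return activeTimeline
        .filterCompletedAndCompactionInstants()
        // completed compactions surface as commit actions, so instants still
        // carrying the compaction action are the pending ones
        .filter(instant -> instant.getAction().equals(HoodieTimeline.COMPACTION_ACTION))
        // the timeline is ordered by instant time, so the first match is the oldest
        .firstInstant()
        .map(HoodieInstant::getTimestamp);
  }
}
```

With this in place, the compaction command can presumably be invoked without `--compactionInstant`, so the CLI user no longer has to look up a pending instant by hand before running compaction.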