Update docs
@@ -24,16 +24,31 @@ $ mvn clean install -DskipTests

## Generate a Hoodie Dataset

Create the output folder on your local HDFS

```
hdfs dfs -mkdir -p /tmp/hoodie/sample-table
```

You can run the __HoodieClientExample__ class to place two commits (commit 1 => 100 inserts, commit 2 => 100 updates to the previously inserted 100 records) onto your HDFS at /tmp/hoodie/sample-table

```
Usage: <main class> [options]
  Options:
    --help, -h
       Default: false
    --table-name, -n
       table name for Hoodie sample table
       Default: hoodie_rt
    --table-path, -p
       path for Hoodie sample table
       Default: file:///tmp/hoodie/sample-table
    --table-type, -t
       One of COPY_ON_WRITE or MERGE_ON_READ
       Default: COPY_ON_WRITE
```

The class lets you choose the table name, output path, and one of the storage types.
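
For instance, the following shows how the flags fit together (illustrative only: the classpath placeholder is hypothetical and depends on how you build and launch classes from the repo; the flag values are the defaults from the usage output above):

```
# <hoodie-client-classpath> is a placeholder for the hoodie-client test classes and their dependencies
java -cp <hoodie-client-classpath> HoodieClientExample \
  --table-name hoodie_rt \
  --table-path file:///tmp/hoodie/sample-table \
  --table-type COPY_ON_WRITE
```
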
## Register Dataset to Hive Metastore

Now, let's see how we can publish this data into Hive.

@@ -68,6 +83,10 @@ java -cp target/hoodie-hive-0.3.1-SNAPSHOT-jar-with-dependencies.jar:target/jars
```

{% include callout.html content="The Hive sync tool does not yet support Merge-On-Read tables." type="info" %}

#### Manually via Beeline

Add in the hoodie-hadoop-mr jar so Hive can read the Hoodie dataset and answer the query.
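
For example, from the Hive/Beeline prompt (the jar path matches the one in the output below; substitute the path to your locally built jar):

```
ADD JAR file:///tmp/hoodie-hadoop-mr-0.2.7.jar;
```
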
@@ -77,7 +96,7 @@ Added [file:///tmp/hoodie-hadoop-mr-0.2.7.jar] to class path
Added resources: [file:///tmp/hoodie-hadoop-mr-0.2.7.jar]
```

Then, you need to create a __ReadOptimized__ Hive table as below (the only type supported as of now) and register the sample partitions

```
@@ -109,6 +128,43 @@ ALTER TABLE `hoodie_test` ADD IF NOT EXISTS PARTITION (datestr='2015-03-17') LOC
set mapreduce.framework.name=yarn;
```

And you can generate a __Realtime__ Hive table, as below

```
DROP TABLE hoodie_rt;
CREATE EXTERNAL TABLE hoodie_rt(
  `_hoodie_commit_time` string,
  `_hoodie_commit_seqno` string,
  `_hoodie_record_key` string,
  `_hoodie_partition_path` string,
  `_hoodie_file_name` string,
  timestamp double,
  `_row_key` string,
  rider string,
  driver string,
  begin_lat double,
  begin_lon double,
  end_lat double,
  end_lon double,
  fare double)
PARTITIONED BY (`datestr` string)
ROW FORMAT SERDE
  'com.uber.hoodie.hadoop.realtime.HoodieParquetSerde'
STORED AS INPUTFORMAT
  'com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  'file:///tmp/hoodie/sample-table';

ALTER TABLE `hoodie_rt` ADD IF NOT EXISTS PARTITION (datestr='2016-03-15') LOCATION 'file:///tmp/hoodie/sample-table/2016/03/15';
ALTER TABLE `hoodie_rt` ADD IF NOT EXISTS PARTITION (datestr='2015-03-16') LOCATION 'file:///tmp/hoodie/sample-table/2015/03/16';
ALTER TABLE `hoodie_rt` ADD IF NOT EXISTS PARTITION (datestr='2015-03-17') LOCATION 'file:///tmp/hoodie/sample-table/2015/03/17';
```
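
Once the partitions are registered, a quick sanity check against the realtime table might look like the following (a sketch: it assumes the hoodie-hadoop-mr jar added earlier is still on the Hive classpath, and uses columns and a partition from the DDL above):

```
-- count one registered partition, then pull a few sample columns from it
select count(*) from hoodie_rt where datestr = '2016-03-15';
select rider, driver, begin_lat, begin_lon, fare from hoodie_rt where datestr = '2016-03-15' limit 10;
```
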

## Querying The Dataset

Now, we can proceed to query the dataset, as we would normally do across all three supported query engines.
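
For instance, from the Hive CLI or Beeline session prepared above, the simplest smoke test is a count on the read-optimized table (the same query is repeated through Spark SQL further down):

```
select count(*) from hoodie_test;
```
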
@@ -138,15 +194,17 @@ $ spark-shell --jars /tmp/hoodie-hadoop-mr-0.2.7.jar --driver-class-path $HADOOP
scala> val sqlContext = new org.apache.spark.sql.SQLContext(sc)
scala> sqlContext.sql("show tables").show(10000)
scala> sqlContext.sql("describe hoodie_test").show(10000)
scala> sqlContext.sql("describe hoodie_rt").show(10000)
scala> sqlContext.sql("select count(*) from hoodie_test").show(10000)
```

You can also use the sample queries in __hoodie-utilities/src/test/java/HoodieSparkSQLExample.java__ for running on `hoodie_rt`
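
For instance, building on the spark-shell session above, a query against the realtime table could look like this (a sketch; the columns and partition value come from the hoodie_rt DDL earlier):

```
scala> sqlContext.sql("select rider, driver, fare from hoodie_rt where datestr = '2016-03-15'").show(100)
```
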

### Presto

Check out the 'master' branch of OSS Presto, build it, and place your installation somewhere.

* Copy the hoodie-hadoop-mr-* jar into $PRESTO_INSTALL/plugin/hive-hadoop2/
* Start up your server and you should be able to query the same Hive table via Presto, as shown below
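
For example, with the Presto CLI pointed at your coordinator (the server address, catalog, and schema below are assumptions; adjust them to your deployment):

```
# connect to the Hive catalog the table was registered in, then query it
presto --server localhost:8080 --catalog hive --schema default
presto:default> select count(*) from hoodie_test;
```
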
```
@@ -183,6 +241,7 @@ hive>
```
{% include note.html content="This is only supported for Read-optimized tables for now." %}
@@ -52,11 +52,19 @@ public class HoodieClientExample {
  @Parameter(names={"--table-type", "-t"}, description = "One of COPY_ON_WRITE or MERGE_ON_READ")
  private String tableType = HoodieTableType.COPY_ON_WRITE.name();

  @Parameter(names = {"--help", "-h"}, help = true)
  public Boolean help = false;

  private static Logger logger = LogManager.getLogger(HoodieClientExample.class);

  public static void main(String[] args) throws Exception {
    HoodieClientExample cli = new HoodieClientExample();
    JCommander cmd = new JCommander(cli, args);

    if (cli.help || args.length == 0) {
      cmd.usage();
      System.exit(1);
    }
    cli.run();
  }