1
0

Adding hoodie-spark to support Spark Datasource for Hoodie

- Write with COW/MOR paths work fully
 - Read with RO view works on both storages
 - Incremental view supported on COW
 - Refactored out HoodieReadClient methods, to just contain key based access
 - HoodieDataSourceHelpers class can be now used to construct inputs to datasource
 - Tests in hoodie-client using new helpers and mechanisms
 - Basic tests around save modes & insert/upserts (more to follow)
 - Bumped up scala to 2.11, since 2.10 is deprecated & complains with scalatest
 - Updated documentation to describe usage
 - New sample app written using the DataSource API
This commit is contained in:
Vinoth Chandar
2017-08-28 01:28:08 -07:00
committed by vinoth chandar
parent c98ee057fc
commit 64e0573aca
44 changed files with 1830 additions and 331 deletions

View File

@@ -1,43 +0,0 @@
/*
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
*/
import org.apache.spark.sql.SparkSession;
/**
* Examples to do Spark SQL on Hoodie dataset.
*/
public class HoodieSparkSQLExample {

  /**
   * Entry point: builds a local, Hive-enabled {@link SparkSession} and runs a few
   * sample SQL queries against the Hoodie realtime table {@code hoodie_rt}.
   *
   * @param args unused command-line arguments
   * @throws Exception if Spark session creation or query execution fails
   */
  public static void main(String[] args) throws Exception {
    SparkSession spark = SparkSession.builder()
        .appName("Hoodie SparkSQL")
        // NOTE(review): port 10000 is conventionally HiveServer2; the metastore thrift
        // default is 9083 — confirm this URI is intended for this example setup.
        .config("hive.metastore.uris", "thrift://localhost:10000")
        // Disable Spark's native parquet conversion so the table's registered
        // input format is used when reading.
        .config("spark.sql.hive.convertMetastoreParquet", false)
        .enableHiveSupport()
        .master("local[2]")
        .getOrCreate();
    try {
      // Schema of the table, then a handful of representative queries:
      spark.sql("describe hoodie_rt").show();
      spark.sql("select * from hoodie_rt").show();
      spark.sql("select end_lon as e1, driver, rider as r1, datestr, driver, datestr, rider, _hoodie_record_key from hoodie_rt").show();
      spark.sql("select fare, begin_lon, begin_lat, timestamp from hoodie_rt where fare > 2.0").show();
      spark.sql("select count(*) as cnt, _hoodie_file_name as file from hoodie_rt group by _hoodie_file_name").show();
    } finally {
      // Release the underlying SparkContext and its resources; the original
      // version leaked the session on exit.
      spark.stop();
    }
  }
}