CodeStyle formatting to conform to basic Checkstyle rules.
The code-style rules follow google style with some changes: 1. Increase line length from 100 to 120 2. Disable JavaDoc related checkstyles as this needs more manual work. Both source and test code are checked for code-style
This commit is contained in:
committed by
vinoth chandar
parent
987f5d6b96
commit
788e4f2d2e
@@ -1,12 +1,12 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@@ -16,7 +16,6 @@
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
import com.beust.jcommander.JCommander;
|
||||
import com.beust.jcommander.Parameter;
|
||||
import com.uber.hoodie.DataSourceReadOptions;
|
||||
@@ -68,10 +67,9 @@ public class HoodieJavaApp {
|
||||
public void run() throws Exception {
|
||||
|
||||
// Spark session setup..
|
||||
SparkSession spark = SparkSession.builder()
|
||||
.appName("Hoodie Spark APP")
|
||||
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
|
||||
.master("local[1]")
|
||||
SparkSession spark = SparkSession.builder().appName("Hoodie Spark APP")
|
||||
.config("spark.serializer",
|
||||
"org.apache.spark.serializer.KryoSerializer").master("local[1]")
|
||||
.getOrCreate();
|
||||
JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext());
|
||||
FileSystem fs = FileSystem.get(jssc.hadoopConfiguration());
|
||||
@@ -83,13 +81,12 @@ public class HoodieJavaApp {
|
||||
* Commit with only inserts
|
||||
*/
|
||||
// Generate some input..
|
||||
List<String> records1 = DataSourceTestUtils
|
||||
.convertToStringList(dataGen.generateInserts("001"/* ignore */, 100));
|
||||
List<String> records1 = DataSourceTestUtils.convertToStringList(
|
||||
dataGen.generateInserts("001"/* ignore */, 100));
|
||||
Dataset<Row> inputDF1 = spark.read().json(jssc.parallelize(records1, 2));
|
||||
|
||||
// Save as hoodie dataset (copy on write)
|
||||
inputDF1.write()
|
||||
.format("com.uber.hoodie") // specify the hoodie source
|
||||
inputDF1.write().format("com.uber.hoodie") // specify the hoodie source
|
||||
.option("hoodie.insert.shuffle.parallelism",
|
||||
"2") // any hoodie client config can be passed like this
|
||||
.option("hoodie.upsert.shuffle.parallelism",
|
||||
@@ -104,7 +101,8 @@ public class HoodieJavaApp {
|
||||
"timestamp") // use to combine duplicate records in input/with disk val
|
||||
.option(HoodieWriteConfig.TABLE_NAME, tableName) // Used by hive sync and queries
|
||||
.mode(
|
||||
SaveMode.Overwrite) // This will remove any existing data at path below, and create a new dataset if needed
|
||||
SaveMode.Overwrite) // This will remove any existing data at path below, and create a
|
||||
// new dataset if needed
|
||||
.save(tablePath); // ultimately where the dataset will be placed
|
||||
String commitInstantTime1 = HoodieDataSourceHelpers.latestCommit(fs, tablePath);
|
||||
logger.info("First commit at instant time :" + commitInstantTime1);
|
||||
@@ -112,28 +110,24 @@ public class HoodieJavaApp {
|
||||
/**
|
||||
* Commit that updates records
|
||||
*/
|
||||
List<String> records2 = DataSourceTestUtils
|
||||
.convertToStringList(dataGen.generateUpdates("002"/* ignore */, 100));
|
||||
List<String> records2 = DataSourceTestUtils.convertToStringList(
|
||||
dataGen.generateUpdates("002"/* ignore */, 100));
|
||||
Dataset<Row> inputDF2 = spark.read().json(jssc.parallelize(records2, 2));
|
||||
inputDF2.write()
|
||||
.format("com.uber.hoodie")
|
||||
.option("hoodie.insert.shuffle.parallelism", "2")
|
||||
inputDF2.write().format("com.uber.hoodie").option("hoodie.insert.shuffle.parallelism", "2")
|
||||
.option("hoodie.upsert.shuffle.parallelism", "2")
|
||||
.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
|
||||
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
|
||||
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
|
||||
.option(HoodieWriteConfig.TABLE_NAME, tableName)
|
||||
.mode(SaveMode.Append)
|
||||
.save(tablePath);
|
||||
.option(HoodieWriteConfig.TABLE_NAME, tableName).mode(SaveMode.Append).save(tablePath);
|
||||
String commitInstantTime2 = HoodieDataSourceHelpers.latestCommit(fs, tablePath);
|
||||
logger.info("Second commit at instant time :" + commitInstantTime1);
|
||||
|
||||
/**
|
||||
* Read & do some queries
|
||||
*/
|
||||
Dataset<Row> hoodieROViewDF = spark.read()
|
||||
.format("com.uber.hoodie")
|
||||
// pass any path glob, can include hoodie & non-hoodie datasets
|
||||
Dataset<Row> hoodieROViewDF = spark.read().format("com.uber.hoodie")
|
||||
// pass any path glob, can include hoodie & non-hoodie
|
||||
// datasets
|
||||
.load(tablePath + "/*/*/*/*");
|
||||
hoodieROViewDF.registerTempTable("hoodie_ro");
|
||||
spark.sql("describe hoodie_ro").show();
|
||||
@@ -149,7 +143,8 @@ public class HoodieJavaApp {
|
||||
DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL())
|
||||
.option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(),
|
||||
commitInstantTime1) // Only changes in write 2 above
|
||||
.load(tablePath); // For incremental view, pass in the root/base path of dataset
|
||||
.load(
|
||||
tablePath); // For incremental view, pass in the root/base path of dataset
|
||||
|
||||
logger.info("You will only see records from : " + commitInstantTime2);
|
||||
hoodieIncViewDF.groupBy(hoodieIncViewDF.col("_hoodie_commit_time")).count().show();
|
||||
|
||||
Reference in New Issue
Block a user