1
0

[HUDI-476]: Add hudi-examples module (#1151)

add hoodie delta streamer mock source example and dfs source and kafka source examples

Signed-off-by: dengziming <dengziming1993@gmail.com>

add defaultSparkConf utils method

change version of hudi-examples to 0.5.2-SNAPSHOT
change the artifactId of hudi-spark and hudi-utilities
alter some code to adapt to Kafka 2.0

Update scripts

Add license
This commit is contained in:
dengziming
2020-05-28 01:44:39 +08:00
committed by GitHub
parent 03f136361a
commit bde7a7043e
18 changed files with 1113 additions and 144 deletions

View File

@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Simple example of HoodieDeltaStreamer reading data from a mock source
# (org.apache.hudi.examples.common.RandomJsonSource). It is meant as a template
# for developers who want to define their own custom data source.
#
# The job is started through the hudi-delta-streamer launcher that lives in the
# same directory as this script and runs with --continuous.

# Resolve the directory containing this script. Every expansion is quoted so
# that paths with whitespace work, and a failing cd aborts instead of silently
# falling back to the caller's working directory.
BASE_PATH="$(cd "$(dirname "$0")" && pwd)" || exit 1

"${BASE_PATH}/hudi-delta-streamer" \
  --hoodie-conf hoodie.datasource.write.recordkey.field=uuid \
  --hoodie-conf hoodie.datasource.write.partitionpath.field=driver \
  --target-base-path /tmp/hoodie/deltastreamertable \
  --table-type MERGE_ON_READ \
  --target-table deltastreamertable \
  --source-ordering-field ts \
  --source-class org.apache.hudi.examples.common.RandomJsonSource \
  --schemaprovider-class org.apache.hudi.examples.common.ExampleDataSchemaProvider \
  --transformer-class org.apache.hudi.examples.common.IdentityTransformer \
  --continuous

View File

@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Simple example of HoodieDeltaStreamer reading data from JsonDFSSource.
# It reads the data found under a DFS directory once (no --continuous flag)
# and writes it to a Hudi table which can then be queried.
#
# The job is started through the hudi-delta-streamer launcher that lives in the
# same directory as this script.

# Resolve the directory containing this script. Every expansion is quoted so
# that paths with whitespace work, and a failing cd aborts instead of silently
# falling back to the caller's working directory.
BASE_PATH="$(cd "$(dirname "$0")" && pwd)" || exit 1

# NOTE(review): hoodie.deltastreamer.source.dfs.root below is a relative path;
# presumably the script is expected to be started from the repository root --
# confirm how the configured filesystem resolves it.
"${BASE_PATH}/hudi-delta-streamer" \
  --hoodie-conf hoodie.datasource.write.recordkey.field=uuid \
  --hoodie-conf hoodie.datasource.write.partitionpath.field=driver \
  --hoodie-conf hoodie.deltastreamer.source.dfs.root=hudi-examples/src/main/resources/delta-streamer-config/dfs \
  --target-base-path /tmp/hoodie/deltastreamertable \
  --table-type MERGE_ON_READ \
  --target-table deltastreamertable \
  --source-ordering-field ts \
  --source-class org.apache.hudi.utilities.sources.JsonDFSSource \
  --schemaprovider-class org.apache.hudi.examples.common.ExampleDataSchemaProvider \
  --transformer-class org.apache.hudi.examples.common.IdentityTransformer

View File

@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launches org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer through
# spark-submit, using the hudi-utilities-bundle jar as the application jar and
# the hudi-examples jar(s) as additional --jars.
#
# Environment:
#   SPARK_HOME    required; "${SPARK_HOME}/bin/spark-submit" is executed.
#   SPARK_MASTER  optional; defaults to "yarn-cluster".
# Arguments:
#   All command-line arguments are forwarded unchanged to HoodieDeltaStreamer.

# Prints a message to stderr and aborts the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeeds when $1 is an existing file whose file name does not mark it as a
# sources or tests jar. Only the file name is inspected, so a checkout located
# below a directory whose name contains "sources" is not filtered out.
is_runtime_jar() {
  case "${1##*/}" in
    *sources* | *tests.jar) return 1 ;;
  esac
  [[ -f "$1" ]]
}

# Fail early with a clear message instead of trying to exec /bin/spark-submit.
[[ -n "${SPARK_HOME:-}" ]] \
  || die "SPARK_HOME must be set to a Spark installation directory"

# Resolve the script directory to an absolute path first: applying dirname
# twice to a relative invocation such as ./hudi-delta-streamer yields "."
# rather than the grandparent directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" \
  || die "cannot resolve the directory of ${BASH_SOURCE[0]}"
EXAMPLES_DIR="$(dirname "${SCRIPT_DIR}")"
PROJECT_DIR="$(dirname "${EXAMPLES_DIR}")"

# Locate the application jar with a glob rather than by parsing ls output.
# An unmatched glob stays literal and is rejected by the -f test in the helper.
utilities_jars=()
for candidate in "${PROJECT_DIR}"/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_*.jar; do
  if is_runtime_jar "${candidate}"; then
    utilities_jars+=("${candidate}")
  fi
done
if [[ "${#utilities_jars[@]}" -ne 1 ]]; then
  die "expected exactly one hudi-utilities-bundle jar in ${PROJECT_DIR}/packaging/hudi-utilities-bundle/target, found ${#utilities_jars[@]}"
fi
JAR_FILE="${utilities_jars[0]}"

# Locate the examples jar(s); without at least one, --jars would consume the
# following --class option as its value.
examples_jars=()
for candidate in "${PROJECT_DIR}"/hudi-examples/target/hudi-examples-*.jar; do
  if is_runtime_jar "${candidate}"; then
    examples_jars+=("${candidate}")
  fi
done
if [[ "${#examples_jars[@]}" -eq 0 ]]; then
  die "no hudi-examples jar found in ${PROJECT_DIR}/hudi-examples/target"
fi
# spark-submit expects --jars as a single comma-separated list.
EXAMPLES_JARS="$(IFS=,; printf '%s' "${examples_jars[*]}")"

SPARK_MASTER="${SPARK_MASTER:-yarn-cluster}"

# "$@" is quoted so that forwarded arguments containing whitespace or glob
# characters reach HoodieDeltaStreamer unchanged.
exec "${SPARK_HOME}/bin/spark-submit" \
  --master "${SPARK_MASTER}" \
  --conf spark.serializer="org.apache.spark.serializer.KryoSerializer" \
  --conf spark.kryoserializer.buffer.max=128m \
  --conf spark.yarn.queue=root.default \
  --conf spark.yarn.submit.waitAppCompletion=false \
  --packages org.apache.spark:spark-avro_2.11:2.4.4 \
  --jars "${EXAMPLES_JARS}" \
  --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \
  "${JAR_FILE}" \
  "$@"

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Simple example of HoodieDeltaStreamer reading data from Kafka.
#
# Create the source topic using:
#   kafka-topics.sh --create --zookeeper zk:2181 --replication-factor 3 --partitions 1 --topic hoodie-source-topic
# Insert data using:
#   kafka-console-producer.sh --broker-list localhost:9092 --topic hoodie-source-topic
# Then start the delta streamer with this script. The job is started through
# the hudi-delta-streamer launcher that lives in the same directory and runs
# with --continuous.

# Resolve the directory containing this script. Every expansion is quoted so
# that paths with whitespace work, and a failing cd aborts instead of silently
# falling back to the caller's working directory.
BASE_PATH="$(cd "$(dirname "$0")" && pwd)" || exit 1

# NOTE(review): the --props file below is a relative path; presumably the
# script is expected to be started from the repository root -- confirm.
"${BASE_PATH}/hudi-delta-streamer" \
  --props hudi-examples/src/main/resources/delta-streamer-config/kafka/kafka-source.properties \
  --target-base-path /tmp/hoodie/deltastreamertable \
  --table-type MERGE_ON_READ \
  --target-table deltastreamertable \
  --source-ordering-field ts \
  --source-class org.apache.hudi.utilities.sources.JsonKafkaSource \
  --schemaprovider-class org.apache.hudi.examples.common.ExampleDataSchemaProvider \
  --transformer-class org.apache.hudi.examples.common.IdentityTransformer \
  --continuous