[HUDI-476]: Add hudi-examples module (#1151)
add hoodie delta streamer mock source example and dfs source and kafka source examples Signed-off-by: dengziming <dengziming1993@gmail.com> add defaultSparkConf utils method change version of hudi-examples to 0.5.2-SNAPSHOT change the artifactId of hudi-spark and hudi-utilities alter some code to adapt kafka2.0 Update scripts Add license
This commit is contained in:
35
hudi-examples/bin/custom-delta-streamer-example.sh
Executable file
35
hudi-examples/bin/custom-delta-streamer-example.sh
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Example: run HoodieDeltaStreamer against a mock data source.
#
# Records are read from org.apache.hudi.examples.common.RandomJsonSource, which
# is driven by the mock HoodieExampleDataGenerator. Use this script as a
# template when defining your own custom data source.
#
# The job is submitted through the `hudi-delta-streamer` launcher that sits in
# the same directory as this script.

# Absolute directory containing this script. Everything is quoted so that a
# checkout under a path with spaces still works, and we abort if the directory
# cannot be resolved instead of silently falling back to the current directory.
BASE_PATH=$(cd "$(dirname -- "$0")" && pwd) || exit 1

"${BASE_PATH}/hudi-delta-streamer" \
  --hoodie-conf hoodie.datasource.write.recordkey.field=uuid \
  --hoodie-conf hoodie.datasource.write.partitionpath.field=driver \
  --target-base-path /tmp/hoodie/deltastreamertable \
  --table-type MERGE_ON_READ \
  --target-table deltastreamertable \
  --source-ordering-field ts \
  --source-class org.apache.hudi.examples.common.RandomJsonSource \
  --schemaprovider-class org.apache.hudi.examples.common.ExampleDataSchemaProvider \
  --transformer-class org.apache.hudi.examples.common.IdentityTransformer \
  --continuous
|
||||
35
hudi-examples/bin/dfs-delta-streamer-example.sh
Executable file
35
hudi-examples/bin/dfs-delta-streamer-example.sh
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Example: run HoodieDeltaStreamer with JsonDFSSource.
#
# The job reads the data found in a DFS directory once, then writes it to a
# Hudi table which can afterwards be queried.
#
# The job is submitted through the `hudi-delta-streamer` launcher that sits in
# the same directory as this script.
#
# NOTE(review): hoodie.deltastreamer.source.dfs.root below is a relative path,
# so this presumably has to be started from the repository root (or the path
# must exist relative to the job's default filesystem) -- confirm.

# Absolute directory containing this script. Everything is quoted so that a
# checkout under a path with spaces still works, and we abort if the directory
# cannot be resolved instead of silently falling back to the current directory.
BASE_PATH=$(cd "$(dirname -- "$0")" && pwd) || exit 1

"${BASE_PATH}/hudi-delta-streamer" \
  --hoodie-conf hoodie.datasource.write.recordkey.field=uuid \
  --hoodie-conf hoodie.datasource.write.partitionpath.field=driver \
  --hoodie-conf hoodie.deltastreamer.source.dfs.root=hudi-examples/src/main/resources/delta-streamer-config/dfs \
  --target-base-path /tmp/hoodie/deltastreamertable \
  --table-type MERGE_ON_READ \
  --target-table deltastreamertable \
  --source-ordering-field ts \
  --source-class org.apache.hudi.utilities.sources.JsonDFSSource \
  --schemaprovider-class org.apache.hudi.examples.common.ExampleDataSchemaProvider \
  --transformer-class org.apache.hudi.examples.common.IdentityTransformer
|
||||
39
hudi-examples/bin/hudi-delta-streamer
Executable file
39
hudi-examples/bin/hudi-delta-streamer
Executable file
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Launcher for HoodieDeltaStreamer used by the example scripts in this
# directory. It locates the hudi-utilities bundle jar and the hudi-examples
# jar(s) inside the source tree and hands them to spark-submit; every argument
# given to this script is forwarded unchanged to HoodieDeltaStreamer.
#
# Environment:
#   SPARK_HOME    (required) Spark installation providing bin/spark-submit.
#   SPARK_MASTER  (optional) value for --master; defaults to "yarn-cluster".

# Prints a message on stderr and aborts the launcher.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Fills the global array FOUND_JARS with every existing file among the
# arguments whose file name does not look like a sources or tests artifact.
# Only the file name is inspected, so a checkout living under a directory
# whose name contains "sources" is not filtered out by accident.
collect_jars() {
  local candidate
  FOUND_JARS=()
  for candidate in "$@"; do
    # A glob that matched nothing is passed through literally; skip it.
    [[ -f "${candidate}" ]] || continue
    case "${candidate##*/}" in
      *sources* | *tests.jar*) continue ;;
    esac
    FOUND_JARS+=("${candidate}")
  done
}

: "${SPARK_HOME:?SPARK_HOME must point to a Spark installation}"

# Resolve to absolute paths so the launcher also works when it is invoked
# through a relative path such as "bin/hudi-delta-streamer".
EXAMPLES_DIR="$(cd "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)" \
  || die "cannot resolve the hudi-examples directory"
PROJECT_DIR="$(dirname -- "${EXAMPLES_DIR}")"

collect_jars "${PROJECT_DIR}"/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_*.jar
(( ${#FOUND_JARS[@]} > 0 )) \
  || die "hudi-utilities-bundle jar not found under ${PROJECT_DIR}/packaging/hudi-utilities-bundle/target; build the project first"
# spark-submit takes exactly one application jar; use the first match.
JAR_FILE="${FOUND_JARS[0]}"

collect_jars "${PROJECT_DIR}"/hudi-examples/target/hudi-examples-*.jar
(( ${#FOUND_JARS[@]} > 0 )) \
  || die "hudi-examples jar not found under ${PROJECT_DIR}/hudi-examples/target; build the project first"
# --jars expects a single comma-separated list.
EXAMPLES_JARS="$(IFS=,; printf '%s' "${FOUND_JARS[*]}")"

SPARK_MASTER="${SPARK_MASTER:-yarn-cluster}"

exec "${SPARK_HOME}/bin/spark-submit" \
  --master "${SPARK_MASTER}" \
  --conf spark.serializer="org.apache.spark.serializer.KryoSerializer" \
  --conf spark.kryoserializer.buffer.max=128m \
  --conf spark.yarn.queue=root.default \
  --conf spark.yarn.submit.waitAppCompletion=false \
  --packages org.apache.spark:spark-avro_2.11:2.4.4 \
  --jars "${EXAMPLES_JARS}" \
  --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \
  "${JAR_FILE}" \
  "$@"
|
||||
36
hudi-examples/bin/kafka-delta-streamer-example.sh
Executable file
36
hudi-examples/bin/kafka-delta-streamer-example.sh
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Example: run HoodieDeltaStreamer with JsonKafkaSource.
#
# Steps:
#   1. Create the source topic:
#        kafka-topics.sh --create --zookeeper zk:2181 --replication-factor 3 --partitions 1 --topic hoodie-source-topic
#   2. Insert data:
#        kafka-console-producer.sh --broker-list localhost:9092 --topic hoodie-source-topic
#   3. Start the delta streamer by running this script.
#
# The job is submitted through the `hudi-delta-streamer` launcher that sits in
# the same directory as this script.
#
# NOTE(review): the --props path below is relative, so this presumably has to
# be started from the repository root -- confirm.

# Absolute directory containing this script. Everything is quoted so that a
# checkout under a path with spaces still works, and we abort if the directory
# cannot be resolved instead of silently falling back to the current directory.
BASE_PATH=$(cd "$(dirname -- "$0")" && pwd) || exit 1

"${BASE_PATH}/hudi-delta-streamer" \
  --props hudi-examples/src/main/resources/delta-streamer-config/kafka/kafka-source.properties \
  --target-base-path /tmp/hoodie/deltastreamertable \
  --table-type MERGE_ON_READ \
  --target-table deltastreamertable \
  --source-ordering-field ts \
  --source-class org.apache.hudi.utilities.sources.JsonKafkaSource \
  --schemaprovider-class org.apache.hudi.examples.common.ExampleDataSchemaProvider \
  --transformer-class org.apache.hudi.examples.common.IdentityTransformer \
  --continuous
|
||||
Reference in New Issue
Block a user