[HUDI-2394] Implement Kafka Sink Protocol for Hudi for Ingesting Immutable Data (#3592)
- Fixing packaging, naming of classes - Use of log4j over slf4j for uniformity - More follow-on fixes - Added a version to control/coordinator events. - Eliminated the config added to write config - Fixed fetching of checkpoints based on table type - Clean up of naming, code placement Co-authored-by: Rajesh Mahindra <rmahindra@Rajeshs-MacBook-Pro.local> Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
5
hudi-kafka-connect/scripts/raw.json
Normal file
5
hudi-kafka-connect/scripts/raw.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{"volume": 0, "symbol": "TPNL", "ts": "2017-08-31 09:30:00", "month": "08", "high": 6.37, "low": 1.37, "key": "TPNL_2017-08-31 09", "year": 2017, "date": "2017/08/31", "close": 4.44, "open": 1.37, "day": "31"}
|
||||
{"volume": 0, "symbol": "SPOT", "ts": "2018-08-31 09:30:00", "month": "08", "high": 1.87, "low": 0.37, "key": "TPNL_2018-08-31 09", "year": 2018, "date": "2018/08/31", "close": 1.44, "open": 1.77, "day": "31"}
|
||||
{"volume": 0, "symbol": "GOOG", "ts": "2019-08-31 09:30:00", "month": "08", "high": 2.1, "low": 1.7, "key": "TPNL_2019-08-31 09", "year": 2019, "date": "2019/08/31", "close": 1.94, "open": 2.0, "day": "31"}
|
||||
{"volume": 0, "symbol": "MSFT", "ts": "2020-08-31 09:30:00", "month": "08", "high": 3.33, "low": 0.87, "key": "TPNL_2020-08-31 09", "year": 2020, "date": "2020/08/31", "close": 3.33, "open": 3.1, "day": "31"}
|
||||
{"volume": 0, "symbol": "APPL", "ts": "2021-08-31 09:30:00", "month": "08", "high": 3.17, "low": 2.37, "key": "TPNL_2021-08-31 09", "year": 2021, "date": "2021/08/31", "close": 2.66, "open": 3.1, "day": "31"}
|
||||
38
hudi-kafka-connect/scripts/runKafkaTrafficGenerator.sh
Normal file
38
hudi-kafka-connect/scripts/runKafkaTrafficGenerator.sh
Normal file
@@ -0,0 +1,38 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#!/bin/bash
# NOTE(review): this shebang follows the license header instead of being the
# first line of the file, so it is not honoured when the file is executed
# directly; invoke the script explicitly with `bash`.

# The Kafka CLI tools are located through KAFKA_HOME. Abort early with a clear
# message instead of trying to run "/bin/kafka-topics.sh" when it is unset.
: "${KAFKA_HOME:?KAFKA_HOME must point to the Kafka installation directory}"

# First delete the existing topic
"${KAFKA_HOME}/bin/kafka-topics.sh" --delete --topic hudi-test-topic --bootstrap-server localhost:9092

# Create the topic with 4 partitions
"${KAFKA_HOME}/bin/kafka-topics.sh" --create --topic hudi-test-topic --partitions 4 --replication-factor 1 --bootstrap-server localhost:9092
|
||||
|
||||
# Generate kafka messages from raw records.
#
# Arguments:
#   $1 - record-count threshold (non-negative integer).
#
# Each line of the input file is treated as one JSON record. The file is
# replayed from the top for as long as the running counter is <= $1, and a
# pass over the file is never cut short, so the number of messages produced
# is the smallest multiple of the file's line count that is greater than $1.
inputFile="raw.json"

# Without a numeric threshold the loop condition below is not a valid
# arithmetic expression, so reject bad input up front.
if [[ ! "$1" =~ ^[0-9]+$ ]]; then
  printf 'Usage: %s <record-count>\n' "${0##*/}" >&2
  exit 2
fi
# Force base 10 so a value such as "010" or "08" is not parsed as octal.
numRecords=$((10#$1))

# A missing or empty input file would leave the counter stuck at its current
# value and make the outer loop spin forever.
if [[ ! -r "$inputFile" || ! -s "$inputFile" ]]; then
  printf 'error: input file "%s" is missing, unreadable or empty\n' "$inputFile" >&2
  exit 1
fi

# Stamp every record with a unique, monotonically increasing counter by
# overwriting its "volume" field before publishing it.
recordKey=0
while (( recordKey <= numRecords )); do
  # -r keeps backslashes in the JSON intact; the `|| [[ -n ... ]]` clause
  # still processes a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    printf '%s\n' "$line" \
      | jq --argjson recordKey "$recordKey" -c '.volume = $recordKey' \
      | kcat -P -b localhost:9092 -t hudi-test-topic
    ((recordKey++))
    # Pause for a second after every 1000 records to throttle the producer.
    if (( recordKey % 1000 == 0 )); then
      sleep 1
    fi
  done < "$inputFile"
done
|
||||
Reference in New Issue
Block a user