# hudi/hudi-integ-test/src/test/resources/unit-test-mor-dag.yaml

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Top-level settings for the DAG described in this file.
dag_name: unit-test-mor-dag
# NOTE(review): judging by the key names, these are the number of rounds and
# the delay between rounds in minutes — confirm against the DAG loader.
dag_rounds: 1
dag_intermittent_delay_mins: 10
# Node definitions. Each entry carries a `config` mapping, a node `type`,
# and `deps` naming the node it depends on.
dag_content:
  # InsertNode whose deps is `none` (presumably: no upstream node — confirm).
  # NOTE(review): the meaning of the config keys is inferred from their
  # names; verify against the node implementation.
  first_insert:
    config:
      num_records_insert: 100
      record_size: 70000
      num_partitions_insert: 1
      # repeat_count is 2 here; the other insert nodes in this DAG use 1.
      repeat_count: 2
    type: InsertNode
    deps: none
  # InsertNode that depends on first_insert. first_rollback names this node
  # as its dependency.
  second_insert:
    config:
      num_records_insert: 100
      record_size: 70000
      num_partitions_insert: 1
      repeat_count: 1
    type: InsertNode
    deps: first_insert
  # RollbackNode that depends on second_insert.
  # NOTE(review): presumably rolls back what second_insert wrote — confirm
  # against the RollbackNode implementation.
  first_rollback:
    # No settings are given for this node; a bare `config:` loads as null
    # rather than as an empty mapping.
    config:
    type: RollbackNode
    deps: second_insert
  # InsertNode that depends on first_rollback, so it is ordered after the
  # rollback step.
  third_insert:
    config:
      num_records_insert: 100
      record_size: 70000
      num_partitions_insert: 1
      repeat_count: 1
    type: InsertNode
    deps: first_rollback
  # UpsertNode that depends on third_insert. It uses the `*_upsert` variants
  # of the partition-count and record-count keys.
  first_upsert:
    config:
      num_records_upsert: 100
      record_size: 70000
      num_partitions_upsert: 1
      repeat_count: 1
    type: UpsertNode
    deps: third_insert
  # HiveSyncNode that depends on first_upsert.
  # NOTE(review): `engine` and `queue_name` presumably select the Hive
  # execution engine and the queue to submit to — confirm.
  first_hive_sync:
    config:
      engine: 'mr'
      queue_name: 'adhoc'
    type: HiveSyncNode
    deps: first_upsert
  # HiveQueryNode that depends on first_hive_sync.
  # NOTE(review): each resultN is presumably the value queryN is expected to
  # return — confirm against the HiveQueryNode implementation.
  first_hive_query:
    config:
      # NOTE(review): the keys start at prop2 (there is no prop1) and
      # spark.yarn.queue is set to an empty value — confirm both are intended.
      hive_props:
        prop2: 'set spark.yarn.queue='
        prop3: 'set hive.strict.checks.large.query=false'
        prop4: 'set hive.stats.autogather=false'
      hive_queries:
        # Total row count. 300 equals 2 x 100 (first_insert) + 100
        # (third_insert), which fits if second_insert's rows are rolled back
        # and the upsert adds no new keys — TODO confirm.
        query1: 'select count(*) from testdb1.table1'
        result1: 300
        # Selects `_row_key` groups that hold more than one row.
        query2: 'select count(*) from testdb1.table1 group   by `_row_key` having count(*) > 1'
        result2: 0
    type: HiveQueryNode
    deps: first_hive_sync