# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test DAG definition. The top-level keys configure the run as a whole; the
# individual steps are declared under `dag_content`.
dag_name: spark-medium-clustering.yaml
# NOTE(review): presumably the number of times the whole DAG is executed.
# The same value (20) appears as `execute_itr_count` on `last_validate`;
# keep the two in sync if that coupling is intended - confirm against the
# DAG loader.
dag_rounds: 20
# NOTE(review): presumably the pause, in minutes, between consecutive rounds
# (0 would mean no pause) - confirm.
dag_intermittent_delay_mins: 0
# Steps keyed by step name. Every step below has a `config` mapping, a `type`
# and a `deps` entry; `deps` is either `none` or the name of another step in
# this file. As written the steps form a single linear chain:
#   first_insert -> first_validate -> first_upsert -> first_delete
#     -> second_validate -> last_validate
dag_content:
  # Root step of the DAG (`deps: none`): a SparkInsertNode configured for
  # 10000 records over 50 insert partitions.
  first_insert:
    config:
      # NOTE(review): unit not stated here; presumably bytes per generated
      # record - confirm.
      record_size: 200
      num_partitions_insert: 50
      # NOTE(review): presumably how many times this step's batch is
      # repeated - confirm.
      repeat_count: 1
      num_records_insert: 10000
    type: SparkInsertNode
    deps: none
  # Validation step that depends on `first_insert`. Hive validation is
  # switched off.
  first_validate:
    config:
      validate_hive: false
    type: ValidateDatasetNode
    deps: first_insert
  # Upsert step that depends on `first_validate`. It is configured with both
  # an insert portion (300 records) and an upsert portion (3000 records),
  # each spread over 50 partitions.
  first_upsert:
    config:
      # NOTE(review): unit not stated here; presumably bytes per generated
      # record - confirm. Same value as in `first_insert`.
      record_size: 200
      num_partitions_insert: 50
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 3000
      num_partitions_upsert: 50
    type: SparkUpsertNode
    deps: first_validate
  # Delete step that depends on `first_upsert`: configured for 8000 records
  # over 50 delete partitions.
  first_delete:
    config:
      num_partitions_delete: 50
      num_records_delete: 8000
    type: SparkDeleteNode
    deps: first_upsert
  # Second validation step, depending on `first_delete`. Same node type as
  # `first_validate`, with Hive validation off, but additionally sets
  # `delete_input_data`.
  second_validate:
    config:
      validate_hive: false
      # NOTE(review): presumably removes the generated input data once
      # validation has run, so it does not accumulate - confirm.
      delete_input_data: true
    type: ValidateDatasetNode
    deps: first_delete
  # Final step of the chain, depending on `second_validate`.
  last_validate:
    config:
      # Same value as the top-level `dag_rounds` (20).
      # NOTE(review): presumably this restricts the step to that iteration,
      # i.e. the last round; update both values together - confirm.
      execute_itr_count: 20
    type: ValidateAsyncOperations
    deps: second_validate