From cf1543185217c8acd306218a7d00d1022dd35584 Mon Sep 17 00:00:00 2001 From: Sagar Sumit Date: Fri, 10 Sep 2021 18:32:01 +0530 Subject: [PATCH] [HUDI-2393] Add yamls for large scale testing (#3594) --- .../cow-large-scale-long-running.yaml | 71 ++++++++++++++++++ .../large-scale/cow-large-scale-sanity.yaml | 71 ++++++++++++++++++ .../mor-large-scale-long-running.yaml | 75 +++++++++++++++++++ .../large-scale/mor-large-scale-sanity.yaml | 75 +++++++++++++++++++ 4 files changed, 292 insertions(+) create mode 100644 docker/demo/config/test-suite/large-scale/cow-large-scale-long-running.yaml create mode 100644 docker/demo/config/test-suite/large-scale/cow-large-scale-sanity.yaml create mode 100644 docker/demo/config/test-suite/large-scale/mor-large-scale-long-running.yaml create mode 100644 docker/demo/config/test-suite/large-scale/mor-large-scale-sanity.yaml diff --git a/docker/demo/config/test-suite/large-scale/cow-large-scale-long-running.yaml b/docker/demo/config/test-suite/large-scale/cow-large-scale-long-running.yaml new file mode 100644 index 000000000..9231407c9 --- /dev/null +++ b/docker/demo/config/test-suite/large-scale/cow-large-scale-long-running.yaml @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Long-running yaml to test simple operations across repeated dag rounds. +dag_name: cow-large-scale-long-running.yaml +dag_rounds: 50 +dag_intermittent_delay_mins: 1 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 1000000 # this will generate about 1.5 GB data + type: InsertNode + deps: none + second_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 100000 + deps: first_insert + type: InsertNode + third_insert: + config: + record_size: 1000 + num_partitions_insert: 5 + repeat_count: 1 + num_records_insert: 300000 + deps: second_insert + type: InsertNode + first_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: third_insert + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 2 + num_records_insert: 3000 + repeat_count: 1 + num_records_upsert: 100000 + num_partitions_upsert: 10 + type: UpsertNode + deps: first_validate + first_delete: + config: + num_partitions_delete: 5 + num_records_delete: 8000 + type: DeleteNode + deps: first_upsert + second_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: first_delete diff --git a/docker/demo/config/test-suite/large-scale/cow-large-scale-sanity.yaml b/docker/demo/config/test-suite/large-scale/cow-large-scale-sanity.yaml new file mode 100644 index 000000000..813c7671a --- /dev/null +++ b/docker/demo/config/test-suite/large-scale/cow-large-scale-sanity.yaml @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Sanity yaml to test simple operations. +dag_name: cow-large-scale-sanity.yaml +dag_rounds: 1 +dag_intermittent_delay_mins: 1 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 100 + repeat_count: 1 + num_records_insert: 3000000 # this will generate about 60GB data + type: InsertNode + deps: none + second_insert: + config: + record_size: 1000 + num_partitions_insert: 50 + repeat_count: 1 + num_records_insert: 1000000 + deps: first_insert + type: InsertNode + third_insert: + config: + record_size: 1000 + num_partitions_insert: 5 + repeat_count: 1 + num_records_insert: 300000 + deps: second_insert + type: InsertNode + first_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: third_insert + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 2 + num_records_insert: 3000 + repeat_count: 1 + num_records_upsert: 100000 + num_partitions_upsert: 20 + type: UpsertNode + deps: first_validate + first_delete: + config: + num_partitions_delete: 5 + num_records_delete: 8000 + type: DeleteNode + deps: first_upsert + second_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: first_delete diff --git a/docker/demo/config/test-suite/large-scale/mor-large-scale-long-running.yaml b/docker/demo/config/test-suite/large-scale/mor-large-scale-long-running.yaml new file mode 100644 index 000000000..f37b206eb --- /dev/null +++ b/docker/demo/config/test-suite/large-scale/mor-large-scale-long-running.yaml @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under 
one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Long-running yaml to test simple operations across repeated dag rounds. +dag_name: mor-large-scale-long-running.yaml +dag_rounds: 50 +dag_intermittent_delay_mins: 1 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 1000000 # this will generate about 1.5 GB data + type: InsertNode + deps: none + second_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 100000 + deps: first_insert + type: InsertNode + third_insert: + config: + record_size: 1000 + num_partitions_insert: 5 + repeat_count: 1 + num_records_insert: 300000 + deps: second_insert + type: InsertNode + first_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: third_insert + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 2 + num_records_insert: 3000 + repeat_count: 1 + num_records_upsert: 100000 + num_partitions_upsert: 10 + type: UpsertNode + deps: first_validate + first_schedule_compact: + config: + type: ScheduleCompactNode + deps: first_upsert + first_delete: + config: + num_partitions_delete: 5 + num_records_delete: 8000 + type: DeleteNode + deps: first_schedule_compact + second_validate: + config: + validate_hive: false + type: 
ValidateDatasetNode + deps: first_delete diff --git a/docker/demo/config/test-suite/large-scale/mor-large-scale-sanity.yaml b/docker/demo/config/test-suite/large-scale/mor-large-scale-sanity.yaml new file mode 100644 index 000000000..1137b1388 --- /dev/null +++ b/docker/demo/config/test-suite/large-scale/mor-large-scale-sanity.yaml @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Sanity yaml to test simple operations. 
+dag_name: mor-large-scale-sanity.yaml +dag_rounds: 1 +dag_intermittent_delay_mins: 1 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 100 + repeat_count: 1 + num_records_insert: 3000000 # this will generate about 60GB data + type: InsertNode + deps: none + second_insert: + config: + record_size: 1000 + num_partitions_insert: 50 + repeat_count: 1 + num_records_insert: 1000000 + deps: first_insert + type: InsertNode + third_insert: + config: + record_size: 1000 + num_partitions_insert: 5 + repeat_count: 1 + num_records_insert: 300000 + deps: second_insert + type: InsertNode + first_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: third_insert + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 2 + num_records_insert: 3000 + repeat_count: 1 + num_records_upsert: 100000 + num_partitions_upsert: 20 + type: UpsertNode + deps: first_validate + first_schedule_compact: + config: + type: ScheduleCompactNode + deps: first_upsert + first_delete: + config: + num_partitions_delete: 5 + num_records_delete: 8000 + type: DeleteNode + deps: first_schedule_compact + second_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: first_delete