From 30cf39301e311e26b29a65b3e4a229da92f925d1 Mon Sep 17 00:00:00 2001 From: Sivabalan Narayanan Date: Mon, 14 Mar 2022 15:39:26 -0700 Subject: [PATCH] [HUDI-3623] Removing hive sync node from non hive yamls (#5040) --- .../test-suite/cow-spark-long-running.yaml | 16 ++-------------- .../config/test-suite/cow-spark-simple.yaml | 16 ++-------------- ...long-running-multi-partitions-metadata.yaml | 8 +------- ...streamer-long-running-multi-partitions.yaml | 8 +------- ...treamer-medium-full-dataset-validation.yaml | 8 +------- .../detlastreamer-long-running-example.yaml | 18 ++---------------- .../test-suite/insert-overwrite-table.yaml | 16 ++-------------- .../config/test-suite/insert-overwrite.yaml | 16 ++-------------- .../config/test-suite/simple-clustering.yaml | 16 ++-------------- .../test-suite/simple-deltastreamer.yaml | 16 ++-------------- .../config/test-suite/spark-clustering.yaml | 8 +------- 11 files changed, 18 insertions(+), 128 deletions(-) diff --git a/docker/demo/config/test-suite/cow-spark-long-running.yaml b/docker/demo/config/test-suite/cow-spark-long-running.yaml index 8229ba749..c25b95c8d 100644 --- a/docker/demo/config/test-suite/cow-spark-long-running.yaml +++ b/docker/demo/config/test-suite/cow-spark-long-running.yaml @@ -25,17 +25,11 @@ dag_content: num_records_insert: 10000 type: SparkInsertNode deps: none - first_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_insert first_validate: config: validate_hive: false type: ValidateDatasetNode - deps: first_hive_sync + deps: first_insert first_upsert: config: record_size: 200 @@ -52,18 +46,12 @@ dag_content: num_records_delete: 8000 type: SparkDeleteNode deps: first_upsert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_delete second_validate: config: validate_hive: false delete_input_data: true type: ValidateDatasetNode - deps: second_hive_sync + deps: first_delete last_validate: config: execute_itr_count: 30 diff --git a/docker/demo/config/test-suite/cow-spark-simple.yaml b/docker/demo/config/test-suite/cow-spark-simple.yaml index 0859c6320..192adcf37 100644 --- a/docker/demo/config/test-suite/cow-spark-simple.yaml +++ b/docker/demo/config/test-suite/cow-spark-simple.yaml @@ -25,17 +25,11 @@ dag_content: num_records_insert: 100 type: SparkInsertNode deps: none - first_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_insert first_validate: config: validate_hive: false type: ValidateDatasetNode - deps: first_hive_sync + deps: first_insert first_upsert: config: record_size: 1000 @@ -52,15 +46,9 @@ dag_content: num_records_delete: 30 type: SparkDeleteNode deps: first_upsert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_delete second_validate: config: validate_hive: false delete_input_data: false type: ValidateDatasetNode - deps: second_hive_sync \ No newline at end of file + deps: first_delete \ No newline at end of file diff --git a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-metadata.yaml b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-metadata.yaml index 817552224..0212fdf43 100644 --- a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-metadata.yaml +++ b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-metadata.yaml @@ -57,18 +57,12 @@ dag_content: num_records_delete: 8000 type: DeleteNode deps: first_upsert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_delete second_validate: config: validate_hive: false delete_input_data: true type: ValidateDatasetNode - deps: second_hive_sync + deps: first_delete last_validate: config: execute_itr_count: 30 diff --git a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml index ba490a8c1..d7b111947 100644 --- a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml +++ b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml @@ -57,18 +57,12 @@ dag_content: num_records_delete: 8000 type: DeleteNode deps: first_upsert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_delete second_validate: config: validate_hive: false delete_input_data: true type: ValidateDatasetNode - deps: second_hive_sync + deps: first_delete last_validate: config: execute_itr_count: 50 diff --git a/docker/demo/config/test-suite/deltastreamer-medium-full-dataset-validation.yaml b/docker/demo/config/test-suite/deltastreamer-medium-full-dataset-validation.yaml index aa83b2780..77898640e 100644 --- a/docker/demo/config/test-suite/deltastreamer-medium-full-dataset-validation.yaml +++ b/docker/demo/config/test-suite/deltastreamer-medium-full-dataset-validation.yaml @@ -60,18 +60,12 @@ dag_content: num_records_delete: 8000 type: DeleteNode deps: first_upsert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_delete second_validate: config: validate_hive: false delete_input_data: false type: ValidateDatasetNode - deps: second_hive_sync + deps: first_delete last_validate: config: execute_itr_count: 20 diff --git a/docker/demo/config/test-suite/detlastreamer-long-running-example.yaml b/docker/demo/config/test-suite/detlastreamer-long-running-example.yaml index 28578eb9b..4b2ee7ad1 100644 --- a/docker/demo/config/test-suite/detlastreamer-long-running-example.yaml +++ b/docker/demo/config/test-suite/detlastreamer-long-running-example.yaml @@ -41,17 +41,11 @@ dag_content: num_records_insert: 300 deps: second_insert type: InsertNode - first_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: third_insert first_validate: config: validate_hive: false type: ValidateDatasetNode - deps: first_hive_sync + deps: third_insert first_upsert: config: record_size: 1000 @@ -68,22 +62,14 @@ dag_content: num_records_delete: 8000 type: DeleteNode deps: first_upsert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_delete second_validate: config: validate_hive: false delete_input_data: true type: ValidateDatasetNode - deps: second_hive_sync + deps: first_delete last_validate: config: execute_itr_count: 50 - validate_clean: true - validate_archival: true type: ValidateAsyncOperations deps: second_validate diff --git a/docker/demo/config/test-suite/insert-overwrite-table.yaml b/docker/demo/config/test-suite/insert-overwrite-table.yaml index 8b5a26e46..1a58abdcc 100644 --- a/docker/demo/config/test-suite/insert-overwrite-table.yaml +++ b/docker/demo/config/test-suite/insert-overwrite-table.yaml @@ -53,19 +53,13 @@ dag_content: num_partitions_upsert: 10 type: SparkUpsertNode deps: second_insert - first_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: second_upsert first_insert_overwrite_table: config: record_size: 1000 repeat_count: 10 num_records_insert: 10 type: SparkInsertOverwriteTableNode - deps: first_hive_sync + deps: second_upsert delete_all_input_except_last: config: delete_input_data_except_latest: true @@ -89,16 +83,10 @@ dag_content: num_partitions_upsert: 10 type: SparkUpsertNode deps: third_insert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: third_upsert second_validate: config: validate_full_data : true validate_hive: false delete_input_data: false type: ValidateDatasetNode - deps: second_hive_sync + deps: third_upsert diff --git a/docker/demo/config/test-suite/insert-overwrite.yaml b/docker/demo/config/test-suite/insert-overwrite.yaml index f2299c50c..dc185d593 100644 --- a/docker/demo/config/test-suite/insert-overwrite.yaml +++ b/docker/demo/config/test-suite/insert-overwrite.yaml @@ -54,12 +54,6 @@ dag_content: num_partitions_upsert: 10 type: SparkUpsertNode deps: second_insert - first_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: second_upsert first_insert_overwrite: config: record_size: 1000 @@ -67,7 +61,7 @@ dag_content: repeat_count: 1 num_records_insert: 10 type: SparkInsertOverwriteNode - deps: first_hive_sync + deps: second_upsert delete_all_input_except_last: config: delete_input_data_except_latest: true @@ -91,16 +85,10 @@ dag_content: num_partitions_upsert: 10 type: SparkUpsertNode deps: third_insert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: third_upsert second_validate: config: validate_full_data : true validate_hive: false delete_input_data: false type: ValidateDatasetNode - deps: second_hive_sync + deps: third_upsert diff --git a/docker/demo/config/test-suite/simple-clustering.yaml b/docker/demo/config/test-suite/simple-clustering.yaml index 7389ee3eb..01849bb64 100644 --- a/docker/demo/config/test-suite/simple-clustering.yaml +++ b/docker/demo/config/test-suite/simple-clustering.yaml @@ -47,30 +47,18 @@ dag_content: num_records_delete: 9000 type: DeleteNode deps: third_insert - first_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_delete first_validate: config: validate_hive: false type: ValidateDatasetNode - deps: first_hive_sync + deps: first_delete first_cluster: config: execute_itr_count: 25 type: ClusteringNode deps: first_validate - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_cluster second_validate: config: validate_hive: false type: ValidateDatasetNode - deps: second_hive_sync + deps: first_cluster diff --git a/docker/demo/config/test-suite/simple-deltastreamer.yaml b/docker/demo/config/test-suite/simple-deltastreamer.yaml index f49a41baf..11b7f17d3 100644 --- a/docker/demo/config/test-suite/simple-deltastreamer.yaml +++ b/docker/demo/config/test-suite/simple-deltastreamer.yaml @@ -41,17 +41,11 @@ dag_content: num_records_insert: 300 deps: second_insert type: InsertNode - first_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: third_insert first_validate: config: validate_hive: false type: ValidateDatasetNode - deps: first_hive_sync + deps: third_insert first_upsert: config: record_size: 1000 @@ -68,15 +62,9 @@ dag_content: num_records_delete: 2000 type: DeleteNode deps: first_upsert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_delete second_validate: config: validate_hive: false delete_input_data: true type: ValidateDatasetNode - deps: second_hive_sync + deps: first_delete diff --git a/docker/demo/config/test-suite/spark-clustering.yaml b/docker/demo/config/test-suite/spark-clustering.yaml index e8e722ca7..8da4f9539 100644 --- a/docker/demo/config/test-suite/spark-clustering.yaml +++ b/docker/demo/config/test-suite/spark-clustering.yaml @@ -59,15 +59,9 @@ dag_content: num_records_delete: 16000 type: SparkDeleteNode deps: second_upsert - second_hive_sync: - config: - queue_name: "adhoc" - engine: "mr" - type: HiveSyncNode - deps: first_delete second_validate: config: validate_hive: false delete_input_data: false type: ValidateDatasetNode - deps: second_hive_sync \ No newline at end of file + deps: first_delete \ No newline at end of file