[MINOR] Moving spark scheduling configs out of DataSourceOptions (#4843)

2022-02-20 13:49:18 -05:00
parent 83279971a1
commit 66ac1446dd
5 changed files with 59 additions and 35 deletions
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
@@ -479,32 +479,6 @@ object DataSourceWriteOptions {
      + "Use this when you are in the process of migrating from "
      + "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to org.apache.hudi input format")

-  // spark data source write pool name. Incase of streaming sink, users might be interested to set custom scheduling configs
-  // for regular writes and async compaction. In such cases, this pool name will be used for spark datasource writes.
-  val SPARK_DATASOURCE_WRITER_POOL_NAME = "sparkdatasourcewrite"
-
-  /*
-  When async compaction is enabled (deltastreamer or streaming sink), users might be interested to set custom
-  scheduling configs for regular writes and async compaction. This is the property used to set custom scheduler config
-  file with spark. In Deltastreamer, the file is generated within hudi and set if necessary. Where as in case of streaming
-  sink, users have to set this property when they invoke spark shell.
-  Sample format of the file contents.
-  <?xml version="1.0"?>
-  <allocations>
-    <pool name="sparkdatasourcewrite">
-      <schedulingMode>FAIR</schedulingMode>
-      <weight>4</weight>
-      <minShare>2</minShare>
-    </pool>
-    <pool name="hoodiecompact">
-      <schedulingMode>FAIR</schedulingMode>
-      <weight>3</weight>
-      <minShare>1</minShare>
-    </pool>
-  </allocations>
-   */
-  val SPARK_SCHEDULER_ALLOCATION_FILE_KEY = "spark.scheduler.allocation.file"
-
  /** @deprecated Use {@link HIVE_SYNC_MODE} instead of this config from 0.9.0 */
  @Deprecated
  val HIVE_USE_JDBC: ConfigProperty[String] = ConfigProperty
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -124,8 +124,8 @@ object HoodieSparkSqlWriter {

    val jsc = new JavaSparkContext(sparkContext)
    if (asyncCompactionTriggerFn.isDefined) {
-      if (jsc.getConf.getOption(DataSourceWriteOptions.SPARK_SCHEDULER_ALLOCATION_FILE_KEY).isDefined) {
-        jsc.setLocalProperty("spark.scheduler.pool", DataSourceWriteOptions.SPARK_DATASOURCE_WRITER_POOL_NAME)
+      if (jsc.getConf.getOption(SparkConfigs.SPARK_SCHEDULER_ALLOCATION_FILE_KEY).isDefined) {
+        jsc.setLocalProperty("spark.scheduler.pool", SparkConfigs.SPARK_DATASOURCE_WRITER_POOL_NAME)
      }
    }
    val instantTime = HoodieActiveTimeline.createNewInstantTime()
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkConfigs.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkConfigs.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi
+
+object SparkConfigs {
+
+  // Spark datasource write pool name. In case of a streaming sink, users might be interested in setting custom scheduling
+  // configs for regular writes and async compaction. In such cases, this pool name will be used for Spark datasource writes.
+  val SPARK_DATASOURCE_WRITER_POOL_NAME = "sparkdatasourcewrite"
+
+  /*
+  When async compaction is enabled (Deltastreamer or streaming sink), users might be interested in setting custom
+  scheduling configs for regular writes and async compaction. This is the property used to set a custom scheduler config
+  file with Spark. In Deltastreamer, the file is generated within Hudi and set if necessary. Whereas, in the case of the
+  streaming sink, users have to set this property themselves when they invoke spark shell.
+  Sample format of the file contents:
+  <?xml version="1.0"?>
+  <allocations>
+    <pool name="sparkdatasourcewrite">
+      <schedulingMode>FAIR</schedulingMode>
+      <weight>4</weight>
+      <minShare>2</minShare>
+    </pool>
+    <pool name="hoodiecompact">
+      <schedulingMode>FAIR</schedulingMode>
+      <weight>3</weight>
+      <minShare>1</minShare>
+    </pool>
+  </allocations>
+   */
+  val SPARK_SCHEDULER_ALLOCATION_FILE_KEY = "spark.scheduler.allocation.file"
+
+}