[MINOR] Moving spark scheduling configs out of DataSourceOptions (#4843)
This commit is contained in:
committed by
GitHub
parent
83279971a1
commit
66ac1446dd
@@ -479,32 +479,6 @@ object DataSourceWriteOptions {
|
||||
+ "Use this when you are in the process of migrating from "
|
||||
+ "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to org.apache.hudi input format")
|
||||
|
||||
// Spark datasource write pool name. In case of a streaming sink, users might want to set custom scheduling configs
|
||||
// for regular writes and async compaction. In such cases, this pool name will be used for spark datasource writes.
|
||||
val SPARK_DATASOURCE_WRITER_POOL_NAME = "sparkdatasourcewrite"
|
||||
|
||||
/*
|
||||
When async compaction is enabled (deltastreamer or streaming sink), users might be interested to set custom
|
||||
scheduling configs for regular writes and async compaction. This is the property used to set custom scheduler config
|
||||
file with Spark. In Deltastreamer, the file is generated within Hudi and set if necessary, whereas in the case of a streaming
|
||||
sink, users have to set this property when they invoke spark shell.
|
||||
Sample format of the file contents.
|
||||
<?xml version="1.0"?>
|
||||
<allocations>
|
||||
<pool name="sparkdatasourcewrite">
|
||||
<schedulingMode>FAIR</schedulingMode>
|
||||
<weight>4</weight>
|
||||
<minShare>2</minShare>
|
||||
</pool>
|
||||
<pool name="hoodiecompact">
|
||||
<schedulingMode>FAIR</schedulingMode>
|
||||
<weight>3</weight>
|
||||
<minShare>1</minShare>
|
||||
</pool>
|
||||
</allocations>
|
||||
*/
|
||||
val SPARK_SCHEDULER_ALLOCATION_FILE_KEY = "spark.scheduler.allocation.file"
|
||||
|
||||
/** @deprecated Use {@link HIVE_SYNC_MODE} instead of this config from 0.9.0 */
|
||||
@Deprecated
|
||||
val HIVE_USE_JDBC: ConfigProperty[String] = ConfigProperty
|
||||
|
||||
@@ -124,8 +124,8 @@ object HoodieSparkSqlWriter {
|
||||
|
||||
val jsc = new JavaSparkContext(sparkContext)
|
||||
if (asyncCompactionTriggerFn.isDefined) {
|
||||
if (jsc.getConf.getOption(DataSourceWriteOptions.SPARK_SCHEDULER_ALLOCATION_FILE_KEY).isDefined) {
|
||||
jsc.setLocalProperty("spark.scheduler.pool", DataSourceWriteOptions.SPARK_DATASOURCE_WRITER_POOL_NAME)
|
||||
if (jsc.getConf.getOption(SparkConfigs.SPARK_SCHEDULER_ALLOCATION_FILE_KEY).isDefined) {
|
||||
jsc.setLocalProperty("spark.scheduler.pool", SparkConfigs.SPARK_DATASOURCE_WRITER_POOL_NAME)
|
||||
}
|
||||
}
|
||||
val instantTime = HoodieActiveTimeline.createNewInstantTime()
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi
|
||||
|
||||
object SparkConfigs {
|
||||
|
||||
// Spark datasource write pool name. In case of a streaming sink, users might want to set custom scheduling configs
|
||||
// for regular writes and async compaction. In such cases, this pool name will be used for spark datasource writes.
|
||||
val SPARK_DATASOURCE_WRITER_POOL_NAME = "sparkdatasourcewrite"
|
||||
|
||||
/*
|
||||
When async compaction is enabled (deltastreamer or streaming sink), users might be interested to set custom
|
||||
scheduling configs for regular writes and async compaction. This is the property used to set custom scheduler config
|
||||
file with Spark. In Deltastreamer, the file is generated within Hudi and set if necessary, whereas in the case of a streaming
|
||||
sink, users have to set this property when they invoke spark shell.
|
||||
Sample format of the file contents.
|
||||
<?xml version="1.0"?>
|
||||
<allocations>
|
||||
<pool name="sparkdatasourcewrite">
|
||||
<schedulingMode>FAIR</schedulingMode>
|
||||
<weight>4</weight>
|
||||
<minShare>2</minShare>
|
||||
</pool>
|
||||
<pool name="hoodiecompact">
|
||||
<schedulingMode>FAIR</schedulingMode>
|
||||
<weight>3</weight>
|
||||
<minShare>1</minShare>
|
||||
</pool>
|
||||
</allocations>
|
||||
*/
|
||||
val SPARK_SCHEDULER_ALLOCATION_FILE_KEY = "spark.scheduler.allocation.file"
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user