[HUDI-3485] Adding scheduler pool configs for async clustering (#5043)
This commit is contained in:
committed by
GitHub
parent
5c1b482a1b
commit
4fed8dd319
@@ -22,6 +22,7 @@ package org.apache.hudi.async;
|
||||
import org.apache.hudi.client.BaseClusterer;
|
||||
import org.apache.hudi.client.BaseHoodieWriteClient;
|
||||
import org.apache.hudi.client.HoodieSparkClusteringClient;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
|
||||
/**
|
||||
* Async clustering service for Spark structured streaming.
|
||||
@@ -31,8 +32,8 @@ public class SparkStreamingAsyncClusteringService extends AsyncClusteringService
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
public SparkStreamingAsyncClusteringService(BaseHoodieWriteClient writeClient) {
|
||||
super(writeClient, true);
|
||||
public SparkStreamingAsyncClusteringService(HoodieEngineContext context, BaseHoodieWriteClient writeClient) {
|
||||
super(context, writeClient, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -205,7 +205,8 @@ class HoodieStreamingSink(sqlContext: SQLContext,
|
||||
protected def triggerAsyncClustering(client: SparkRDDWriteClient[HoodieRecordPayload[Nothing]]): Unit = {
|
||||
if (null == asyncClusteringService) {
|
||||
log.info("Triggering async clustering!")
|
||||
asyncClusteringService = new SparkStreamingAsyncClusteringService(client)
|
||||
asyncClusteringService = new SparkStreamingAsyncClusteringService(new HoodieSparkEngineContext(new JavaSparkContext(sqlContext.sparkContext)),
|
||||
client)
|
||||
asyncClusteringService.start(new Function[java.lang.Boolean, java.lang.Boolean] {
|
||||
override def apply(errored: lang.Boolean): lang.Boolean = {
|
||||
log.info(s"Async clustering service shutdown. Errored ? $errored")
|
||||
|
||||
@@ -27,9 +27,9 @@ object SparkConfigs {
|
||||
|
||||
/*
|
||||
When async compaction is enabled (deltastreamer or streaming sink), users might be interested to set custom
|
||||
scheduling configs for regular writes and async compaction. This is the property used to set custom scheduler config
|
||||
file with spark. In Deltastreamer, the file is generated within hudi and set if necessary. Whereas in case of streaming
|
||||
sink, users have to set this property when they invoke spark shell.
|
||||
scheduling configs for regular writes and async table services like compaction and clustering. This is the property
|
||||
used to set custom scheduler config file with spark. In Deltastreamer, the file is generated within hudi and set if
|
||||
necessary. Whereas in the case of the streaming sink, users have to set this property when they invoke spark shell.
|
||||
Sample format of the file contents.
|
||||
<?xml version="1.0"?>
|
||||
<allocations>
|
||||
@@ -43,6 +43,11 @@ object SparkConfigs {
|
||||
<weight>3</weight>
|
||||
<minShare>1</minShare>
|
||||
</pool>
|
||||
<pool name="hoodiecluster">
|
||||
<schedulingMode>FAIR</schedulingMode>
|
||||
<weight>2</weight>
|
||||
<minShare>1</minShare>
|
||||
</pool>
|
||||
</allocations>
|
||||
*/
|
||||
val SPARK_SCHEDULER_ALLOCATION_FILE_KEY = "spark.scheduler.allocation.file"
|
||||
|
||||
Reference in New Issue
Block a user