[HUDI-92] Provide reasonable names for Spark DAG stages in HUDI. (#1289)
This commit is contained in:
@@ -86,7 +86,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts("TestHoodieBloomIndex");
|
||||
initSparkContexts();
|
||||
initPath();
|
||||
initFileSystem();
|
||||
// We have some records to be tagged (two different partitions)
|
||||
|
||||
@@ -71,7 +71,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts("TestHoodieGlobalBloomIndex");
|
||||
initSparkContexts();
|
||||
initPath();
|
||||
// We have some records to be tagged (two different partitions)
|
||||
String schemaStr = FileIOUtils.readAsUTFString(getClass().getResourceAsStream("/exampleSchema.txt"));
|
||||
|
||||
@@ -60,7 +60,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
public void init() throws Exception {
|
||||
initDFS();
|
||||
initPath();
|
||||
initSparkContexts("TestHoodieCommitArchiveLog");
|
||||
initSparkContexts();
|
||||
hadoopConf = dfs.getConf();
|
||||
hadoopConf.addResource(dfs.getConf());
|
||||
dfs.mkdirs(new Path(basePath));
|
||||
|
||||
@@ -57,7 +57,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts("TestHoodieMergeHandle");
|
||||
initSparkContexts();
|
||||
initPath();
|
||||
initFileSystem();
|
||||
initTestDataGenerator();
|
||||
|
||||
@@ -66,7 +66,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
// Initialize a local spark env
|
||||
initSparkContexts("TestHoodieCompactor");
|
||||
initSparkContexts();
|
||||
|
||||
// Create a temp folder as the base path
|
||||
initPath();
|
||||
|
||||
@@ -41,6 +41,8 @@ import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.SQLContext;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.TestInfo;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@@ -56,7 +58,8 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||
public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(HoodieClientTestHarness.class);
|
||||
|
||||
|
||||
private String testMethodName;
|
||||
protected transient JavaSparkContext jsc = null;
|
||||
protected transient Configuration hadoopConf = null;
|
||||
protected transient SQLContext sqlContext;
|
||||
@@ -82,6 +85,15 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
|
||||
protected transient MiniDFSCluster dfsCluster;
|
||||
protected transient DistributedFileSystem dfs;
|
||||
|
||||
@BeforeEach
|
||||
public void setTestMethodName(TestInfo testInfo) {
|
||||
if (testInfo.getTestMethod().isPresent()) {
|
||||
testMethodName = testInfo.getTestMethod().get().getName();
|
||||
} else {
|
||||
testMethodName = "Unknown";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes resource group for the subclasses of {@link HoodieClientTestBase}.
|
||||
*/
|
||||
@@ -113,7 +125,7 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
|
||||
*/
|
||||
protected void initSparkContexts(String appName) {
|
||||
// Initialize a local spark env
|
||||
jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(appName));
|
||||
jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(appName + "#" + testMethodName));
|
||||
jsc.setLogLevel("ERROR");
|
||||
hadoopConf = jsc.hadoopConfiguration();
|
||||
|
||||
@@ -122,11 +134,11 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext}) with a default name
|
||||
* <b>TestHoodieClient</b>.
|
||||
* Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext})
|
||||
* with a default name matching the name of the class.
|
||||
*/
|
||||
protected void initSparkContexts() {
|
||||
initSparkContexts("TestHoodieClient");
|
||||
initSparkContexts(this.getClass().getSimpleName());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -146,9 +146,30 @@ public class HoodieClientTestUtils {
|
||||
new RandomAccessFile(path, "rw").setLength(length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Spark config for this test.
|
||||
*
|
||||
* The following properties may be set to customize the Spark context:
|
||||
* SPARK_EVLOG_DIR: Local directory where event logs should be saved. This
|
||||
* allows viewing the logs with spark-history-server.
|
||||
*
|
||||
* @note When running the tests using maven, use the following syntax to set
|
||||
* a property:
|
||||
* mvn -DSPARK_XXX=yyy ...
|
||||
*
|
||||
* @param appName A name for the Spark application. Shown in the Spark web UI.
|
||||
* @return A Spark config
|
||||
*/
|
||||
public static SparkConf getSparkConfForTest(String appName) {
|
||||
SparkConf sparkConf = new SparkConf().setAppName(appName)
|
||||
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer").setMaster("local[8]");
|
||||
|
||||
String evlogDir = System.getProperty("SPARK_EVLOG_DIR");
|
||||
if (evlogDir != null) {
|
||||
sparkConf.set("spark.eventLog.enabled", "true");
|
||||
sparkConf.set("spark.eventLog.dir", evlogDir);
|
||||
}
|
||||
|
||||
return HoodieReadClient.addHoodieSupport(sparkConf);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user