diff --git a/docker/demo/config/log4j.properties b/docker/demo/config/log4j.properties new file mode 100644 index 000000000..1618bff9c --- /dev/null +++ b/docker/demo/config/log4j.properties @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the console +log4j.rootCategory=WARN, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Set the default spark-shell log level to WARN. When running the spark-shell, the +# log level for this class is used to overwrite the root logger's log level, so that +# the user can have different defaults for the shell and regular Spark apps. 
+log4j.logger.org.apache.spark.repl.Main=WARN + +# Settings to quiet third party logs that are too verbose +log4j.logger.org.spark_project.jetty=WARN +log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR +log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO +log4j.logger.org.apache.parquet=ERROR +log4j.logger.parquet=ERROR +log4j.logger.org.apache.spark=WARN + +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support +log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL +log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR \ No newline at end of file diff --git a/docker/demo/setup_demo_container.sh b/docker/demo/setup_demo_container.sh index 212cf0977..098a312b5 100755 --- a/docker/demo/setup_demo_container.sh +++ b/docker/demo/setup_demo_container.sh @@ -17,6 +17,7 @@ echo "Copying spark default config and setting up configs" cp /var/hoodie/ws/docker/demo/config/spark-defaults.conf $SPARK_CONF_DIR/. +cp /var/hoodie/ws/docker/demo/config/log4j.properties $SPARK_CONF_DIR/. hadoop fs -mkdir -p /var/demo/ hadoop fs -mkdir -p /tmp/spark-events hadoop fs -copyFromLocal -f /var/hoodie/ws/docker/demo/config /var/demo/. 
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java index f069a25d3..dbbe0f191 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java @@ -125,7 +125,9 @@ public class ITTestHoodieDemo extends ITTestBase { private void ingestFirstBatchAndHiveSync() throws Exception { List cmds = CollectionUtils.createImmutableList( - "spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE + "spark-submit" + + " --conf \'spark.executor.extraJavaOptions=-Dlog4jspark.root.logger=WARN,console\'" + + " --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE + " --table-type COPY_ON_WRITE " + " --base-file-format " + baseFileFormat.toString() + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts " @@ -141,7 +143,9 @@ public class ITTestHoodieDemo extends ITTestBase { + " --pass hive" + " --jdbc-url jdbc:hive2://hiveserver:10000" + " --partitioned-by dt", - ("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE + ("spark-submit" + + " --conf \'spark.executor.extraJavaOptions=-Dlog4jspark.root.logger=WARN,console\'" + + " --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE + " --table-type MERGE_ON_READ " + " --base-file-format " + baseFileFormat.toString() + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts " @@ -187,14 +191,18 @@ public class ITTestHoodieDemo extends ITTestBase { private void ingestSecondBatchAndHiveSync() throws Exception { List cmds = CollectionUtils.createImmutableList( ("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH2 + " " + HDFS_BATCH_PATH2), - ("spark-submit --class 
org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE + ("spark-submit" + + " --conf \'spark.executor.extraJavaOptions=-Dlog4jspark.root.logger=WARN,console\'" + + " --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE + " --table-type COPY_ON_WRITE " + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts " + " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME + " --props /var/demo/config/dfs-source.properties" + " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider " + String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME)), - ("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE + ("spark-submit" + + " --conf \'spark.executor.extraJavaOptions=-Dlog4jspark.root.logger=WARN,console\'" + + " --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE + " --table-type MERGE_ON_READ " + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts " + " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java index 4fbbe5b91..a0b093ea9 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java @@ -52,6 +52,8 @@ import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hive.service.server.HiveServer2; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroParquetWriter; import 
org.apache.parquet.hadoop.ParquetFileWriter.Mode; import org.apache.parquet.hadoop.ParquetWriter; @@ -90,6 +92,10 @@ public class UtilitiesTestBase { @BeforeAll public static void initClass() throws Exception { + // Set the root log level to ERROR and Spark logs to WARN to avoid exceeding the log limit in Travis + Logger rootLogger = Logger.getRootLogger(); + rootLogger.setLevel(Level.ERROR); + Logger.getLogger("org.apache.spark").setLevel(Level.WARN); initClass(false); }