Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id D0432200CCE for ; Sun, 23 Jul 2017 19:38:08 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id CECF4164105; Sun, 23 Jul 2017 17:38:08 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 01FB0164102 for ; Sun, 23 Jul 2017 19:38:07 +0200 (CEST) Received: (qmail 34546 invoked by uid 500); 23 Jul 2017 17:38:07 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 34537 invoked by uid 99); 23 Jul 2017 17:38:07 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 23 Jul 2017 17:38:07 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 18022DFD82; Sun, 23 Jul 2017 17:38:07 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: lixiao@apache.org To: commits@spark.apache.org Message-Id: <80a2ed963c114b0a8c7f94959e624ef1@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-20871][SQL] limit logging of Janino code Date: Sun, 23 Jul 2017 17:38:07 +0000 (UTC) archived-at: Sun, 23 Jul 2017 17:38:09 -0000 Repository: spark Updated Branches: refs/heads/master cecd285a2 -> 2a53fbfce [SPARK-20871][SQL] limit logging of Janino code ## What changes were proposed in this pull request? When the code that is generated is greater than 64k, then Janino compile will fail and CodeGenerator.scala will log the entire code at Error level. SPARK-20871 suggests only logging the code at Debug level. Since, the code is already logged at debug level, this Pull Request proposes not including the formatted code in the Error logging and exception message at all. When an exception occurs, the code will be logged at Info level but truncated if it is more than 1000 lines long. ## How was this patch tested? Existing tests were run. An extra test test case was added to CodeFormatterSuite to test the new maxLines parameter, Author: pj.fanning Closes #18658 from pjfanning/SPARK-20871. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a53fbfc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a53fbfc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a53fbfc Branch: refs/heads/master Commit: 2a53fbfce72b3faef020e39a1e8628d68bc95beb Parents: cecd285 Author: pj.fanning Authored: Sun Jul 23 10:38:03 2017 -0700 Committer: gatorsmile Committed: Sun Jul 23 10:38:03 2017 -0700 ---------------------------------------------------------------------- .../expressions/codegen/CodeFormatter.scala | 10 ++++- .../expressions/codegen/CodeGenerator.scala | 13 ++++--- .../org/apache/spark/sql/internal/SQLConf.scala | 10 +++++ .../codegen/CodeFormatterSuite.scala | 40 +++++++++++++++++--- 4 files changed, 61 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index 05b7c96..60e600d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -28,14 +28,20 @@ import java.util.regex.Matcher object CodeFormatter { val commentHolder = """\/\*(.+?)\*\/""".r - def format(code: CodeAndComment): String = { + def format(code: CodeAndComment, maxLines: Int = -1): String = { val formatter = new CodeFormatter - code.body.split("\n").foreach { line => + val lines = code.body.split("\n") + val needToTruncate = maxLines >= 0 && lines.length > maxLines + val filteredLines = if (needToTruncate) lines.take(maxLines) else lines + filteredLines.foreach { line => val commentReplaced = commentHolder.replaceAllIn( line.trim, m => code.comment.get(m.group(1)).map(Matcher.quoteReplacement).getOrElse(m.group(0))) formatter.addLine(commentReplaced) } + if (needToTruncate) { + formatter.addLine(s"[truncated to $maxLines lines (total lines is ${lines.length})]") + } formatter.result() } http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 7cf9daf..a014e2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -39,6 +39,7 @@ import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.types._ @@ -1037,12 +1038,10 @@ object CodeGenerator extends Logging { )) evaluator.setExtendedClass(classOf[GeneratedClass]) - lazy val formatted = CodeFormatter.format(code) - logDebug({ // Only add extra debugging info to byte code when we are going to print the source code. evaluator.setDebuggingInformation(true, true, false) - s"\n$formatted" + s"\n${CodeFormatter.format(code)}" }) try { @@ -1050,12 +1049,16 @@ object CodeGenerator extends Logging { recordCompilationStats(evaluator) } catch { case e: JaninoRuntimeException => - val msg = s"failed to compile: $e\n$formatted" + val msg = s"failed to compile: $e" logError(msg, e) + val maxLines = SQLConf.get.loggingMaxLinesForCodegen + logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new JaninoRuntimeException(msg, e) case e: CompileException => - val msg = s"failed to compile: $e\n$formatted" + val msg = s"failed to compile: $e" logError(msg, e) + val maxLines = SQLConf.get.loggingMaxLinesForCodegen + logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new CompileException(msg, e.getLocation) } evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass] http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 824908d..a819cdd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -564,6 +564,14 @@ object SQLConf { .intConf .createWithDefault(20) + val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines") + .internal() + .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.") + .intConf + .checkValue(maxLines => maxLines >= -1, "The maximum must be a positive integer, 0 to " + + "disable logging or -1 to apply no limit.") + .createWithDefault(1000) + val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files.") .longConf @@ -1004,6 +1012,8 @@ class SQLConf extends Serializable with Logging { def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES) + def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES) + def tableRelationCacheSize: Int = getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE) http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index bc5a8f0..9d0a416 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -20,18 +20,18 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util._ - class CodeFormatterSuite extends SparkFunSuite { - def testCase(name: String)( - input: String, comment: Map[String, String] = Map.empty)(expected: String): Unit = { + def testCase(name: String)(input: String, + comment: Map[String, String] = Map.empty, maxLines: Int = -1)(expected: String): Unit = { test(name) { val sourceCode = new CodeAndComment(input.trim, comment) - if (CodeFormatter.format(sourceCode).trim !== expected.trim) { + if (CodeFormatter.format(sourceCode, maxLines).trim !== expected.trim) { fail( s""" |== FAIL: Formatted code doesn't match === - |${sideBySide(CodeFormatter.format(sourceCode).trim, expected.trim).mkString("\n")} + |${sideBySide(CodeFormatter.format(sourceCode, maxLines).trim, + expected.trim).mkString("\n")} """.stripMargin) } } @@ -129,6 +129,36 @@ class CodeFormatterSuite extends SparkFunSuite { """.stripMargin } + testCase("function calls with maxLines=0") ( + """ + |foo( + |a, + |b, + |c) + """.stripMargin, + maxLines = 0 + ) { + """ + |/* 001 */ [truncated to 0 lines (total lines is 4)] + """.stripMargin + } + + testCase("function calls with maxLines=2") ( + """ + |foo( + |a, + |b, + |c) + """.stripMargin, + maxLines = 2 + ) { + """ + |/* 001 */ foo( + |/* 002 */ a, + |/* 003 */ [truncated to 2 lines (total lines is 4)] + """.stripMargin + } + testCase("single line comments") { """ |// This is a comment about class A { { { ( ( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org