Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 4301B200D4B for ; Mon, 27 Nov 2017 13:32:11 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 4153A160C13; Mon, 27 Nov 2017 12:32:11 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 84F64160C02 for ; Mon, 27 Nov 2017 13:32:10 +0100 (CET) Received: (qmail 97086 invoked by uid 500); 27 Nov 2017 12:32:09 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 97077 invoked by uid 99); 27 Nov 2017 12:32:09 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 27 Nov 2017 12:32:09 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id ADDF8DF9AE; Mon, 27 Nov 2017 12:32:07 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wenchen@apache.org To: commits@spark.apache.org Message-Id: <8d0453ad40dc4d758120bc3132858695@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-22603][SQL] Fix 64KB JVM bytecode limit problem with FormatString Date: Mon, 27 Nov 2017 12:32:07 +0000 (UTC) archived-at: Mon, 27 Nov 2017 12:32:11 -0000 Repository: spark Updated Branches: refs/heads/master 5a02e3a2a -> 2dbe275b2 [SPARK-22603][SQL] Fix 64KB JVM bytecode limit problem with FormatString ## What changes were proposed in this pull request? 
This PR changes `FormatString` code generation to place the generated code for argument expressions into separate methods if its size could be large. This PR passes variable arguments by using an `Object` array. ## How was this patch tested? Added new test cases into `StringExpressionsSuite` Author: Kazuaki Ishizaki Closes #19817 from kiszk/SPARK-22603. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2dbe275b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2dbe275b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2dbe275b Branch: refs/heads/master Commit: 2dbe275b2d26035b610ed8385d88e3c9562eaf19 Parents: 5a02e3a Author: Kazuaki Ishizaki Authored: Mon Nov 27 20:32:01 2017 +0800 Committer: Wenchen Fan Committed: Mon Nov 27 20:32:01 2017 +0800 ---------------------------------------------------------------------- .../expressions/stringExpressions.scala | 28 ++++++++++++++------ .../expressions/StringExpressionsSuite.scala | 8 ++++++ 2 files changed, 28 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2dbe275b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 1c599af..d629eb7 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1372,10 +1372,10 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC val pattern = children.head.genCode(ctx) val argListGen = children.tail.map(x => 
(x.dataType, x.genCode(ctx))) - val argListCode = argListGen.map(_._2.code + "\n") - - val argListString = argListGen.foldLeft("")((s, v) => { - val nullSafeString = + val argList = ctx.freshName("argLists") + val numArgLists = argListGen.length + val argListCode = argListGen.zipWithIndex.map { case(v, index) => + val value = if (ctx.boxedType(v._1) != ctx.javaType(v._1)) { // Java primitives get boxed in order to allow null values. s"(${v._2.isNull}) ? (${ctx.boxedType(v._1)}) null : " + @@ -1383,8 +1383,19 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC } else { s"(${v._2.isNull}) ? null : ${v._2.value}" } - s + "," + nullSafeString - }) + s""" + ${v._2.code} + $argList[$index] = $value; + """ + } + val argListCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) { + ctx.splitExpressions( + expressions = argListCode, + funcName = "valueFormatString", + arguments = ("InternalRow", ctx.INPUT_ROW) :: ("Object[]", argList) :: Nil) + } else { + argListCode.mkString("\n") + } val form = ctx.freshName("formatter") val formatter = classOf[java.util.Formatter].getName @@ -1395,10 +1406,11 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC boolean ${ev.isNull} = ${pattern.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { - ${argListCode.mkString} $stringBuffer $sb = new $stringBuffer(); $formatter $form = new $formatter($sb, ${classOf[Locale].getName}.US); - $form.format(${pattern.value}.toString() $argListString); + Object[] $argList = new Object[$numArgLists]; + $argListCodes + $form.format(${pattern.value}.toString(), $argList); ${ev.value} = UTF8String.fromString($sb.toString()); }""") } http://git-wip-us.apache.org/repos/asf/spark/blob/2dbe275b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala ---------------------------------------------------------------------- diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index c761394..54cde77 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -518,6 +518,14 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { FormatString(Literal("aa%d%s"), 12, Literal.create(null, StringType)), "aa12null") } + test("SPARK-22603: FormatString should not generate codes beyond 64KB") { + val N = 4500 + val args = (1 to N).map(i => Literal.create(i.toString, StringType)) + val format = "%s" * N + val expected = (1 to N).map(i => i.toString).mkString + checkEvaluation(FormatString(Literal(format) +: args: _*), expected) + } + test("INSTR") { val s1 = 'a.string.at(0) val s2 = 'b.string.at(1) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org