spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wenc...@apache.org
Subject spark git commit: [SPARK-22603][SQL] Fix 64KB JVM bytecode limit problem with FormatString
Date Mon, 27 Nov 2017 12:33:52 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 2cd4898f3 -> eef72d3f0


[SPARK-22603][SQL] Fix 64KB JVM bytecode limit problem with FormatString

## What changes were proposed in this pull request?

This PR changes `FormatString` code generation to place the generated code for argument
expressions into separate methods if their size could be large.
This PR passes variable arguments by using an `Object` array.

## How was this patch tested?

Added new test cases to `StringExpressionsSuite`.

Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com>

Closes #19817 from kiszk/SPARK-22603.

(cherry picked from commit 2dbe275b2d26035b610ed8385d88e3c9562eaf19)
Signed-off-by: Wenchen Fan <wenchen@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eef72d3f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eef72d3f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eef72d3f

Branch: refs/heads/branch-2.2
Commit: eef72d3f037101ea1ddf48d4c2f938fdcbdc5346
Parents: 2cd4898
Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Authored: Mon Nov 27 20:32:01 2017 +0800
Committer: Wenchen Fan <wenchen@databricks.com>
Committed: Mon Nov 27 20:33:05 2017 +0800

----------------------------------------------------------------------
 .../expressions/stringExpressions.scala         | 28 ++++++++++++++------
 .../expressions/StringExpressionsSuite.scala    |  8 ++++++
 2 files changed, 28 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/eef72d3f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 014ac77..767b59c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -1054,10 +1054,10 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
     val pattern = children.head.genCode(ctx)
 
     val argListGen = children.tail.map(x => (x.dataType, x.genCode(ctx)))
-    val argListCode = argListGen.map(_._2.code + "\n")
-
-    val argListString = argListGen.foldLeft("")((s, v) => {
-      val nullSafeString =
+    val argList = ctx.freshName("argLists")
+    val numArgLists = argListGen.length
+    val argListCode = argListGen.zipWithIndex.map { case(v, index) =>
+      val value =
         if (ctx.boxedType(v._1) != ctx.javaType(v._1)) {
           // Java primitives get boxed in order to allow null values.
           s"(${v._2.isNull}) ? (${ctx.boxedType(v._1)}) null : " +
@@ -1065,8 +1065,19 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
         } else {
           s"(${v._2.isNull}) ? null : ${v._2.value}"
         }
-      s + "," + nullSafeString
-    })
+      s"""
+         ${v._2.code}
+         $argList[$index] = $value;
+       """
+    }
+    val argListCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) {
+      ctx.splitExpressions(
+        expressions = argListCode,
+        funcName = "valueFormatString",
+        arguments = ("InternalRow", ctx.INPUT_ROW) :: ("Object[]", argList) :: Nil)
+    } else {
+      argListCode.mkString("\n")
+    }
 
     val form = ctx.freshName("formatter")
     val formatter = classOf[java.util.Formatter].getName
@@ -1077,10 +1088,11 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
       boolean ${ev.isNull} = ${pattern.isNull};
       ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
       if (!${ev.isNull}) {
-        ${argListCode.mkString}
         $stringBuffer $sb = new $stringBuffer();
         $formatter $form = new $formatter($sb, ${classOf[Locale].getName}.US);
-        $form.format(${pattern.value}.toString() $argListString);
+        Object[] $argList = new Object[$numArgLists];
+        $argListCodes
+        $form.format(${pattern.value}.toString(), $argList);
         ${ev.value} = UTF8String.fromString($sb.toString());
       }""")
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/eef72d3f/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index 7adf967..085d912 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -434,6 +434,14 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       FormatString(Literal("aa%d%s"), 12, Literal.create(null, StringType)), "aa12null")
   }
 
+  test("SPARK-22603: FormatString should not generate codes beyond 64KB") {
+    val N = 4500
+    val args = (1 to N).map(i => Literal.create(i.toString, StringType))
+    val format = "%s" * N
+    val expected = (1 to N).map(i => i.toString).mkString
+    checkEvaluation(FormatString(Literal(format) +: args: _*), expected)
+  }
+
   test("INSTR") {
     val s1 = 'a.string.at(0)
     val s2 = 'b.string.at(1)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message