spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-16514][SQL] Fix various regex codegen bugs
Date Wed, 13 Jul 2016 06:09:11 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 4303d292b -> 41df62c59


[SPARK-16514][SQL] Fix various regex codegen bugs

## What changes were proposed in this pull request?

RegexExtract and RegexReplace currently crash on non-nullable input due use of a hard-coded
local variable name (e.g. compiles fail with `java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException:
File 'generated.java', Line 85, Column 26: Redefinition of local variable "m" `).

This changes those variables to use fresh names, and also in a few other places.

## How was this patch tested?

Unit tests. rxin

Author: Eric Liang <ekl@databricks.com>

Closes #14168 from ericl/sc-3906.

(cherry picked from commit 1c58fa905b6543d366d00b2e5394dfd633987f6d)
Signed-off-by: Reynold Xin <rxin@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41df62c5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41df62c5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41df62c5

Branch: refs/heads/branch-2.0
Commit: 41df62c595474d7afda6dbe76a558d8cb3be7ff2
Parents: 4303d29
Author: Eric Liang <ekl@databricks.com>
Authored: Tue Jul 12 23:09:02 2016 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Tue Jul 12 23:09:08 2016 -0700

----------------------------------------------------------------------
 .../expressions/regexpExpressions.scala         | 48 ++++++++++++++------
 .../expressions/StringExpressionsSuite.scala    |  6 +++
 2 files changed, 39 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/41df62c5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 541b860..be82b3b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -108,10 +108,11 @@ case class Like(left: Expression, right: Expression)
         """)
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
       nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr));
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc($rightStr));
           ${ev.value} = $pattern.matcher(${eval1}.toString()).matches();
         """
       })
@@ -157,10 +158,11 @@ case class RLike(left: Expression, right: Expression)
         """)
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
       nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile(rightStr);
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($rightStr);
           ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0);
         """
       })
@@ -259,6 +261,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep:
Expressio
     val classNamePattern = classOf[Pattern].getCanonicalName
     val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName
 
+    val matcher = ctx.freshName("matcher")
+
     ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;")
     ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;")
     ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;")
@@ -267,6 +271,12 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep:
Expressio
     ctx.addMutableState(classNameStringBuffer,
       termResult, s"${termResult} = new $classNameStringBuffer();")
 
+    val setEvNotNull = if (nullable) {
+      s"${ev.isNull} = false;"
+    } else {
+      ""
+    }
+
     nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => {
     s"""
       if (!$regexp.equals(${termLastRegex})) {
@@ -280,14 +290,14 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep:
Expressio
         ${termLastReplacement} = ${termLastReplacementInUTF8}.toString();
       }
       ${termResult}.delete(0, ${termResult}.length());
-      java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString());
+      java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString());
 
-      while (m.find()) {
-        m.appendReplacement(${termResult}, ${termLastReplacement});
+      while (${matcher}.find()) {
+        ${matcher}.appendReplacement(${termResult}, ${termLastReplacement});
       }
-      m.appendTail(${termResult});
+      ${matcher}.appendTail(${termResult});
       ${ev.value} = UTF8String.fromString(${termResult}.toString());
-      ${ev.isNull} = false;
+      $setEvNotNull
     """
     })
   }
@@ -334,10 +344,18 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx:
Expressio
     val termLastRegex = ctx.freshName("lastRegex")
     val termPattern = ctx.freshName("pattern")
     val classNamePattern = classOf[Pattern].getCanonicalName
+    val matcher = ctx.freshName("matcher")
+    val matchResult = ctx.freshName("matchResult")
 
     ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;")
     ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;")
 
+    val setEvNotNull = if (nullable) {
+      s"${ev.isNull} = false;"
+    } else {
+      ""
+    }
+
     nullSafeCodeGen(ctx, ev, (subject, regexp, idx) => {
       s"""
       if (!$regexp.equals(${termLastRegex})) {
@@ -345,15 +363,15 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx:
Expressio
         ${termLastRegex} = $regexp.clone();
         ${termPattern} = ${classNamePattern}.compile(${termLastRegex}.toString());
       }
-      java.util.regex.Matcher m =
+      java.util.regex.Matcher ${matcher} =
         ${termPattern}.matcher($subject.toString());
-      if (m.find()) {
-        java.util.regex.MatchResult mr = m.toMatchResult();
-        ${ev.value} = UTF8String.fromString(mr.group($idx));
-        ${ev.isNull} = false;
+      if (${matcher}.find()) {
+        java.util.regex.MatchResult ${matchResult} = ${matcher}.toMatchResult();
+        ${ev.value} = UTF8String.fromString(${matchResult}.group($idx));
+        $setEvNotNull
       } else {
         ${ev.value} = UTF8String.EMPTY_UTF8;
-        ${ev.isNull} = false;
+        $setEvNotNull
       }"""
     })
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/41df62c5/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index 8f7b104..5b9ed83 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -631,6 +631,9 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
{
     checkEvaluation(expr, null, row4)
     checkEvaluation(expr, null, row5)
     checkEvaluation(expr, null, row6)
+
+    val nonNullExpr = RegExpReplace(Literal("100-200"), Literal("(\\d+)"), Literal("num"))
+    checkEvaluation(nonNullExpr, "num-num", row1)
   }
 
   test("RegexExtract") {
@@ -657,6 +660,9 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
{
 
     val expr1 = new RegExpExtract(s, p)
     checkEvaluation(expr1, "100", row1)
+
+    val nonNullExpr = RegExpExtract(Literal("100-200"), Literal("(\\d+)-(\\d+)"), Literal(1))
+    checkEvaluation(nonNullExpr, "100", row1)
   }
 
   test("SPLIT") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message