spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-9155][SQL] codegen StringSpace
Date Mon, 20 Jul 2015 22:23:31 GMT
Repository: spark
Updated Branches:
  refs/heads/master dde0e12f3 -> 4863c11ea


[SPARK-9155][SQL] codegen StringSpace

Jira https://issues.apache.org/jira/browse/SPARK-9155

Author: Tarek Auel <tarek.auel@googlemail.com>

Closes #7531 from tarekauel/SPARK-9155 and squashes the following commits:

423c426 [Tarek Auel] [SPARK-9155] language typo fix
e34bd1b [Tarek Auel] [SPARK-9155] moved creation of blank string to UTF8String
4bc33e6 [Tarek Auel] [SPARK-9155] codegen StringSpace


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4863c11e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4863c11e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4863c11e

Branch: refs/heads/master
Commit: 4863c11ea9d9f94799fbe6ae5a60860f0740a44d
Parents: dde0e12
Author: Tarek Auel <tarek.auel@googlemail.com>
Authored: Mon Jul 20 15:23:28 2015 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Mon Jul 20 15:23:28 2015 -0700

----------------------------------------------------------------------
 .../sql/catalyst/expressions/stringOperations.scala     | 12 +++++++-----
 .../java/org/apache/spark/unsafe/types/UTF8String.java  | 10 ++++++++++
 .../org/apache/spark/unsafe/types/UTF8StringSuite.java  |  8 ++++++++
 3 files changed, 25 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4863c11e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 6608036..e42be85 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -593,17 +593,19 @@ case class StringReverse(child: Expression) extends UnaryExpression with String2
  * Returns a n spaces string.
  */
 case class StringSpace(child: Expression)
-  extends UnaryExpression with ImplicitCastInputTypes with CodegenFallback {
+  extends UnaryExpression with ImplicitCastInputTypes {
 
   override def dataType: DataType = StringType
   override def inputTypes: Seq[DataType] = Seq(IntegerType)
 
   override def nullSafeEval(s: Any): Any = {
-    val length = s.asInstanceOf[Integer]
+    val length = s.asInstanceOf[Int]
+    UTF8String.blankString(if (length < 0) 0 else length)
+  }
 
-    val spaces = new Array[Byte](if (length < 0) 0 else length)
-    java.util.Arrays.fill(spaces, ' '.asInstanceOf[Byte])
-    UTF8String.fromBytes(spaces)
+  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    nullSafeCodeGen(ctx, ev, (length) =>
+      s"""${ev.primitive} = UTF8String.blankString(($length < 0) ? 0 : $length);""")
   }
 
   override def prettyName: String = "space"

http://git-wip-us.apache.org/repos/asf/spark/blob/4863c11e/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 3eecd65..819639f 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -20,6 +20,7 @@ package org.apache.spark.unsafe.types;
 import javax.annotation.Nonnull;
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
 
 import org.apache.spark.unsafe.PlatformDependent;
 import org.apache.spark.unsafe.array.ByteArrayMethods;
@@ -77,6 +78,15 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
     }
   }
 
+  /**
+   * Creates an UTF8String that contains `length` spaces.
+   */
+  public static UTF8String blankString(int length) {
+    byte[] spaces = new byte[length];
+    Arrays.fill(spaces, (byte) ' ');
+    return fromBytes(spaces);
+  }
+
   protected UTF8String(Object base, long offset, int size) {
     this.base = base;
     this.offset = offset;

http://git-wip-us.apache.org/repos/asf/spark/blob/4863c11e/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
----------------------------------------------------------------------
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 7d0c49e..6a21c27 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -286,4 +286,12 @@ public class UTF8StringSuite {
     assertEquals(
         UTF8String.fromString("世界千世").levenshteinDistance(UTF8String.fromString("千a世b")),4);
   }
+
+  @Test
+  public void createBlankString() {
+    assertEquals(fromString(" "), blankString(1));
+    assertEquals(fromString("  "), blankString(2));
+    assertEquals(fromString("   "), blankString(3));
+    assertEquals(fromString(""), blankString(0));
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message