spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-17683][SQL] Support ArrayType in Literal.apply
Date Wed, 02 Nov 2016 18:29:29 GMT
Repository: spark
Updated Branches:
  refs/heads/master f151bd1af -> 4af0ce2d9


[SPARK-17683][SQL] Support ArrayType in Literal.apply

## What changes were proposed in this pull request?

This PR adds pattern-matching entries for array data in `Literal.apply`.

## How was this patch tested?

Added tests in `LiteralExpressionSuite`.

Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>

Closes #15257 from maropu/SPARK-17683.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4af0ce2d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4af0ce2d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4af0ce2d

Branch: refs/heads/master
Commit: 4af0ce2d96de3397c9bc05684cad290a52486577
Parents: f151bd1
Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>
Authored: Wed Nov 2 11:29:26 2016 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Wed Nov 2 11:29:26 2016 -0700

----------------------------------------------------------------------
 .../sql/catalyst/expressions/literals.scala     | 57 +++++++++++++++++++-
 .../expressions/LiteralExpressionSuite.scala    | 27 +++++++++-
 2 files changed, 82 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4af0ce2d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index a597a17..1985e68 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -17,14 +17,25 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
+import java.lang.{Boolean => JavaBoolean}
+import java.lang.{Byte => JavaByte}
+import java.lang.{Double => JavaDouble}
+import java.lang.{Float => JavaFloat}
+import java.lang.{Integer => JavaInteger}
+import java.lang.{Long => JavaLong}
+import java.lang.{Short => JavaShort}
+import java.math.{BigDecimal => JavaBigDecimal}
 import java.nio.charset.StandardCharsets
 import java.sql.{Date, Timestamp}
 import java.util
 import java.util.Objects
 import javax.xml.bind.DatatypeConverter
 
+import scala.math.{BigDecimal, BigInt}
+
 import org.json4s.JsonAST._
 
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
@@ -46,12 +57,17 @@ object Literal {
     case s: String => Literal(UTF8String.fromString(s), StringType)
     case b: Boolean => Literal(b, BooleanType)
     case d: BigDecimal => Literal(Decimal(d), DecimalType(Math.max(d.precision, d.scale), d.scale))
-    case d: java.math.BigDecimal =>
+    case d: JavaBigDecimal =>
       Literal(Decimal(d), DecimalType(Math.max(d.precision, d.scale), d.scale()))
     case d: Decimal => Literal(d, DecimalType(Math.max(d.precision, d.scale), d.scale))
     case t: Timestamp => Literal(DateTimeUtils.fromJavaTimestamp(t), TimestampType)
     case d: Date => Literal(DateTimeUtils.fromJavaDate(d), DateType)
     case a: Array[Byte] => Literal(a, BinaryType)
+    case a: Array[_] =>
+      val elementType = componentTypeToDataType(a.getClass.getComponentType())
+      val dataType = ArrayType(elementType)
+      val convert = CatalystTypeConverters.createToCatalystConverter(dataType)
+      Literal(convert(a), dataType)
     case i: CalendarInterval => Literal(i, CalendarIntervalType)
     case null => Literal(null, NullType)
     case v: Literal => v
@@ -60,6 +76,45 @@ object Literal {
   }
 
   /**
+   * Returns the Spark SQL DataType for a given class object. Since this type needs to be resolved
+   * in runtime, we use match-case idioms for class objects here. However, there are similar
+   * functions in other files (e.g., HiveInspectors), so these functions need to merged into one.
+   */
+  private[this] def componentTypeToDataType(clz: Class[_]): DataType = clz match {
+    // primitive types
+    case JavaShort.TYPE => ShortType
+    case JavaInteger.TYPE => IntegerType
+    case JavaLong.TYPE => LongType
+    case JavaDouble.TYPE => DoubleType
+    case JavaByte.TYPE => ByteType
+    case JavaFloat.TYPE => FloatType
+    case JavaBoolean.TYPE => BooleanType
+
+    // java classes
+    case _ if clz == classOf[Date] => DateType
+    case _ if clz == classOf[Timestamp] => TimestampType
+    case _ if clz == classOf[JavaBigDecimal] => DecimalType.SYSTEM_DEFAULT
+    case _ if clz == classOf[Array[Byte]] => BinaryType
+    case _ if clz == classOf[JavaShort] => ShortType
+    case _ if clz == classOf[JavaInteger] => IntegerType
+    case _ if clz == classOf[JavaLong] => LongType
+    case _ if clz == classOf[JavaDouble] => DoubleType
+    case _ if clz == classOf[JavaByte] => ByteType
+    case _ if clz == classOf[JavaFloat] => FloatType
+    case _ if clz == classOf[JavaBoolean] => BooleanType
+
+    // other scala classes
+    case _ if clz == classOf[String] => StringType
+    case _ if clz == classOf[BigInt] => DecimalType.SYSTEM_DEFAULT
+    case _ if clz == classOf[BigDecimal] => DecimalType.SYSTEM_DEFAULT
+    case _ if clz == classOf[CalendarInterval] => CalendarIntervalType
+
+    case _ if clz.isArray => ArrayType(componentTypeToDataType(clz.getComponentType))
+
+    case _ => throw new AnalysisException(s"Unsupported component type $clz in arrays")
+  }
+
+  /**
    * Constructs a [[Literal]] of [[ObjectType]], for example when you need to pass an object
    * into code generation.
    */

http://git-wip-us.apache.org/repos/asf/spark/blob/4af0ce2d/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
index 450222d..4af4da8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
@@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.CatalystTypeConverters
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
@@ -43,6 +44,7 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(Literal.create(null, TimestampType), null)
     checkEvaluation(Literal.create(null, CalendarIntervalType), null)
     checkEvaluation(Literal.create(null, ArrayType(ByteType, true)), null)
+    checkEvaluation(Literal.create(null, ArrayType(StringType, true)), null)
     checkEvaluation(Literal.create(null, MapType(StringType, IntegerType)), null)
     checkEvaluation(Literal.create(null, StructType(Seq.empty)), null)
   }
@@ -122,5 +124,28 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
     }
   }
 
-  // TODO(davies): add tests for ArrayType, MapType and StructType
+  test("array") {
+    def checkArrayLiteral(a: Array[_], elementType: DataType): Unit = {
+      val toCatalyst = (a: Array[_], elementType: DataType) => {
+        CatalystTypeConverters.createToCatalystConverter(ArrayType(elementType))(a)
+      }
+      checkEvaluation(Literal(a), toCatalyst(a, elementType))
+    }
+    checkArrayLiteral(Array(1, 2, 3), IntegerType)
+    checkArrayLiteral(Array("a", "b", "c"), StringType)
+    checkArrayLiteral(Array(1.0, 4.0), DoubleType)
+    checkArrayLiteral(Array(CalendarInterval.MICROS_PER_DAY, CalendarInterval.MICROS_PER_HOUR),
+      CalendarIntervalType)
+  }
+
+  test("unsupported types (map and struct) in literals") {
+    def checkUnsupportedTypeInLiteral(v: Any): Unit = {
+      val errMsgMap = intercept[RuntimeException] {
+        Literal(v)
+      }
+      assert(errMsgMap.getMessage.startsWith("Unsupported literal type"))
+    }
+    checkUnsupportedTypeInLiteral(Map("key1" -> 1, "key2" -> 2))
+    checkUnsupportedTypeInLiteral(("mike", 29, 1.0))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message