spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From marmb...@apache.org
Subject git commit: [SPARK-3485][SQL] Use GenericUDFUtils.ConversionHelper for Simple UDF type conversions
Date Fri, 19 Sep 2014 22:39:35 GMT
Repository: spark
Updated Branches:
  refs/heads/master 3b9cd13eb -> ba68a51c4


[SPARK-3485][SQL] Use GenericUDFUtils.ConversionHelper for Simple UDF type conversions

This is just another solution to SPARK-3485, in addition to PR #2355
In this patch, we will use ConversionHelper and FunctionRegistry to invoke a simple UDF evaluation,
which relies more on Hive, but is much cleaner and safer.
We can discuss which one is better.

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes #2407 from adrian-wang/simpleudf and squashes the following commits:

15762d2 [Daoyuan Wang] add posmod test which would fail the test but now ok
0d69eb4 [Daoyuan Wang] another way to pass to hive simple udf


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba68a51c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba68a51c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba68a51c

Branch: refs/heads/master
Commit: ba68a51c407197d478b330403af8fe24a176bef3
Parents: 3b9cd13
Author: Daoyuan Wang <daoyuan.wang@intel.com>
Authored: Fri Sep 19 15:39:31 2014 -0700
Committer: Michael Armbrust <michael@databricks.com>
Committed: Fri Sep 19 15:39:31 2014 -0700

----------------------------------------------------------------------
 .../hive/execution/HiveCompatibilitySuite.scala |  1 +
 .../org/apache/spark/sql/hive/hiveUdfs.scala    | 55 ++++++--------------
 2 files changed, 17 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ba68a51c/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index ab487d6..556c984 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -801,6 +801,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter
{
     "udf_or",
     "udf_parse_url",
     "udf_PI",
+    "udf_pmod",
     "udf_positive",
     "udf_pow",
     "udf_power",

http://git-wip-us.apache.org/repos/asf/spark/blob/ba68a51c/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 7cda0dd..5a0e6c5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.hive
 
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ConversionHelper
+
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.hadoop.hive.common.`type`.HiveDecimal
@@ -105,52 +107,27 @@ private[hive] case class HiveSimpleUdf(functionClassName: String, children:
Seq[
     function.getResolver.getEvalMethod(children.map(_.dataType.toTypeInfo))
 
   @transient
-  lazy val dataType = javaClassToDataType(method.getReturnType)
+  protected lazy val arguments = children.map(c => toInspector(c.dataType)).toArray
 
-  protected lazy val wrappers: Array[(Any) => AnyRef] = method.getParameterTypes.map {
argClass =>
-    val primitiveClasses = Seq(
-      Integer.TYPE, classOf[java.lang.Integer], classOf[java.lang.String], java.lang.Double.TYPE,
-      classOf[java.lang.Double], java.lang.Long.TYPE, classOf[java.lang.Long],
-      classOf[HiveDecimal], java.lang.Byte.TYPE, classOf[java.lang.Byte],
-      classOf[java.sql.Timestamp]
-    )
-    val matchingConstructor = argClass.getConstructors.find { c =>
-      c.getParameterTypes.size == 1 && primitiveClasses.contains(c.getParameterTypes.head)
-    }
+  // Create parameter converters
+  @transient
+  protected lazy val conversionHelper = new ConversionHelper(method, arguments)
 
-    matchingConstructor match {
-      case Some(constructor) =>
-        (a: Any) => {
-          logDebug(
-            s"Wrapping $a of type ${if (a == null) "null" else a.getClass.getName} $constructor.")
-          // We must make sure that primitives get boxed java style.
-          if (a == null) {
-            null
-          } else {
-            constructor.newInstance(a match {
-              case i: Int => i: java.lang.Integer
-              case bd: BigDecimal => new HiveDecimal(bd.underlying())
-              case other: AnyRef => other
-            }).asInstanceOf[AnyRef]
-          }
-        }
-      case None =>
-        (a: Any) => a match {
-          case wrapper => wrap(wrapper)
-        }
-    }
+  @transient
+  lazy val dataType = javaClassToDataType(method.getReturnType)
+
+  def catalystToHive(value: Any): Object = value match {
+    // TODO need more types here? or can we use wrap()
+    case bd: BigDecimal => new HiveDecimal(bd.underlying())
+    case d => d.asInstanceOf[Object]
   }
 
   // TODO: Finish input output types.
   override def eval(input: Row): Any = {
-    val evaluatedChildren = children.map(_.eval(input))
-    // Wrap the function arguments in the expected types.
-    val args = evaluatedChildren.zip(wrappers).map {
-      case (arg, wrapper) => wrapper(arg)
-    }
+    val evaluatedChildren = children.map(c => catalystToHive(c.eval(input)))
 
-    // Invoke the udf and unwrap the result.
-    unwrap(method.invoke(function, args: _*))
+    unwrap(FunctionRegistry.invoke(method, function, conversionHelper
+      .convertIfNecessary(evaluatedChildren: _*): _*))
   }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message