spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mgaido91 <...@git.apache.org>
Subject [GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Date Fri, 04 May 2018 12:50:14 GMT
Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21021#discussion_r186071434
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
    @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
             if (o1 == null && o2 == null) {
               0
             } else if (o1 == null) {
    -          1
    +          -nullOrder
             } else if (o2 == null) {
    -          -1
    +          nullOrder
             } else {
               -ordering.compare(o1, o2)
             }
           }
         }
       }
     
    -  override def nullSafeEval(array: Any, ascending: Any): Any = {
    -    val elementType = base.dataType.asInstanceOf[ArrayType].elementType
    +  def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType
    +
    +  def sortEval(array: Any, ascending: Boolean): Any = {
         val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
         if (elementType != NullType) {
    -      java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt)
    +      java.util.Arrays.sort(data, if (ascending) lt else gt)
         }
         new GenericArrayData(data.asInstanceOf[Array[Any]])
       }
     
    +  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String
= {
    +    val arrayData = classOf[ArrayData].getName
    +    val genericArrayData = classOf[GenericArrayData].getName
    +    val array = ctx.freshName("array")
    +    val c = ctx.freshName("c")
    +    if (elementType == NullType) {
    +      s"${ev.value} = $base.copy();"
    +    } else {
    +      val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType)
    +      val sortOrder = ctx.freshName("sortOrder")
    +      val o1 = ctx.freshName("o1")
    +      val o2 = ctx.freshName("o2")
    +      val jt = CodeGenerator.javaType(elementType)
    +      val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
    +        val bt = CodeGenerator.boxedType(elementType)
    +        val v1 = ctx.freshName("v1")
    +        val v2 = ctx.freshName("v2")
    +        s"""
    +           |$jt $v1 = (($bt) $o1).${jt}Value();
    +           |$jt $v2 = (($bt) $o2).${jt}Value();
    +           |int $c = ${ctx.genComp(elementType, v1, v2)};
    +         """.stripMargin
    +      } else {
    +        s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};"
    +      }
    +      s"""
    +         |Object[] $array = $base.toObjectArray($elementTypeTerm);
    --- End diff --
    
    Now I added the handling of primitives in the PR you mentioned, so it handles that now.
    
    My sentence meant that probably the main issue is to avoid problems with nulls for primitive
types, since in that case the returned array doesn't contain null but the default value for
null items IIUC. So probably it is some extra effort (we should also check if it is worth).
I am fine having this done in a followup PR.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message