spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mn-mikke <...@git.apache.org>
Subject [GitHub] spark pull request #21386: [SPARK-23928][SQL][WIP] Add shuffle collection fu...
Date Mon, 21 May 2018 22:00:08 GMT
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21386#discussion_r189725334
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
    @@ -555,6 +557,100 @@ case class ArraySort(child: Expression) extends UnaryExpression
with ArraySortLi
       override def prettyName: String = "array_sort"
     }
     
    +
    +/**
    + * Returns a random permutation of the given array..
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(array) - Returns a random permutation of the given array.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(array(1, 20, 3, 5));
    +       [3, 1, 5, 20]
    +      > SELECT _FUNC_(array(1, 20, null, 3));
    +       [20, null, 3, 1]
    +  """, since = "2.4.0")
    +case class Shuffle(child: Expression) extends UnaryExpression with ImplicitCastInputTypes
{
    +
    +  override def nullable: Boolean = true
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
    +
    +  override def dataType: DataType = child.dataType
    +
    +  lazy val elementType: DataType = dataType.asInstanceOf[ArrayType].elementType
    +
    +  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    +    nullSafeCodeGen(ctx, ev, c => shuffleArrayCodeGen(ctx, ev, c))
    +  }
    +
    +  private def shuffleArrayCodeGen(ctx: CodegenContext, ev: ExprCode, childName: String):
String = {
    +    val length = ctx.freshName("length")
    +    val javaElementType = CodeGenerator.javaType(elementType)
    +    val isPrimitiveType = CodeGenerator.isPrimitiveType(elementType)
    +
    +    val initialization = if (isPrimitiveType) {
    +      s"${ev.value} = $childName.copy()"
    +    } else {
    +      s"""
    +          |${ev.value} = new ${classOf[GenericArrayData].getName()}(new Object[$length]);
    +          |for (int j = 0; j < $childName.numElements(); j++) {
    +          |  ${ev.value}.update(j, ${CodeGenerator.getValue(childName, elementType, "j")});
    +          |}
    +       """.stripMargin
    +    }
    +
    +    val swapAssigments = if (isPrimitiveType) {
    +      val setFunc = "set" + CodeGenerator.primitiveTypeName(elementType)
    +      val getCall = (index: String) => CodeGenerator.getValue(ev.value, elementType,
index)
    +      s"""
    --- End diff --
    
    I see strong similarities with the reverse function. Would it be possible to move the common
code into a new trait or class and then reference it?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message