spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ueshin <...@git.apache.org>
Subject [GitHub] spark pull request #21073: [SPARK-23936][SQL] Implement map_concat
Date Mon, 25 Jun 2018 02:47:53 GMT
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21073#discussion_r197665954
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---
    @@ -475,6 +474,231 @@ case class MapEntries(child: Expression) extends UnaryExpression with ExpectsInp
       override def prettyName: String = "map_entries"
     }
     
    +/**
    + * Returns the union of all the given maps.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(map, ...) - Returns the union of all the given maps",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(1, 'a', 2, 'b'), map(2, 'c', 3, 'd'));
    +       [[1 -> "a"], [2 -> "b"], [2 -> "c"], [3 -> "d"]]
    +  """, since = "2.4.0")
    +case class MapConcat(children: Seq[Expression]) extends Expression {
    +
    +  override def checkInputDataTypes(): TypeCheckResult = {
    +    var funcName = s"function $prettyName"
    +    if (children.exists(!_.dataType.isInstanceOf[MapType])) {
    +      TypeCheckResult.TypeCheckFailure(
    +        s"input to $funcName should all be of type map, but it's " +
    +          children.map(_.dataType.simpleString).mkString("[", ", ", "]"))
    +    } else {
    +      TypeUtils.checkForSameTypeInputExpr(children.map(_.dataType), funcName)
    +    }
    +  }
    +
    +  override def dataType: MapType = {
    +    val dt = children.map(_.dataType.asInstanceOf[MapType]).headOption
    +      .getOrElse(MapType(StringType, StringType))
    +    val valueContainsNull = children.map(_.dataType.asInstanceOf[MapType])
    +      .exists(_.valueContainsNull)
    +    if (dt.valueContainsNull != valueContainsNull) {
    +      dt.copy(valueContainsNull = valueContainsNull)
    +    } else {
    +      dt
    +    }
    +  }
    +
    +  override def nullable: Boolean = children.exists(_.nullable)
    +
    +  override def eval(input: InternalRow): Any = {
    +    val maps = children.map(_.eval(input))
    +    if (maps.contains(null)) {
    +      return null
    +    }
    +    val keyArrayDatas = maps.map(_.asInstanceOf[MapData].keyArray())
    +    val valueArrayDatas = maps.map(_.asInstanceOf[MapData].valueArray())
    +
    +    val numElements = keyArrayDatas.foldLeft(0L)((sum, ad) => sum + ad.numElements())
    +    if (numElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
    +      throw new RuntimeException(s"Unsuccessful attempt to concat maps with $numElements " +
    +        s"elements due to exceeding the map size limit " +
    +        s"${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.")
    +    }
    +    val finalKeyArray = new Array[AnyRef](numElements.toInt)
    +    val finalValueArray = new Array[AnyRef](numElements.toInt)
    +    var position = 0
    +    for (i <- keyArrayDatas.indices) {
    +      val keyArray = keyArrayDatas(i).toObjectArray(dataType.keyType)
    +      val valueArray = valueArrayDatas(i).toObjectArray(dataType.valueType)
    +      Array.copy(keyArray, 0, finalKeyArray, position, keyArray.length)
    +      Array.copy(valueArray, 0, finalValueArray, position, valueArray.length)
    +      position += keyArray.length
    +    }
    +
    +    new ArrayBasedMapData(new GenericArrayData(finalKeyArray),
    +      new GenericArrayData(finalValueArray))
    +  }
    +
    +  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    +    val mapCodes = children.map(_.genCode(ctx))
    +    val keyType = dataType.keyType
    +    val valueType = dataType.valueType
    +    val argsName = ctx.freshName("args")
    +    val keyArgsName = ctx.freshName("keyArgs")
    +    val valArgsName = ctx.freshName("valArgs")
    +
    +    val mapDataClass = classOf[MapData].getName
    +    val arrayBasedMapDataClass = classOf[ArrayBasedMapData].getName
    +    val arrayDataClass = classOf[ArrayData].getName
    +
    +    val init =
    +      s"""
    +        |$mapDataClass[] $argsName = new $mapDataClass[${mapCodes.size}];
    +        |$arrayDataClass[] $keyArgsName = new $arrayDataClass[${mapCodes.size}];
    +        |$arrayDataClass[] $valArgsName = new $arrayDataClass[${mapCodes.size}];
    --- End diff --
    
    We should create these two arrays later?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message