spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cloud-fan <...@git.apache.org>
Subject [GitHub] spark pull request #20024: [SPARK-22825][SQL] Fix incorrect results of Casti...
Date Thu, 04 Jan 2018 11:09:01 GMT
Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20024#discussion_r159628763
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
---
    @@ -199,13 +199,60 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId:
Option[String
     
       // [[func]] assumes the input is no longer null because eval already does the null
check.
       @inline private[this] def buildCast[T](a: Any, func: T => Any): Any = func(a.asInstanceOf[T])
    +  @inline private[this] def buildWriter[T](
    +      a: Any, buffer: UTF8StringBuilder, writer: (T, UTF8StringBuilder) => Unit):
Unit = {
    +    writer(a.asInstanceOf[T], buffer)
    +  }
    +
    +  private[this] def buildElemWriter(
    +      from: DataType): (Any, UTF8StringBuilder) => Unit = from match {
    +    case BinaryType => buildWriter[Array[Byte]](_, _, (b, buf) => buf.append(b))
    +    case StringType => buildWriter[UTF8String](_, _, (b, buf) => buf.append(b))
    +    case DateType => buildWriter[Int](_, _,
    +      (d, buf) => buf.append(DateTimeUtils.dateToString(d)))
    +    case TimestampType => buildWriter[Long](_, _,
    +      (t, buf) => buf.append(DateTimeUtils.timestampToString(t)))
    +    case ar: ArrayType =>
    +      buildWriter[ArrayData](_, _, (array, buf) => {
    +         buf.append("[")
    +        if (array.numElements > 0) {
    +          val writeElemToBuffer = buildElemWriter(ar.elementType)
    +          writeElemToBuffer(array.get(0, ar.elementType), buf)
    +          var i = 1
    +          while (i < array.numElements) {
    +            buf.append(", ")
    +            writeElemToBuffer(array.get(i, ar.elementType), buf)
    +            i += 1
    +          }
    +        }
    +        buf.append("]")
    +      })
    +    case _ => buildWriter[Any](_, _, (o, buf) => buf.append(String.valueOf(o)))
    +  }
     
       // UDFToString
       private[this] def castToString(from: DataType): Any => Any = from match {
         case BinaryType => buildCast[Array[Byte]](_, UTF8String.fromBytes)
         case DateType => buildCast[Int](_, d => UTF8String.fromString(DateTimeUtils.dateToString(d)))
         case TimestampType => buildCast[Long](_,
           t => UTF8String.fromString(DateTimeUtils.timestampToString(t, timeZone)))
    +    case ar: ArrayType =>
    +      buildCast[ArrayData](_, array => {
    +        val res = new UTF8StringBuilder
    +        res.append("[")
    +        if (array.numElements > 0) {
    +          val writeElemToBuffer = buildElemWriter(ar.elementType)
    +          writeElemToBuffer(array.get(0, ar.elementType), res)
    --- End diff --
    
    I don't get it, why do we need the `buildElemWriter`? I think it's as simple as
    ```
    val toUTF8String = castToString(et)
    builder.append(toUTF8String(array.get(0, et)).asInstanceOf[UTF8String])
    var i = 1
    while ...
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message