spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jaceklaskowski <...@git.apache.org>
Subject [GitHub] spark pull request #14313: [SPARK-16674][SQL] Avoid per-record type dispatch...
Date Sat, 23 Jul 2016 19:51:52 GMT
Github user jaceklaskowski commented on a diff in the pull request:

    https://github.com/apache/spark/pull/14313#discussion_r71977329
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala ---
    @@ -322,46 +322,134 @@ private[sql] class JDBCRDD(
         }
       }
     
    -  // Each JDBC-to-Catalyst conversion corresponds to a tag defined here so that
    -  // we don't have to potentially poke around in the Metadata once for every
    -  // row.
    -  // Is there a better way to do this?  I'd rather be using a type that
    -  // contains only the tags I define.
    -  abstract class JDBCConversion
    -  case object BooleanConversion extends JDBCConversion
    -  case object DateConversion extends JDBCConversion
    -  case class  DecimalConversion(precision: Int, scale: Int) extends JDBCConversion
    -  case object DoubleConversion extends JDBCConversion
    -  case object FloatConversion extends JDBCConversion
    -  case object IntegerConversion extends JDBCConversion
    -  case object LongConversion extends JDBCConversion
    -  case object BinaryLongConversion extends JDBCConversion
    -  case object StringConversion extends JDBCConversion
    -  case object TimestampConversion extends JDBCConversion
    -  case object BinaryConversion extends JDBCConversion
    -  case class ArrayConversion(elementConversion: JDBCConversion) extends JDBCConversion
    +  // A `JDBCConversion` is responsible for converting a value from `ResultSet`
    +  // to a value in a field for `InternalRow`.
    +  private type JDBCConversion = (ResultSet, Int) => Any
    +
    +  // This `ArrayElementConversion` is responsible for converting elements in
    +  // an array from `ResultSet`.
    +  private type ArrayElementConversion = (Object) => Any
     
       /**
    -   * Maps a StructType to a type tag list.
    +   * Maps a StructType to conversions for each type.
        */
       def getConversions(schema: StructType): Array[JDBCConversion] =
         schema.fields.map(sf => getConversions(sf.dataType, sf.metadata))
     
       private def getConversions(dt: DataType, metadata: Metadata): JDBCConversion = dt match {
    -    case BooleanType => BooleanConversion
    -    case DateType => DateConversion
    -    case DecimalType.Fixed(p, s) => DecimalConversion(p, s)
    -    case DoubleType => DoubleConversion
    -    case FloatType => FloatConversion
    -    case IntegerType => IntegerConversion
    -    case LongType => if (metadata.contains("binarylong")) BinaryLongConversion else LongConversion
    -    case StringType => StringConversion
    -    case TimestampType => TimestampConversion
    -    case BinaryType => BinaryConversion
    -    case ArrayType(et, _) => ArrayConversion(getConversions(et, metadata))
    +    case BooleanType =>
    +      (rs: ResultSet, pos: Int) => rs.getBoolean(pos)
    +
    +    case DateType =>
    +      (rs: ResultSet, pos: Int) =>
    +        // DateTimeUtils.fromJavaDate does not handle null value, so we need to check it.
    +        val dateVal = rs.getDate(pos)
    +        if (dateVal != null) {
    +          DateTimeUtils.fromJavaDate(dateVal)
    +        } else {
    +          null
    +        }
    +
    +    case DecimalType.Fixed(p, s) =>
    +      (rs: ResultSet, pos: Int) =>
    +        val decimalVal = rs.getBigDecimal(pos)
    +        if (decimalVal == null) {
    --- End diff --
    
    Same as above (plus, here you compare with `== null`, whereas the `DateType` case above
    compares with `!= null` -- the two null checks are inconsistent)


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message