spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cloud-fan <...@git.apache.org>
Subject [GitHub] spark pull request #19470: [SPARK-14387][SPARK-18355][SQL] Use Spark schema ...
Date Fri, 13 Oct 2017 03:13:02 GMT
Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19470#discussion_r144457796
  
    --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala ---
    @@ -272,25 +272,35 @@ private[orc] object OrcRelation extends HiveInspectors {
       def unwrapOrcStructs(
           conf: Configuration,
           dataSchema: StructType,
    +      requiredSchema: StructType,
           maybeStructOI: Option[StructObjectInspector],
           iterator: Iterator[Writable]): Iterator[InternalRow] = {
         val deserializer = new OrcSerde
    -    val mutableRow = new SpecificInternalRow(dataSchema.map(_.dataType))
    -    val unsafeProjection = UnsafeProjection.create(dataSchema)
    +    val mutableRow = new SpecificInternalRow(requiredSchema.map(_.dataType))
    +    val unsafeProjection = UnsafeProjection.create(requiredSchema)
     
         def unwrap(oi: StructObjectInspector): Iterator[InternalRow] = {
    -      val (fieldRefs, fieldOrdinals) = dataSchema.zipWithIndex.map {
    -        case (field, ordinal) => oi.getStructFieldRef(field.name) -> ordinal
    +      val (fieldRefs, fieldOrdinals) = requiredSchema.zipWithIndex.map {
    +        case (field, ordinal) =>
    +          var ref = oi.getStructFieldRef(field.name)
    +          if (ref == null) {
    +            val maybeIndex = dataSchema.getFieldIndex(field.name)
    +            if (maybeIndex.isDefined) {
    +              ref = oi.getStructFieldRef("_col" + maybeIndex.get)
    +            }
    +          }
    +          ref -> ordinal
           }.unzip
     
    -      val unwrappers = fieldRefs.map(unwrapperFor)
    +      val unwrappers = fieldRefs.map(r => if (r == null) null else unwrapperFor(r))
     
           iterator.map { value =>
             val raw = deserializer.deserialize(value)
             var i = 0
             val length = fieldRefs.length
             while (i < length) {
    -          val fieldValue = oi.getStructFieldData(raw, fieldRefs(i))
    +          val fieldRef = fieldRefs(i)
     +          val fieldValue = if (fieldRef == null) null else oi.getStructFieldData(raw, fieldRefs(i))
               if (fieldValue == null) {
    --- End diff --
    
    nit:
    ```
    if (fieldRef == null) {
      row.setNull...
    } else {
      val fieldValue = ...
      ...
    }
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message