Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19470#discussion_r144457796
--- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala ---
@@ -272,25 +272,35 @@ private[orc] object OrcRelation extends HiveInspectors {
def unwrapOrcStructs(
conf: Configuration,
dataSchema: StructType,
+ requiredSchema: StructType,
maybeStructOI: Option[StructObjectInspector],
iterator: Iterator[Writable]): Iterator[InternalRow] = {
val deserializer = new OrcSerde
- val mutableRow = new SpecificInternalRow(dataSchema.map(_.dataType))
- val unsafeProjection = UnsafeProjection.create(dataSchema)
+ val mutableRow = new SpecificInternalRow(requiredSchema.map(_.dataType))
+ val unsafeProjection = UnsafeProjection.create(requiredSchema)
def unwrap(oi: StructObjectInspector): Iterator[InternalRow] = {
- val (fieldRefs, fieldOrdinals) = dataSchema.zipWithIndex.map {
- case (field, ordinal) => oi.getStructFieldRef(field.name) -> ordinal
+ val (fieldRefs, fieldOrdinals) = requiredSchema.zipWithIndex.map {
+ case (field, ordinal) =>
+ var ref = oi.getStructFieldRef(field.name)
+ if (ref == null) {
+ val maybeIndex = dataSchema.getFieldIndex(field.name)
+ if (maybeIndex.isDefined) {
+ ref = oi.getStructFieldRef("_col" + maybeIndex.get)
+ }
+ }
+ ref -> ordinal
}.unzip
- val unwrappers = fieldRefs.map(unwrapperFor)
+ val unwrappers = fieldRefs.map(r => if (r == null) null else unwrapperFor(r))
iterator.map { value =>
val raw = deserializer.deserialize(value)
var i = 0
val length = fieldRefs.length
while (i < length) {
- val fieldValue = oi.getStructFieldData(raw, fieldRefs(i))
+ val fieldRef = fieldRefs(i)
+ val fieldValue = if (fieldRef == null) null else oi.getStructFieldData(raw,
fieldRefs(i))
if (fieldValue == null) {
--- End diff --
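nit: branch on `fieldRef == null` directly and set the column to null there, instead of producing a null `fieldValue` and relying on the later `fieldValue == null` check: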
```
if (fieldRef == null) {
row.setNull...
} else {
val fieldValue = ...
...
}
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org