spark-reviews mailing list archives

From gengliangwang <...@git.apache.org>
Subject [GitHub] spark pull request #21004: [SPARK-23896][SQL]Improve PartitioningAwareFileIn...
Date Sun, 08 Apr 2018 18:57:40 GMT
Github user gengliangwang commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21004#discussion_r179957491
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala
---
    @@ -126,35 +126,35 @@ abstract class PartitioningAwareFileIndex(
         val caseInsensitiveOptions = CaseInsensitiveMap(parameters)
         val timeZoneId = caseInsensitiveOptions.get(DateTimeUtils.TIMEZONE_OPTION)
           .getOrElse(sparkSession.sessionState.conf.sessionLocalTimeZone)
    -
    -    userPartitionSchema match {
    +    val inferredPartitionSpec = PartitioningUtils.parsePartitions(
    +      leafDirs,
    +      typeInference = sparkSession.sessionState.conf.partitionColumnTypeInferenceEnabled,
    +      basePaths = basePaths,
    +      timeZoneId = timeZoneId)
    +    userSpecifiedSchema match {
           case Some(userProvidedSchema) if userProvidedSchema.nonEmpty =>
    -        val spec = PartitioningUtils.parsePartitions(
    -          leafDirs,
    -          typeInference = false,
    -          basePaths = basePaths,
    -          timeZoneId = timeZoneId)
    +        val userPartitionSchema =
    +          combineInferredAndUserSpecifiedPartitionSchema(inferredPartitionSpec)
     
    -        // Without auto inference, all of value in the `row` should be null or in StringType,
             // we need to cast into the data type that user specified.
             def castPartitionValuesToUserSchema(row: InternalRow) = {
               InternalRow((0 until row.numFields).map { i =>
    +            val expr = inferredPartitionSpec.partitionColumns.fields(i).dataType match {
    +              case StringType => Literal.create(row.getUTF8String(i), StringType)
    +              case otherType => Literal.create(row.get(i, otherType))
    --- End diff --
    
    I am not entirely sure that all the other cases are covered here.
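    
    For illustration, a minimal standalone sketch of the casting pattern in question, using Catalyst's `InternalRow`, `Literal` and `Cast`. The inferred and user-specified types below are made up for the example (not taken from the PR), and the sketch passes the inferred type to `Literal.create` explicitly rather than relying on the single-argument overload used above:
    
        import org.apache.spark.sql.catalyst.InternalRow
        import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
        import org.apache.spark.sql.types._
        import org.apache.spark.unsafe.types.UTF8String
    
        object PartitionCastSketch {
          def main(args: Array[String]): Unit = {
            // Hypothetical inferred partition column types and the types the user declared.
            val inferredTypes = Seq(StringType, IntegerType)
            val userTypes = Seq(IntegerType, LongType)
            // A partition-values row as Catalyst holds it internally.
            val row = InternalRow(UTF8String.fromString("7"), 42)
    
            // Mirror of the snippet above: wrap each value in a Literal of its
            // inferred type, then cast it to the user-specified type.
            val casted = InternalRow((0 until row.numFields).map { i =>
              val expr = inferredTypes(i) match {
                case StringType => Literal.create(row.getUTF8String(i), StringType)
                case otherType  => Literal.create(row.get(i, otherType), otherType)
              }
              Cast(expr, userTypes(i)).eval()
            }: _*)
    
            // Prints the partition values row cast to the user-specified types.
            println(casted)
          }
        }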


---

