spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ericl <...@git.apache.org>
Subject [GitHub] spark pull request #16944: [SPARK-19611][SQL] Introduce configurable table s...
Date Fri, 24 Feb 2017 00:16:19 GMT
Github user ericl commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16944#discussion_r102852766
  
    --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
---
    @@ -161,22 +164,45 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession)
extends Log
               bucketSpec,
               Some(partitionSchema))
     
    +        val catalogTable = metastoreRelation.catalogTable
             val logicalRelation = cached.getOrElse {
               val sizeInBytes =
                 metastoreRelation.stats(sparkSession.sessionState.conf).sizeInBytes.toLong
               val fileIndex = {
    -            val index = new CatalogFileIndex(
    -              sparkSession, metastoreRelation.catalogTable, sizeInBytes)
    +            val index = new CatalogFileIndex(sparkSession, catalogTable, sizeInBytes)
                 if (lazyPruningEnabled) {
                   index
                 } else {
                   index.filterPartitions(Nil)  // materialize all the partitions in memory
                 }
               }
               val partitionSchemaColumnNames = partitionSchema.map(_.name.toLowerCase).toSet
    -          val dataSchema =
    -            StructType(metastoreSchema
    -              .filterNot(field => partitionSchemaColumnNames.contains(field.name.toLowerCase)))
    +          val filteredMetastoreSchema = StructType(metastoreSchema
    +            .filterNot(field => partitionSchemaColumnNames.contains(field.name.toLowerCase)))
    +
    +          // Infer a case-sensitive schema when the metastore doesn't return one, if
configured.
    +          val inferredSchema = inferSchema(
    +            catalogTable,
    +            metastoreSchema,
    +            options,
    +            defaultSource,
    +            fileType,
    +            fileIndex)
    +
    +          // If configured, save the inferred case-sensitive schema to the table properties
and
    +          // fetch the updated CatalogTable record for use in the LogicalRelation.
    +          val updatedCatalogTable = updateCatalogTable(catalogTable, inferredSchema)
    +
    +          val dataSchema = inferenceMode match {
    +            case (INFER_AND_SAVE | INFER_ONLY) if (!catalogTable.schemaPreservesCase)
=>
    +              inferredSchema.getOrElse {
    +                logWarning(s"Unable to infer schema for table $tableIdentifier from file
format " +
    +                  s"$defaultSource (inference mode: $inferenceMode); using metastore
schema.")
    +                filteredMetastoreSchema
    +              }
    +            case _ =>
    +              filteredMetastoreSchema
    +          }
    --- End diff --
    
    You could return a tuple in that case. I think this would work, though it is now getting a bit messy too:
    
    ```
    val shouldInferSchema = !catalogTable.schemaPreservesCase && inferenceMode !=
NEVER_INFER
    val (dataSchema, updatedTable) = if (shouldInferSchema) {
      val inferredSchema = inferSchema(...)
      if (inferredSchema.isDefined && inferenceMode == INFER_AND_SAVE) {
        try {
           val newTable = updateCatalogTableSchema(...)
           (inferredSchema, newTable)
        } catch {
           ...
           (inferredSchema, catalogTable)
        }
      } else {
        (filteredMetastoreSchema, catalogTable)
      }
    } else {
      (filteredMetastoreSchema, catalogTable)
    }
    ```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA
ticket with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message