spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [spark] cloud-fan commented on a change in pull request #27350: [SPARK-30615][SQL] Introduce Analyzer rule for V2 AlterTable column change resolution
Date Fri, 31 Jan 2020 07:45:58 GMT
cloud-fan commented on a change in pull request #27350: [SPARK-30615][SQL] Introduce Analyzer
rule for V2 AlterTable column change resolution
URL: https://github.com/apache/spark/pull/27350#discussion_r373350285
 
 

 ##########
 File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 ##########
 @@ -3002,6 +3004,160 @@ class Analyzer(
       }
     }
   }
+
+  /** Rule to mostly resolve, normalize and rewrite column names based on case sensitivity.
*/
+  object ResolveAlterTableChanges extends Rule[LogicalPlan] {
+    def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
+      case a @ AlterTable(_, _, t: NamedRelation, changes) if t.resolved =>
+        val schema = t.schema
+        val normalizedChanges = changes.flatMap {
+          case add: AddColumn =>
+            val parent = add.fieldNames().init
+            if (parent.nonEmpty) {
+              // Adding a nested field, need to normalize the parent column and position
+              val target = schema.findNestedField(parent, includeCollections = true, conf.resolver)
+              if (target.isEmpty) {
+                // Leave unresolved. Throws error in CheckAnalysis
+                Some(add)
+              } else {
+                val (normalizedName, sf) = target.get
+                sf.dataType match {
+                  case struct: StructType =>
+                    val pos = findColumnPosition(add.position(), parent.quoted, struct)
+                    Some(TableChange.addColumn(
+                      (normalizedName ++ Seq(sf.name, add.fieldNames().last)).toArray,
+                      add.dataType(),
+                      add.isNullable,
+                      add.comment,
+                      pos))
+
+                  case other =>
+                    Some(add)
+                }
+              }
+            } else {
+              // Adding to the root. Just need to normalize position
+              val pos = findColumnPosition(add.position(), "root", schema)
+              Some(TableChange.addColumn(
+                add.fieldNames(),
+                add.dataType(),
+                add.isNullable,
+                add.comment,
+                pos))
+            }
+
+          case typeChange: UpdateColumnType =>
+            // Hive style syntax provides the column type, even if it may not have changed
+            val fieldOpt = schema.findNestedField(
+              typeChange.fieldNames(), includeCollections = true, conf.resolver)
+
+            if (fieldOpt.isEmpty) {
+              // We couldn't resolve the field. Leave it to CheckAnalysis
+              Some(typeChange)
+            } else {
+              val (fieldNames, field) = fieldOpt.get
+              if (field.dataType == typeChange.newDataType()) {
+                // The user didn't want the field to change, so remove this change
 
 Review comment:
   I think we can remove other column changes if they are noop, e.g. `UpdateColumnNullability`
without changing nullability. We can address it in a followup.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message