spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cloud-fan <...@git.apache.org>
Subject [GitHub] spark pull request: [SPARK-12720] [SQL] SQL Generation Support for...
Date Thu, 03 Mar 2016 01:06:35 GMT
Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/11283#discussion_r54821660
  
    --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala ---
    @@ -211,6 +215,76 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext)
extends Loggi
         )
       }
     
    +  private def groupingSetToSQL(
    +      plan: Aggregate,
    +      expand: Expand,
    +      project: Project): String = {
    +    // The last column of Expand is always grouping ID
    +    val gid = expand.output.last
    +
    +    // In cube/rollup/groupingsets, Analyzer creates new aliases for all group by expressions.
    +    // Since conversion from attribute back SQL ignore expression IDs, the alias of attribute
    +    // references are ignored in aliasMap
    +    val aliasMap = AttributeMap(project.projectList.collect {
    +      case a @ Alias(child, name) if !child.isInstanceOf[AttributeReference] => (a.toAttribute,
a)
    +    })
    +
    +    val groupingExprs = plan.groupingExpressions.filterNot {
    +      // VirtualColumn.groupingIdName is added by Analyzer, and thus remove it.
    +      case a: AttributeReference => a == gid
    +      case o => false
    +    }.map {
    +      case a: AttributeReference if aliasMap.contains(a) => aliasMap(a).child
    +      case o => o
    +    }
    +
    +    val groupingSQL = groupingExprs.map(_.sql).mkString(", ")
    +
    +    val groupingSet = expand.projections.map(_.dropRight(1).filter {
    +      case e: Expression if plan.groupingExpressions.exists(_.semanticEquals(e)) =>
true
    +      case _ => false
    +    }.map {
    +      case a: AttributeReference if aliasMap.contains(a) => aliasMap(a).child
    +      case o => o
    +    })
    +
    +    val aggExprs = plan.aggregateExpressions.map { case expr =>
    +      expr.transformDown {
    +        case a @ Alias(child: AttributeReference, name) if child eq gid =>
    +          // grouping_id() is converted to VirtualColumn.groupingIdName by Analyzer.
Revert it back.
    +          Alias(GroupingID(Nil), name)()
    +        case a @ Alias(_ @ Cast(BitwiseAnd(
    +            ShiftRight(ar: AttributeReference, _ @ Literal(value: Any, IntegerType)),
    +            Literal(1, IntegerType)), ByteType), name) if ar == gid =>
    +          // for converting an expression to its original SQL format grouping(col)
    +          val idx = groupingExprs.length - 1 - value.asInstanceOf[Int]
    +          val groupingCol = groupingExprs.lift(idx)
    +          if (groupingCol.isDefined) {
    +            Grouping(groupingCol.get)
    +          } else {
    +            throw new UnsupportedOperationException(s"unsupported operator $a")
    +          }
    +        case a @ Alias(child: AttributeReference, name) if aliasMap.contains(child) =>
    +          aliasMap(child).child
    +        case o => o
    +      }
    +    }
    +
    +    val groupingSetSQL =
    +      "GROUPING SETS(" +
    +        groupingSet.map(e => s"(${e.map(_.sql).mkString(", ")})").mkString(", ") +
")"
    --- End diff --
    
    If a grouping set is empty, should we generate `()` for it?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message