spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From icexelloss <...@git.apache.org>
Subject [GitHub] spark pull request #18732: [SPARK-20396][SQL][PySpark] groupby().apply() wit...
Date Wed, 04 Oct 2017 17:42:48 GMT
GitHub user icexelloss commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18732#discussion_r142740947
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala ---
    @@ -435,6 +435,33 @@ class RelationalGroupedDataset protected[sql](
               df.logicalPlan.output,
               df.logicalPlan))
       }
    +
    +  private[sql] def flatMapGroupsInPandas(expr: PythonUDF): DataFrame = {
    +    require(expr.vectorized, "Must pass a vectorized python udf")
    +
    +    val output = expr.dataType match {
    +      case s: StructType => s.map {
    +        case StructField(name, dataType, nullable, metadata) =>
    +          AttributeReference(name, dataType, nullable, metadata)()
    +      }
    +    }
    +
    +    val groupingAttributes: Seq[Attribute] = groupingExprs.map {
    +      case ne: NamedExpression => ne.toAttribute
    +    }
    +
    +    val plan = FlatMapGroupsInPandas(
    +      groupingAttributes,
    +      expr,
    +      output,
    +      df.logicalPlan
    +    )
    +
    +    Dataset.ofRows(
    --- End diff --
    
    Fixed.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message