spark-user mailing list archives

From Harsh J <ha...@cloudera.com>
Subject Re: how to reference aggregate columns
Date Thu, 10 Dec 2015 03:01:20 GMT
While DataFrame lookups can resolve that auto-generated column name, the
Spark SQL parser does not appear to do so. Use an alias instead:

import sqlContext.implicits._               // for .toDF; spark-shell imports this automatically
import org.apache.spark.sql.functions.sum   // likewise automatic in spark-shell

val rows = Seq(("X", 1), ("Y", 5), ("Z", 4))
val rdd = sc.parallelize(rows)
val dataFrame = rdd.toDF("user", "clicks")
// Alias the aggregate so SQL can refer to it by a plain name
val sumDf = dataFrame.groupBy("user").agg(sum("clicks").alias("csum"))
sumDf.registerTempTable("tempTable")
val ttrows2 = sqlContext.sql("select max(csum) from tempTable")
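
For completeness: since the DataFrame API resolves the generated name
directly (the ttrows.select("SUM(clicks)") line in your repro shows the same
thing), you can also compute the max without going through the SQL parser at
all. A rough sketch reusing the dataFrame above (autoDf and maxClicks are
names of my own; "SUM(clicks)" is the name Spark 1.x auto-generates):

import org.apache.spark.sql.functions.max

val autoDf = dataFrame.groupBy("user").agg(("clicks", "sum"))  // column is named "SUM(clicks)"
// String-based column lookup bypasses the SQL parser, so the odd name resolves fine
val maxClicks = autoDf.agg(max(autoDf("SUM(clicks)"))).collect()

The alias is still the better fix, though: a plain column name survives
registerTempTable and stays usable from any SQL you run later.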

On Thu, Dec 10, 2015 at 7:35 AM skaarthik oss <skaarthik.oss@gmail.com>
wrote:

> I am trying to process an aggregate column. The name of the column is
> "SUM(clicks)", which is automatically assigned when I use the SUM operator
> on a column named "clicks". I am trying to find the max value in this
> aggregated column. However, using the max operator on this aggregated
> column results in a parser error.
>
> How can I reference an aggregated column when applying operators?
>
> Code to repro this behavior:
> val rows = Seq(("X", 1), ("Y", 5), ("Z", 4))
> val rdd = sc.parallelize(rows)
> val dataFrame = rdd.toDF("user", "clicks")
> val sumDf = dataFrame.groupBy("user").agg(("clicks", "sum"))
> sumDf.registerTempTable("tempTable")
> val ttrows = sqlContext.sql("select * from tempTable")
> ttrows.show  /* column names are "user" & "SUM(clicks)" */
> val ttrows2 = sqlContext.sql("select max(SUM(clicks)) from tempTable") /* this line results in the following error */
> val sumCol = ttrows.select("SUM(clicks)") /* this works fine */
>
>
> 15/12/09 17:50:42.938 INFO ParseDriver: Parsing command: select max(SUM(clicks)) from tempTable
> 15/12/09 17:50:42.938 INFO ParseDriver: Parse Completed
> org.apache.spark.sql.AnalysisException: cannot resolve 'clicks' given input columns user, SUM(clicks); line 1 pos 15
>         at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
>         at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:63)
>         at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:52)
>         at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:286)
>         at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:286)
>         at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:285)
>         at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:299)
>         at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>         at scala.collection.Iterator$class.foreach(Iterator.scala:727)
>         at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
>         at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
>         at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
>         at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
>         at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
>         at scala.collection.AbstractIterator.to(Iterator.scala:1157)
>         at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
>         at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
>         at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
>         at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenUp(TreeNode.scala:329)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:283)
>         at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$transformExpressionUp$1(QueryPlan.scala:108)
>         at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2$$anonfun$apply$2.apply(QueryPlan.scala:123)
>         at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>         at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>         at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>         at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>         at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>         at scala.collection.AbstractTraversable.map(Traversable.scala:105)
>         at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:122)
>         at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>         at scala.collection.Iterator$class.foreach(Iterator.scala:727)
>         at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
>         at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
>         at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
>         at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
>         at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
>         at scala.collection.AbstractIterator.to(Iterator.scala:1157)
>         at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
>         at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
>         at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
>         at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
>         at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:127)
>         at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:52)
>         at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:98)
>         at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)
>         at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:42)
>         at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:931)
>         at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:131)
>         at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
>         at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:755)
>         at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:19)
>         at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:24)
>         at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)
>         at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28)
>         at $iwC$$iwC$$iwC$$iwC.<init>(<console>:30)
>         at $iwC$$iwC$$iwC.<init>(<console>:32)
>         at $iwC$$iwC.<init>(<console>:34)
>         at $iwC.<init>(<console>:36)
>         at <init>(<console>:38)
>         at .<init>(<console>:42)
>         at .<clinit>(<console>)
>         at .<init>(<console>:7)
>         at .<clinit>(<console>)
>         at $print(<console>)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:606)
>         at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
>         at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
>         at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
>         at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
>         at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
>         at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
>         at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
>         at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
>         at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
>         at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
>         at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
>         at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
>         at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
>         at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
>         at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
>         at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
>         at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
>         at org.apache.spark.repl.Main$.main(Main.scala:31)
>         at org.apache.spark.repl.Main.main(Main.scala)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:606)
>         at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:665)
>         at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:170)
>         at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:193)
>         at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:112)
>         at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>
