Date: Wed, 23 Mar 2016 21:07:25 +0000 (UTC)
From: "Jacek Laskowski (JIRA)"
To: issues@spark.apache.org
Subject: [jira] [Reopened] (SPARK-13456) Cannot create encoders for case classes defined in Spark shell after upgrading to Scala 2.11

     [ https://issues.apache.org/jira/browse/SPARK-13456?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jacek Laskowski reopened SPARK-13456:
-------------------------------------

In today's Spark 2.0.0-SNAPSHOT:

{code}
scala> :pa
// Entering paste mode (ctrl-D to finish)

import sqlContext.implicits._
case class Token(name: String, productId: Int, score: Double)
val data = Token("aaa", 100, 0.12) :: Token("aaa", 200, 0.29) :: Token("bbb", 200, 0.53) :: Token("bbb", 300, 0.42) :: Nil
val ds = data.toDS

// Exiting paste mode, now interpreting.
java.lang.NullPointerException
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:498)
  at org.apache.spark.sql.catalyst.encoders.OuterScopes$.getOuterScope(OuterScopes.scala:64)
  at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$resolveDeserializer$1.applyOrElse(Analyzer.scala:588)
  at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$resolveDeserializer$1.applyOrElse(Analyzer.scala:580)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:259)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:259)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:67)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:258)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:248)
  at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.resolveDeserializer(Analyzer.scala:580)
  at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.resolve(ExpressionEncoder.scala:321)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:197)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:164)
  at org.apache.spark.sql.Dataset$.apply(Dataset.scala:53)
  at org.apache.spark.sql.SQLContext.createDataset(SQLContext.scala:448)
  at org.apache.spark.sql.SQLImplicits.localSeqToDatasetHolder(SQLImplicits.scala:152)
  ... 47 elided
{code}


> Cannot create encoders for case classes defined in Spark shell after upgrading to Scala 2.11
> ---------------------------------------------------------------------------------------------
>
>                  Key: SPARK-13456
>                  URL: https://issues.apache.org/jira/browse/SPARK-13456
>              Project: Spark
>           Issue Type: Bug
>           Components: SQL
>     Affects Versions: 2.0.0
>             Reporter: Cheng Lian
>             Assignee: Wenchen Fan
>             Priority: Blocker
>              Fix For: 2.0.0
>
>
> Spark 2.0 has used Scala 2.11 by default since [PR #10608|https://github.com/apache/spark/pull/10608]. Unfortunately, after this upgrade, Spark fails to create encoders for case classes defined in the REPL:
> {code}
> import sqlContext.implicits._
> case class T(a: Int, b: Double)
> val ds = Seq(1 -> T(1, 1D), 2 -> T(2, 2D)).toDS()
> {code}
> Exception thrown:
> {noformat}
> org.apache.spark.sql.AnalysisException: Unable to generate an encoder for inner class `T` without access to the scope that this class was defined in.
> Try moving this class out of its parent class.;
>   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$resolveDeserializer$1.applyOrElse(Analyzer.scala:565)
>   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$resolveDeserializer$1.applyOrElse(Analyzer.scala:561)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:304)
>   at scala.collection.Iterator$$anon$11.next(Iterator.scala:370)
>   at scala.collection.Iterator$class.foreach(Iterator.scala:742)
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
>   at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
>   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
>   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
>   at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:308)
>   at scala.collection.AbstractIterator.to(Iterator.scala:1194)
>   at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:300)
>   at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1194)
>   at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:287)
>   at scala.collection.AbstractIterator.toArray(Iterator.scala:1194)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:353)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5$$anonfun$apply$11.apply(TreeNode.scala:333)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
>   at scala.collection.immutable.List.foreach(List.scala:381)
>   at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
>   at scala.collection.immutable.List.map(List.scala:285)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:331)
>   at scala.collection.Iterator$$anon$11.next(Iterator.scala:370)
>   at scala.collection.Iterator$class.foreach(Iterator.scala:742)
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
>   at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
>   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
>   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
>   at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:308)
>   at scala.collection.AbstractIterator.to(Iterator.scala:1194)
>   at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:300)
>   at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1194)
>   at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:287)
>   at scala.collection.AbstractIterator.toArray(Iterator.scala:1194)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:353)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:251)
>   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.resolveDeserializer(Analyzer.scala:561)
>   at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.resolve(ExpressionEncoder.scala:315)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:81)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:92)
>   at org.apache.spark.sql.SQLContext.createDataset(SQLContext.scala:482)
>   at org.apache.spark.sql.SQLImplicits.localSeqToDatasetHolder(SQLImplicits.scala:140)
>   ... 51 elided
> {noformat}
> However, the existing Dataset REPL test case does pass:
> {code}
> test("SPARK-2576 importing SQLContext.implicits._") {
>   // We need to use local-cluster to test this case.
>   val output = runInterpreter("local-cluster[1,1,1024]",
>     """
>       |val sqlContext = new org.apache.spark.sql.SQLContext(sc)
>       |import sqlContext.implicits._
>       |case class TestCaseClass(value: Int)
>       |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).toDF().collect()
>       |
>       |// Test Dataset Serialization in the REPL
>       |Seq(TestCaseClass(1)).toDS().collect()
>     """.stripMargin)
>   assertDoesNotContain("error:", output)
>   assertDoesNotContain("Exception", output)
> }
> {code}
> One possible clue is that {{ReplSuite}} calls {{SparkILoop}} directly, while the Spark shell is started by {{o.a.s.repl.Main}}, which also sets the option {{-Yrepl-class-based}}.
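To make that clue concrete, here is a minimal sketch (not from the ticket, and not the {{ReplSuite}} code path) of driving the Scala 2.11 interpreter with the same {{-Yrepl-class-based}} flag that {{o.a.s.repl.Main}} passes. It uses plain scala-compiler APIs rather than Spark's test harness, and the object and value names are made up for illustration:

{code}
// Illustrative only: plain scala-compiler 2.11 APIs, no Spark involved.
// The REPL wrapper strategy (object-based vs. class-based line wrappers)
// is controlled by -Yrepl-class-based, which the existing ReplSuite test
// does not set but the real Spark shell does.
import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter.IMain

object ClassBasedReplSketch {
  def main(args: Array[String]): Unit = {
    val settings = new Settings
    settings.usejavacp.value = true
    // Mirror what the shell's Main does but the existing test does not:
    settings.processArguments(List("-Yrepl-class-based"), processAll = true)

    val intp = new IMain(settings)
    intp.interpret("case class T(a: Int, b: Double)")
    intp.interpret("val t = T(1, 1D)")
    intp.close()
  }
}
{code}

A regression test for this issue would presumably need to run the failing snippet under these class-based wrappers rather than through {{runInterpreter}} as it is configured today.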
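For contrast, the same Dataset workflow is expected to succeed when the case class is top-level in a compiled application, since the encoder is then derived for an ordinary class and there is no enclosing REPL wrapper instance to capture. A hypothetical sketch (the app and object names are made up, not part of the ticket):

{code}
// Hypothetical standalone app: Token is a top-level case class, so encoder
// derivation does not need access to an outer scope.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

case class Token(name: String, productId: Int, score: Double)

object TokenApp {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("TokenApp").setMaster("local[*]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    val data = Token("aaa", 100, 0.12) :: Token("bbb", 200, 0.53) :: Nil
    val ds = data.toDS()  // works: Token is not an inner/REPL-wrapped class
    ds.show()

    sc.stop()
  }
}
{code}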