spark-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Vida Ha (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (SPARK-3390) sqlContext.jsonRDD fails on a complex structure of array and hashmap nesting
Date Thu, 04 Sep 2014 00:52:51 GMT

     [ https://issues.apache.org/jira/browse/SPARK-3390?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Vida Ha updated SPARK-3390:
---------------------------
    Description: 
I found a valid JSON string which Spark SQL nevertheless fails to parse correctly

Try running these lines in a spark-shell:

{code:borderStyle=solid}
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
val badJson = "{\"foo\": [[{\"bar\": 0}]]}"
val rdd = sc.parallelize(badJson :: Nil)
sqlContext.jsonRDD(rdd).count()
{code}

I've tried running these lines on the 1.0.2 release as well as the latest Spark 1.1 release candidate,
and I get this stack trace:

{panel}
org.apache.spark.SparkException: Job aborted due to stage failure: Task 2.0:3 failed 1 times,
most recent failure: Exception failure in TID 7 on host localhost: scala.MatchError: StructType(List())
(of class org.apache.spark.sql.catalyst.types.StructType)
        org.apache.spark.sql.json.JsonRDD$.enforceCorrectType(JsonRDD.scala:333)
        org.apache.spark.sql.json.JsonRDD$$anonfun$enforceCorrectType$1.apply(JsonRDD.scala:335)
        scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
        scala.collection.AbstractTraversable.map(Traversable.scala:105)
        org.apache.spark.sql.json.JsonRDD$.enforceCorrectType(JsonRDD.scala:335)
        org.apache.spark.sql.json.JsonRDD$$anonfun$enforceCorrectType$1.apply(JsonRDD.scala:335)
        scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
        scala.collection.AbstractTraversable.map(Traversable.scala:105)
        org.apache.spark.sql.json.JsonRDD$.enforceCorrectType(JsonRDD.scala:335)
        org.apache.spark.sql.json.JsonRDD$$anonfun$org$apache$spark$sql$json$JsonRDD$$asRow$1$$anonfun$apply$12.apply(JsonRDD.scala:365)
        scala.Option.map(Option.scala:145)
        org.apache.spark.sql.json.JsonRDD$$anonfun$org$apache$spark$sql$json$JsonRDD$$asRow$1.apply(JsonRDD.scala:364)
        org.apache.spark.sql.json.JsonRDD$$anonfun$org$apache$spark$sql$json$JsonRDD$$asRow$1.apply(JsonRDD.scala:349)
        scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        org.apache.spark.sql.json.JsonRDD$.org$apache$spark$sql$json$JsonRDD$$asRow(JsonRDD.scala:349)
        org.apache.spark.sql.json.JsonRDD$$anonfun$createLogicalPlan$1.apply(JsonRDD.scala:51)
        org.apache.spark.sql.json.JsonRDD$$anonfun$createLogicalPlan$1.apply(JsonRDD.scala:51)
        scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
....
{panel}

  was:
I found a valid JSON string which Spark SQL nevertheless fails to parse correctly

Try running these lines in a spark-shell:

{code:borderStyle=solid}
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
val badJson = "{\"foo\": [[{\"bar\": 0}]]}"
val rdd = sc.parallelize(badJson :: Nil)
sqlContext.jsonRDD(rdd).count()
{code}

I've tried running these lines on the 1.0.2 release as well as the latest Spark 1.1 release candidate,
and I get this stack trace:

org.apache.spark.SparkException: Job aborted due to stage failure: Task 2.0:3 failed 1 times,
most recent failure: Exception failure in TID 7 on host localhost: scala.MatchError: StructType(List())
(of class org.apache.spark.sql.catalyst.types.StructType)
        org.apache.spark.sql.json.JsonRDD$.enforceCorrectType(JsonRDD.scala:333)
        org.apache.spark.sql.json.JsonRDD$$anonfun$enforceCorrectType$1.apply(JsonRDD.scala:335)
        scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
        scala.collection.AbstractTraversable.map(Traversable.scala:105)
        org.apache.spark.sql.json.JsonRDD$.enforceCorrectType(JsonRDD.scala:335)
        org.apache.spark.sql.json.JsonRDD$$anonfun$enforceCorrectType$1.apply(JsonRDD.scala:335)
        scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
        scala.collection.AbstractTraversable.map(Traversable.scala:105)
        org.apache.spark.sql.json.JsonRDD$.enforceCorrectType(JsonRDD.scala:335)
        org.apache.spark.sql.json.JsonRDD$$anonfun$org$apache$spark$sql$json$JsonRDD$$asRow$1$$anonfun$apply$12.apply(JsonRDD.scala:365)
        scala.Option.map(Option.scala:145)
        org.apache.spark.sql.json.JsonRDD$$anonfun$org$apache$spark$sql$json$JsonRDD$$asRow$1.apply(JsonRDD.scala:364)
        org.apache.spark.sql.json.JsonRDD$$anonfun$org$apache$spark$sql$json$JsonRDD$$asRow$1.apply(JsonRDD.scala:349)
        scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        org.apache.spark.sql.json.JsonRDD$.org$apache$spark$sql$json$JsonRDD$$asRow(JsonRDD.scala:349)
        org.apache.spark.sql.json.JsonRDD$$anonfun$createLogicalPlan$1.apply(JsonRDD.scala:51)
        org.apache.spark.sql.json.JsonRDD$$anonfun$createLogicalPlan$1.apply(JsonRDD.scala:51)
        scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
....


> sqlContext.jsonRDD fails on a complex structure of array and hashmap nesting
> ----------------------------------------------------------------------------
>
>                 Key: SPARK-3390
>                 URL: https://issues.apache.org/jira/browse/SPARK-3390
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.0.2
>            Reporter: Vida Ha
>            Assignee: Yin Huai
>            Priority: Critical
>
> I found a valid JSON string which Spark SQL nevertheless fails to parse correctly
> Try running these lines in a spark-shell:
> {code:borderStyle=solid}
> val sqlContext = new org.apache.spark.sql.SQLContext(sc)
> val badJson = "{\"foo\": [[{\"bar\": 0}]]}"
> val rdd = sc.parallelize(badJson :: Nil)
> sqlContext.jsonRDD(rdd).count()
> {code}
> I've tried running these lines on the 1.0.2 release as well as the latest Spark 1.1 release candidate,
and I get this stack trace:
> {panel}
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 2.0:3 failed
1 times, most recent failure: Exception failure in TID 7 on host localhost: scala.MatchError:
StructType(List()) (of class org.apache.spark.sql.catalyst.types.StructType)
>         org.apache.spark.sql.json.JsonRDD$.enforceCorrectType(JsonRDD.scala:333)
>         org.apache.spark.sql.json.JsonRDD$$anonfun$enforceCorrectType$1.apply(JsonRDD.scala:335)
>         scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>         scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>         scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>         scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>         scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>         scala.collection.AbstractTraversable.map(Traversable.scala:105)
>         org.apache.spark.sql.json.JsonRDD$.enforceCorrectType(JsonRDD.scala:335)
>         org.apache.spark.sql.json.JsonRDD$$anonfun$enforceCorrectType$1.apply(JsonRDD.scala:335)
>         scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>         scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>         scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>         scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>         scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>         scala.collection.AbstractTraversable.map(Traversable.scala:105)
>         org.apache.spark.sql.json.JsonRDD$.enforceCorrectType(JsonRDD.scala:335)
>         org.apache.spark.sql.json.JsonRDD$$anonfun$org$apache$spark$sql$json$JsonRDD$$asRow$1$$anonfun$apply$12.apply(JsonRDD.scala:365)
>         scala.Option.map(Option.scala:145)
>         org.apache.spark.sql.json.JsonRDD$$anonfun$org$apache$spark$sql$json$JsonRDD$$asRow$1.apply(JsonRDD.scala:364)
>         org.apache.spark.sql.json.JsonRDD$$anonfun$org$apache$spark$sql$json$JsonRDD$$asRow$1.apply(JsonRDD.scala:349)
>         scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>         scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>         org.apache.spark.sql.json.JsonRDD$.org$apache$spark$sql$json$JsonRDD$$asRow(JsonRDD.scala:349)
>         org.apache.spark.sql.json.JsonRDD$$anonfun$createLogicalPlan$1.apply(JsonRDD.scala:51)
>         org.apache.spark.sql.json.JsonRDD$$anonfun$createLogicalPlan$1.apply(JsonRDD.scala:51)
>         scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>         scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
> ....
> {panel}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org


Mime
View raw message