spark-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Alin Treznai (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (SPARK-13230) HashMap.merged not working properly with Spark
Date Mon, 08 Feb 2016 06:40:39 GMT

     [ https://issues.apache.org/jira/browse/SPARK-13230?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Alin Treznai updated SPARK-13230:
---------------------------------
    Description: 
Using HashMap.merged with Spark fails with NullPointerException.

{noformat}
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.immutable.HashMap

object MergeTest {

  def mergeFn:(HashMap[String, Long], HashMap[String, Long]) => HashMap[String, Long] =
{
    case (m1, m2) => m1.merged(m2){ case (x,y) => (x._1, x._2 + y._2) }
  }

  def empty = HashMap.empty[String,Long]

  def main(args: Array[String]) = {
    val input = Seq(HashMap("A" -> 1L), HashMap("A" -> 2L, "B" -> 3L), HashMap("A" -> 2L, "C" -> 4L))
    val conf = new SparkConf().setAppName("MergeTest").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val result = sc.parallelize(input).reduce(mergeFn)
    println(s"Result=$result")
    sc.stop()
  }

}
{noformat}

Error message:

org.apache.spark.SparkDriverExecutionException: Execution error
        at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1169)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1637)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
        at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1952)
        at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1025)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
        at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
        at org.apache.spark.rdd.RDD.reduce(RDD.scala:1007)
        at MergeTest$.main(MergeTest.scala:21)
        at MergeTest.main(MergeTest.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
Caused by: java.lang.NullPointerException
        at MergeTest$$anonfun$mergeFn$1$$anonfun$apply$1.apply(MergeTest.scala:12)
        at MergeTest$$anonfun$mergeFn$1$$anonfun$apply$1.apply(MergeTest.scala:12)
        at scala.collection.immutable.HashMap$$anon$2.apply(HashMap.scala:148)
        at scala.collection.immutable.HashMap$HashMap1.updated0(HashMap.scala:200)
        at scala.collection.immutable.HashMap$HashTrieMap.updated0(HashMap.scala:322)
        at scala.collection.immutable.HashMap$HashTrieMap.merge0(HashMap.scala:463)
        at scala.collection.immutable.HashMap.merged(HashMap.scala:117)
        at MergeTest$$anonfun$mergeFn$1.apply(MergeTest.scala:12)
        at MergeTest$$anonfun$mergeFn$1.apply(MergeTest.scala:11)
        at org.apache.spark.rdd.RDD$$anonfun$reduce$1$$anonfun$15.apply(RDD.scala:1020)
        at org.apache.spark.rdd.RDD$$anonfun$reduce$1$$anonfun$15.apply(RDD.scala:1017)
        at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:56)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1165)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1637)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)



  was:
Using HashMap.merged with Spark fails with NullPointerException.

import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.immutable.HashMap

object MergeTest {

  def mergeFn:(HashMap[String, Long], HashMap[String, Long]) => HashMap[String, Long] =
{\\
    case (m1, m2) => m1.merged(m2){ case (x,y) => (x._1, x._2 + y._2) }\\
  }\\

  def empty = HashMap.empty[String,Long]

  def main(args: Array[String]) = {
    val input = Seq(HashMap("A" -> 1L), HashMap("A" -> 2L, "B" -> 3L), HashMap("A" -> 2L, "C" -> 4L))
    val conf = new SparkConf().setAppName("MergeTest").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val result = sc.parallelize(input).reduce(mergeFn)
    println(s"Result=$result")
    sc.stop()
  }

}

Error message:

org.apache.spark.SparkDriverExecutionException: Execution error
        at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1169)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1637)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
        at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1952)
        at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1025)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
        at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
        at org.apache.spark.rdd.RDD.reduce(RDD.scala:1007)
        at MergeTest$.main(MergeTest.scala:21)
        at MergeTest.main(MergeTest.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
Caused by: java.lang.NullPointerException
        at MergeTest$$anonfun$mergeFn$1$$anonfun$apply$1.apply(MergeTest.scala:12)
        at MergeTest$$anonfun$mergeFn$1$$anonfun$apply$1.apply(MergeTest.scala:12)
        at scala.collection.immutable.HashMap$$anon$2.apply(HashMap.scala:148)
        at scala.collection.immutable.HashMap$HashMap1.updated0(HashMap.scala:200)
        at scala.collection.immutable.HashMap$HashTrieMap.updated0(HashMap.scala:322)
        at scala.collection.immutable.HashMap$HashTrieMap.merge0(HashMap.scala:463)
        at scala.collection.immutable.HashMap.merged(HashMap.scala:117)
        at MergeTest$$anonfun$mergeFn$1.apply(MergeTest.scala:12)
        at MergeTest$$anonfun$mergeFn$1.apply(MergeTest.scala:11)
        at org.apache.spark.rdd.RDD$$anonfun$reduce$1$$anonfun$15.apply(RDD.scala:1020)
        at org.apache.spark.rdd.RDD$$anonfun$reduce$1$$anonfun$15.apply(RDD.scala:1017)
        at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:56)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1165)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1637)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)




> HashMap.merged not working properly with Spark
> ----------------------------------------------
>
>                 Key: SPARK-13230
>                 URL: https://issues.apache.org/jira/browse/SPARK-13230
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 1.6.0
>         Environment: Ubuntu 14.04.3, Scala 2.11.7, Spark 1.6.0
>            Reporter: Alin Treznai
>
> Using HashMap.merged with Spark fails with NullPointerException.
> {noformat}
> import org.apache.spark.{SparkConf, SparkContext}
> import scala.collection.immutable.HashMap
> object MergeTest {
>   def mergeFn:(HashMap[String, Long], HashMap[String, Long]) => HashMap[String, Long] = {
>     case (m1, m2) => m1.merged(m2){ case (x,y) => (x._1, x._2 + y._2) }
>   }
>   def empty = HashMap.empty[String,Long]
>   def main(args: Array[String]) = {
>     val input = Seq(HashMap("A" -> 1L), HashMap("A" -> 2L, "B" -> 3L), HashMap("A" -> 2L, "C" -> 4L))
>     val conf = new SparkConf().setAppName("MergeTest").setMaster("local[*]")
>     val sc = new SparkContext(conf)
>     val result = sc.parallelize(input).reduce(mergeFn)
>     println(s"Result=$result")
>     sc.stop()
>   }
> }
> {noformat}
> Error message:
> org.apache.spark.SparkDriverExecutionException: Execution error
>         at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1169)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1637)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
>         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>         at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
>         at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
>         at org.apache.spark.SparkContext.runJob(SparkContext.scala:1952)
>         at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1025)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
>         at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
>         at org.apache.spark.rdd.RDD.reduce(RDD.scala:1007)
>         at MergeTest$.main(MergeTest.scala:21)
>         at MergeTest.main(MergeTest.scala)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:498)
> Caused by: java.lang.NullPointerException
>         at MergeTest$$anonfun$mergeFn$1$$anonfun$apply$1.apply(MergeTest.scala:12)
>         at MergeTest$$anonfun$mergeFn$1$$anonfun$apply$1.apply(MergeTest.scala:12)
>         at scala.collection.immutable.HashMap$$anon$2.apply(HashMap.scala:148)
>         at scala.collection.immutable.HashMap$HashMap1.updated0(HashMap.scala:200)
>         at scala.collection.immutable.HashMap$HashTrieMap.updated0(HashMap.scala:322)
>         at scala.collection.immutable.HashMap$HashTrieMap.merge0(HashMap.scala:463)
>         at scala.collection.immutable.HashMap.merged(HashMap.scala:117)
>         at MergeTest$$anonfun$mergeFn$1.apply(MergeTest.scala:12)
>         at MergeTest$$anonfun$mergeFn$1.apply(MergeTest.scala:11)
>         at org.apache.spark.rdd.RDD$$anonfun$reduce$1$$anonfun$15.apply(RDD.scala:1020)
>         at org.apache.spark.rdd.RDD$$anonfun$reduce$1$$anonfun$15.apply(RDD.scala:1017)
>         at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:56)
>         at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1165)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1637)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
>         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org


Mime
View raw message