spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Bob DuCharme <...@snee.com>
Subject Populating a HashMap from a GraphX connectedComponents graph
Date Thu, 26 Mar 2015 13:24:22 GMT
The Scala code below was based on 
https://www.sics.se/~amir/files/download/dic/answers6.pdf. I extended it 
by adding a HashMap called componentLists that I populated with each 
component's starting node as the key and then a ListBuffer of the 
component's members. As the output below the code shows, it seems to do 
all that just fine, but then the HashMap size is back down to 0 when 
it's done (flag3), so I assume there's some scoping issue related to the 
use(s) of the case keyword. (I'm new to Scala and still don't completely 
understand that.)

Can anyone tell me how to modify this so that I still have a populated 
componentLists when it's all done, i.e. when it reaches flag3?

Thanks,

Bob

///////////////
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.HashMap

object problemDemo {
     def main(args: Array[String]) {
          val sc = new SparkContext("local", "ProblemDemo", "127.0.0.1")

         val vertexArray = Array(
             (1L, "Alice"), (2L, "Bob"), (3L, "Charlie"),
             (4L, "David"), (5L, "Ed"),  (6L, "Fran")
         )
         val edgeArray = Array(
             Edge(2L, 1L, "knows"), Edge(2L, 3L, "knows"),
             Edge(3L, 1L, "knows"), Edge(4L, 5L, "knows"),
             Edge(5L, 6L, "knows")
         )
         val vertexRDD: RDD[(Long, String)] = sc.parallelize(vertexArray)
         val edgeRDD: RDD[Edge[String]] = sc.parallelize(edgeArray)
         val graph: Graph[String, String] = Graph(vertexRDD, edgeRDD)

         var componentLists = HashMap[VertexId, ListBuffer[VertexId]]()

         val cc = graph.connectedComponents
         graph.vertices.leftJoin(cc.vertices) {
         case (id, u, comp) => (id, u, comp)
         }.foreach{ case (id, u) =>
                    {
                        // Add id to the list of components with a key
                        // of u._3.get (the starting node)
                        if (!(componentLists.contains(u._3.get))) {
                            componentLists(u._3.get) = new 
ListBuffer[VertexId]
                        }
                        componentLists(u._3.get) += id
                        println(s"just added ${id} to ${u._3.get}")
                        println(s"flag1 length of componentLists 
${componentLists.size}")
                    }
                    println(s"flag2 length of componentLists 
${componentLists.size}")
                  }
         println(s"flag3 length of componentLists ${componentLists.size}")
     }
}

////////// output /////////////

just added 4 to 4
flag1 length of componentLists 1
flag2 length of componentLists 1
just added 2 to 1
flag1 length of componentLists 2
flag2 length of componentLists 2
just added 6 to 4
flag1 length of componentLists 2
flag2 length of componentLists 2
just added 5 to 4
flag1 length of componentLists 2
flag2 length of componentLists 2
just added 3 to 1
flag1 length of componentLists 2
flag2 length of componentLists 2
just added 1 to 1
flag1 length of componentLists 2
flag2 length of componentLists 2
flag3 length of componentLists 0


---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@spark.apache.org
For additional commands, e-mail: user-help@spark.apache.org


Mime
View raw message