spark-commits mailing list archives

From: ma...@apache.org
Subject: [04/69] [abbrv] [partial] Initial work to rename package to org.apache.spark
Date: Sun, 01 Sep 2013 21:58:48 GMT
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/core/src/test/scala/spark/storage/BlockManagerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/spark/storage/BlockManagerSuite.scala
deleted file mode 100644
index b719d65..0000000
--- a/core/src/test/scala/spark/storage/BlockManagerSuite.scala
+++ /dev/null
@@ -1,665 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.storage
-
-import java.nio.ByteBuffer
-
-import akka.actor._
-
-import org.scalatest.FunSuite
-import org.scalatest.BeforeAndAfter
-import org.scalatest.PrivateMethodTester
-import org.scalatest.concurrent.Eventually._
-import org.scalatest.concurrent.Timeouts._
-import org.scalatest.matchers.ShouldMatchers._
-import org.scalatest.time.SpanSugar._
-
-import spark.JavaSerializer
-import spark.KryoSerializer
-import spark.SizeEstimator
-import spark.util.AkkaUtils
-import spark.util.ByteBufferInputStream
-
-
-class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodTester {
-  var store: BlockManager = null
-  var store2: BlockManager = null
-  var actorSystem: ActorSystem = null
-  var master: BlockManagerMaster = null
-  var oldArch: String = null
-  var oldOops: String = null
-  var oldHeartBeat: String = null
-
-  // Reuse a serializer across tests to avoid creating a new thread-local buffer on each test
-  System.setProperty("spark.kryoserializer.buffer.mb", "1")
-  val serializer = new KryoSerializer
-
-  before {
-    val (actorSystem, boundPort) = AkkaUtils.createActorSystem("test", "localhost", 0)
-    this.actorSystem = actorSystem
-    System.setProperty("spark.driver.port", boundPort.toString)
-    System.setProperty("spark.hostPort", "localhost:" + boundPort)
-
-    master = new BlockManagerMaster(
-      actorSystem.actorOf(Props(new spark.storage.BlockManagerMasterActor(true))))
-
-    // Set the arch to 64-bit and compressedOops to true to get a deterministic test-case
-    oldArch = System.setProperty("os.arch", "amd64")
-    oldOops = System.setProperty("spark.test.useCompressedOops", "true")
-    oldHeartBeat = System.setProperty("spark.storage.disableBlockManagerHeartBeat", "true")
-    val initialize = PrivateMethod[Unit]('initialize)
-    SizeEstimator invokePrivate initialize()
-    // Set some value ...
-    System.setProperty("spark.hostPort", spark.Utils.localHostName() + ":" + 1111)
-  }
-
-  after {
-    System.clearProperty("spark.driver.port")
-    System.clearProperty("spark.hostPort")
-
-    if (store != null) {
-      store.stop()
-      store = null
-    }
-    if (store2 != null) {
-      store2.stop()
-      store2 = null
-    }
-    actorSystem.shutdown()
-    actorSystem.awaitTermination()
-    actorSystem = null
-    master = null
-
-    if (oldArch != null) {
-      System.setProperty("os.arch", oldArch)
-    } else {
-      System.clearProperty("os.arch")
-    }
-
-    if (oldOops != null) {
-      System.setProperty("spark.test.useCompressedOops", oldOops)
-    } else {
-      System.clearProperty("spark.test.useCompressedOops")
-    }
-  }
-
-  test("StorageLevel object caching") {
-    val level1 = StorageLevel(false, false, false, 3)
-    val level2 = StorageLevel(false, false, false, 3) // this should return the same object as level1
-    val level3 = StorageLevel(false, false, false, 2) // this should return a different object
-    assert(level2 === level1, "level2 is not same as level1")
-    assert(level2.eq(level1), "level2 is not the same object as level1")
-    assert(level3 != level1, "level3 is same as level1")
-    val bytes1 = spark.Utils.serialize(level1)
-    val level1_ = spark.Utils.deserialize[StorageLevel](bytes1)
-    val bytes2 = spark.Utils.serialize(level2)
-    val level2_ = spark.Utils.deserialize[StorageLevel](bytes2)
-    assert(level1_ === level1, "Deserialized level1 not same as original level1")
-    assert(level1_.eq(level1), "Deserialized level1 not the same object as original level1")
-    assert(level2_ === level2, "Deserialized level2 not same as original level2")
-    assert(level2_.eq(level1), "Deserialized level2 not the same object as original level1")
-  }
-
-  test("BlockManagerId object caching") {
-    val id1 = BlockManagerId("e1", "XXX", 1, 0)
-    val id2 = BlockManagerId("e1", "XXX", 1, 0) // this should return the same object as id1
-    val id3 = BlockManagerId("e1", "XXX", 2, 0) // this should return a different object
-    assert(id2 === id1, "id2 is not same as id1")
-    assert(id2.eq(id1), "id2 is not the same object as id1")
-    assert(id3 != id1, "id3 is same as id1")
-    val bytes1 = spark.Utils.serialize(id1)
-    val id1_ = spark.Utils.deserialize[BlockManagerId](bytes1)
-    val bytes2 = spark.Utils.serialize(id2)
-    val id2_ = spark.Utils.deserialize[BlockManagerId](bytes2)
-    assert(id1_ === id1, "Deserialized id1 is not same as original id1")
-    assert(id1_.eq(id1), "Deserialized id1 is not the same object as original id1")
-    assert(id2_ === id2, "Deserialized id2 is not same as original id2")
-    assert(id2_.eq(id1), "Deserialized id2 is not the same object as original id1")
-  }
-
-  test("master + 1 manager interaction") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 2000)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-
-    // Putting a1, a2  and a3 in memory and telling master only about a1 and a2
-    store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY)
-    store.putSingle("a2", a2, StorageLevel.MEMORY_ONLY)
-    store.putSingle("a3", a3, StorageLevel.MEMORY_ONLY, tellMaster = false)
-
-    // Checking whether blocks are in memory
-    assert(store.getSingle("a1") != None, "a1 was not in store")
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") != None, "a3 was not in store")
-
-    // Checking whether master knows about the blocks or not
-    assert(master.getLocations("a1").size > 0, "master was not told about a1")
-    assert(master.getLocations("a2").size > 0, "master was not told about a2")
-    assert(master.getLocations("a3").size === 0, "master was told about a3")
-
-    // Drop a1 and a2 from memory; this should be reported back to the master
-    store.dropFromMemory("a1", null)
-    store.dropFromMemory("a2", null)
-    assert(store.getSingle("a1") === None, "a1 not removed from store")
-    assert(store.getSingle("a2") === None, "a2 not removed from store")
-    assert(master.getLocations("a1").size === 0, "master did not remove a1")
-    assert(master.getLocations("a2").size === 0, "master did not remove a2")
-  }
-
-  test("master + 2 managers interaction") {
-    store = new BlockManager("exec1", actorSystem, master, serializer, 2000)
-    store2 = new BlockManager("exec2", actorSystem, master, new KryoSerializer, 2000)
-
-    val peers = master.getPeers(store.blockManagerId, 1)
-    assert(peers.size === 1, "master did not return the other manager as a peer")
-    assert(peers.head === store2.blockManagerId, "peer returned by master is not the other manager")
-
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY_2)
-    store2.putSingle("a2", a2, StorageLevel.MEMORY_ONLY_2)
-    assert(master.getLocations("a1").size === 2, "master did not report 2 locations for a1")
-    assert(master.getLocations("a2").size === 2, "master did not report 2 locations for a2")
-  }
-
-  test("removing block") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 2000)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-
-    // Putting a1, a2 and a3 in memory and telling master only about a1 and a2
-    store.putSingle("a1-to-remove", a1, StorageLevel.MEMORY_ONLY)
-    store.putSingle("a2-to-remove", a2, StorageLevel.MEMORY_ONLY)
-    store.putSingle("a3-to-remove", a3, StorageLevel.MEMORY_ONLY, tellMaster = false)
-
-    // Checking whether blocks are in memory and memory size
-    val memStatus = master.getMemoryStatus.head._2
-    assert(memStatus._1 == 2000L, "total memory " + memStatus._1 + " should equal 2000")
-    assert(memStatus._2 <= 1200L, "remaining memory " + memStatus._2 + " should <= 1200")
-    assert(store.getSingle("a1-to-remove") != None, "a1 was not in store")
-    assert(store.getSingle("a2-to-remove") != None, "a2 was not in store")
-    assert(store.getSingle("a3-to-remove") != None, "a3 was not in store")
-
-    // Checking whether master knows about the blocks or not
-    assert(master.getLocations("a1-to-remove").size > 0, "master was not told about a1")
-    assert(master.getLocations("a2-to-remove").size > 0, "master was not told about a2")
-    assert(master.getLocations("a3-to-remove").size === 0, "master was told about a3")
-
-    // Remove a1 and a2 and a3. Should be no-op for a3.
-    master.removeBlock("a1-to-remove")
-    master.removeBlock("a2-to-remove")
-    master.removeBlock("a3-to-remove")
-
-    eventually(timeout(1000 milliseconds), interval(10 milliseconds)) {
-      store.getSingle("a1-to-remove") should be (None)
-      master.getLocations("a1-to-remove") should have size 0
-    }
-    eventually(timeout(1000 milliseconds), interval(10 milliseconds)) {
-      store.getSingle("a2-to-remove") should be (None)
-      master.getLocations("a2-to-remove") should have size 0
-    }
-    eventually(timeout(1000 milliseconds), interval(10 milliseconds)) {
-      store.getSingle("a3-to-remove") should not be (None)
-      master.getLocations("a3-to-remove") should have size 0
-    }
-    eventually(timeout(1000 milliseconds), interval(10 milliseconds)) {
-      val memStatus = master.getMemoryStatus.head._2
-      memStatus._1 should equal (2000L)
-      memStatus._2 should equal (2000L)
-    }
-  }
-
-  test("removing rdd") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 2000)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    // Putting a1, a2 and a3 in memory.
-    store.putSingle("rdd_0_0", a1, StorageLevel.MEMORY_ONLY)
-    store.putSingle("rdd_0_1", a2, StorageLevel.MEMORY_ONLY)
-    store.putSingle("nonrddblock", a3, StorageLevel.MEMORY_ONLY)
-    master.removeRdd(0, blocking = false)
-
-    eventually(timeout(1000 milliseconds), interval(10 milliseconds)) {
-      store.getSingle("rdd_0_0") should be (None)
-      master.getLocations("rdd_0_0") should have size 0
-    }
-    eventually(timeout(1000 milliseconds), interval(10 milliseconds)) {
-      store.getSingle("rdd_0_1") should be (None)
-      master.getLocations("rdd_0_1") should have size 0
-    }
-    eventually(timeout(1000 milliseconds), interval(10 milliseconds)) {
-      store.getSingle("nonrddblock") should not be (None)
-      master.getLocations("nonrddblock") should have size (1)
-    }
-
-    store.putSingle("rdd_0_0", a1, StorageLevel.MEMORY_ONLY)
-    store.putSingle("rdd_0_1", a2, StorageLevel.MEMORY_ONLY)
-    master.removeRdd(0, blocking = true)
-    store.getSingle("rdd_0_0") should be (None)
-    master.getLocations("rdd_0_0") should have size 0
-    store.getSingle("rdd_0_1") should be (None)
-    master.getLocations("rdd_0_1") should have size 0
-  }
-
-  test("reregistration on heart beat") {
-    val heartBeat = PrivateMethod[Unit]('heartBeat)
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 2000)
-    val a1 = new Array[Byte](400)
-
-    store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY)
-
-    assert(store.getSingle("a1") != None, "a1 was not in store")
-    assert(master.getLocations("a1").size > 0, "master was not told about a1")
-
-    master.removeExecutor(store.blockManagerId.executorId)
-    assert(master.getLocations("a1").size == 0, "a1 was not removed from master")
-
-    store invokePrivate heartBeat()
-    assert(master.getLocations("a1").size > 0, "a1 was not reregistered with master")
-  }
-
-  test("reregistration on block update") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 2000)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-
-    store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY)
-    assert(master.getLocations("a1").size > 0, "master was not told about a1")
-
-    master.removeExecutor(store.blockManagerId.executorId)
-    assert(master.getLocations("a1").size == 0, "a1 was not removed from master")
-
-    store.putSingle("a2", a2, StorageLevel.MEMORY_ONLY)
-    store.waitForAsyncReregister()
-
-    assert(master.getLocations("a1").size > 0, "a1 was not reregistered with master")
-    assert(master.getLocations("a2").size > 0, "master was not told about a2")
-  }
-
-  test("reregistration doesn't dead lock") {
-    val heartBeat = PrivateMethod[Unit]('heartBeat)
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 2000)
-    val a1 = new Array[Byte](400)
-    val a2 = List(new Array[Byte](400))
-
-    // try many times to trigger any deadlocks
-    for (i <- 1 to 100) {
-      master.removeExecutor(store.blockManagerId.executorId)
-      val t1 = new Thread {
-        override def run() {
-          store.put("a2", a2.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true)
-        }
-      }
-      val t2 = new Thread {
-        override def run() {
-          store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY)
-        }
-      }
-      val t3 = new Thread {
-        override def run() {
-          store invokePrivate heartBeat()
-        }
-      }
-
-      t1.start()
-      t2.start()
-      t3.start()
-      t1.join()
-      t2.join()
-      t3.join()
-
-      store.dropFromMemory("a1", null)
-      store.dropFromMemory("a2", null)
-      store.waitForAsyncReregister()
-    }
-  }
-
-  test("in-memory LRU storage") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY)
-    store.putSingle("a2", a2, StorageLevel.MEMORY_ONLY)
-    store.putSingle("a3", a3, StorageLevel.MEMORY_ONLY)
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") != None, "a3 was not in store")
-    assert(store.getSingle("a1") === None, "a1 was in store")
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    // At this point a2 was gotten last, so LRU will get rid of a3
-    store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY)
-    assert(store.getSingle("a1") != None, "a1 was not in store")
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") === None, "a3 was in store")
-  }
-
-  test("in-memory LRU storage with serialization") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY_SER)
-    store.putSingle("a2", a2, StorageLevel.MEMORY_ONLY_SER)
-    store.putSingle("a3", a3, StorageLevel.MEMORY_ONLY_SER)
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") != None, "a3 was not in store")
-    assert(store.getSingle("a1") === None, "a1 was in store")
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    // At this point a2 was gotten last, so LRU will get rid of a3
-    store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY_SER)
-    assert(store.getSingle("a1") != None, "a1 was not in store")
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") === None, "a3 was in store")
-  }
-
-  test("in-memory LRU for partitions of same RDD") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    store.putSingle("rdd_0_1", a1, StorageLevel.MEMORY_ONLY)
-    store.putSingle("rdd_0_2", a2, StorageLevel.MEMORY_ONLY)
-    store.putSingle("rdd_0_3", a3, StorageLevel.MEMORY_ONLY)
-    // Even though we accessed rdd_0_3 last, it should not have replaced partitions 1 and 2
-    // from the same RDD
-    assert(store.getSingle("rdd_0_3") === None, "rdd_0_3 was in store")
-    assert(store.getSingle("rdd_0_2") != None, "rdd_0_2 was not in store")
-    assert(store.getSingle("rdd_0_1") != None, "rdd_0_1 was not in store")
-    // Check that rdd_0_3 doesn't replace them even after further accesses
-    assert(store.getSingle("rdd_0_3") === None, "rdd_0_3 was in store")
-    assert(store.getSingle("rdd_0_3") === None, "rdd_0_3 was in store")
-    assert(store.getSingle("rdd_0_3") === None, "rdd_0_3 was in store")
-  }
-
-  test("in-memory LRU for partitions of multiple RDDs") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    store.putSingle("rdd_0_1", new Array[Byte](400), StorageLevel.MEMORY_ONLY)
-    store.putSingle("rdd_0_2", new Array[Byte](400), StorageLevel.MEMORY_ONLY)
-    store.putSingle("rdd_1_1", new Array[Byte](400), StorageLevel.MEMORY_ONLY)
-    // At this point rdd_1_1 should've replaced rdd_0_1
-    assert(store.memoryStore.contains("rdd_1_1"), "rdd_1_1 was not in store")
-    assert(!store.memoryStore.contains("rdd_0_1"), "rdd_0_1 was in store")
-    assert(store.memoryStore.contains("rdd_0_2"), "rdd_0_2 was not in store")
-    // Do a get() on rdd_0_2 so that it is the most recently used item
-    assert(store.getSingle("rdd_0_2") != None, "rdd_0_2 was not in store")
-    // Put in more partitions from RDD 0; they should replace rdd_1_1
-    store.putSingle("rdd_0_3", new Array[Byte](400), StorageLevel.MEMORY_ONLY)
-    store.putSingle("rdd_0_4", new Array[Byte](400), StorageLevel.MEMORY_ONLY)
-    // Now rdd_1_1 should be dropped to add rdd_0_3, but then rdd_0_2 should *not* be dropped
-    // when we try to add rdd_0_4.
-    assert(!store.memoryStore.contains("rdd_1_1"), "rdd_1_1 was in store")
-    assert(!store.memoryStore.contains("rdd_0_1"), "rdd_0_1 was in store")
-    assert(!store.memoryStore.contains("rdd_0_4"), "rdd_0_4 was in store")
-    assert(store.memoryStore.contains("rdd_0_2"), "rdd_0_2 was not in store")
-    assert(store.memoryStore.contains("rdd_0_3"), "rdd_0_3 was not in store")
-  }
-
-  test("on-disk storage") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    store.putSingle("a1", a1, StorageLevel.DISK_ONLY)
-    store.putSingle("a2", a2, StorageLevel.DISK_ONLY)
-    store.putSingle("a3", a3, StorageLevel.DISK_ONLY)
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") != None, "a3 was not in store")
-    assert(store.getSingle("a1") != None, "a1 was not in store")
-  }
-
-  test("disk and memory storage") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    store.putSingle("a1", a1, StorageLevel.MEMORY_AND_DISK)
-    store.putSingle("a2", a2, StorageLevel.MEMORY_AND_DISK)
-    store.putSingle("a3", a3, StorageLevel.MEMORY_AND_DISK)
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") != None, "a3 was not in store")
-    assert(store.memoryStore.getValues("a1") == None, "a1 was in memory store")
-    assert(store.getSingle("a1") != None, "a1 was not in store")
-    assert(store.memoryStore.getValues("a1") != None, "a1 was not in memory store")
-  }
-
-  test("disk and memory storage with getLocalBytes") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    store.putSingle("a1", a1, StorageLevel.MEMORY_AND_DISK)
-    store.putSingle("a2", a2, StorageLevel.MEMORY_AND_DISK)
-    store.putSingle("a3", a3, StorageLevel.MEMORY_AND_DISK)
-    assert(store.getLocalBytes("a2") != None, "a2 was not in store")
-    assert(store.getLocalBytes("a3") != None, "a3 was not in store")
-    assert(store.memoryStore.getValues("a1") == None, "a1 was in memory store")
-    assert(store.getLocalBytes("a1") != None, "a1 was not in store")
-    assert(store.memoryStore.getValues("a1") != None, "a1 was not in memory store")
-  }
-
-  test("disk and memory storage with serialization") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    store.putSingle("a1", a1, StorageLevel.MEMORY_AND_DISK_SER)
-    store.putSingle("a2", a2, StorageLevel.MEMORY_AND_DISK_SER)
-    store.putSingle("a3", a3, StorageLevel.MEMORY_AND_DISK_SER)
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") != None, "a3 was not in store")
-    assert(store.memoryStore.getValues("a1") == None, "a1 was in memory store")
-    assert(store.getSingle("a1") != None, "a1 was not in store")
-    assert(store.memoryStore.getValues("a1") != None, "a1 was not in memory store")
-  }
-
-  test("disk and memory storage with serialization and getLocalBytes") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    store.putSingle("a1", a1, StorageLevel.MEMORY_AND_DISK_SER)
-    store.putSingle("a2", a2, StorageLevel.MEMORY_AND_DISK_SER)
-    store.putSingle("a3", a3, StorageLevel.MEMORY_AND_DISK_SER)
-    assert(store.getLocalBytes("a2") != None, "a2 was not in store")
-    assert(store.getLocalBytes("a3") != None, "a3 was not in store")
-    assert(store.memoryStore.getValues("a1") == None, "a1 was in memory store")
-    assert(store.getLocalBytes("a1") != None, "a1 was not in store")
-    assert(store.memoryStore.getValues("a1") != None, "a1 was not in memory store")
-  }
-
-  test("LRU with mixed storage levels") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val a1 = new Array[Byte](400)
-    val a2 = new Array[Byte](400)
-    val a3 = new Array[Byte](400)
-    val a4 = new Array[Byte](400)
-    // First store a1 and a2, both in memory, and a3, on disk only
-    store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY_SER)
-    store.putSingle("a2", a2, StorageLevel.MEMORY_ONLY_SER)
-    store.putSingle("a3", a3, StorageLevel.DISK_ONLY)
-    // At this point LRU should not kick in because a3 is only on disk
-    assert(store.getSingle("a1") != None, "a1 was not in store")
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") != None, "a3 was not in store")
-    assert(store.getSingle("a1") != None, "a1 was not in store")
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") != None, "a3 was not in store")
-    // Now let's add in a4, which uses both disk and memory; a1 should drop out
-    store.putSingle("a4", a4, StorageLevel.MEMORY_AND_DISK_SER)
-    assert(store.getSingle("a1") == None, "a1 was in store")
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-    assert(store.getSingle("a3") != None, "a3 was not in store")
-    assert(store.getSingle("a4") != None, "a4 was not in store")
-  }
-
-  test("in-memory LRU with streams") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val list1 = List(new Array[Byte](200), new Array[Byte](200))
-    val list2 = List(new Array[Byte](200), new Array[Byte](200))
-    val list3 = List(new Array[Byte](200), new Array[Byte](200))
-    store.put("list1", list1.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true)
-    store.put("list2", list2.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true)
-    store.put("list3", list3.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true)
-    assert(store.get("list2") != None, "list2 was not in store")
-    assert(store.get("list2").get.size == 2)
-    assert(store.get("list3") != None, "list3 was not in store")
-    assert(store.get("list3").get.size == 2)
-    assert(store.get("list1") === None, "list1 was in store")
-    assert(store.get("list2") != None, "list2 was not in store")
-    assert(store.get("list2").get.size == 2)
-    // At this point list2 was gotten last, so LRU will get rid of list3
-    store.put("list1", list1.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true)
-    assert(store.get("list1") != None, "list1 was not in store")
-    assert(store.get("list1").get.size == 2)
-    assert(store.get("list2") != None, "list2 was not in store")
-    assert(store.get("list2").get.size == 2)
-    assert(store.get("list3") === None, "list3 was in store")
-  }
-
-  test("LRU with mixed storage levels and streams") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200)
-    val list1 = List(new Array[Byte](200), new Array[Byte](200))
-    val list2 = List(new Array[Byte](200), new Array[Byte](200))
-    val list3 = List(new Array[Byte](200), new Array[Byte](200))
-    val list4 = List(new Array[Byte](200), new Array[Byte](200))
-    // First store list1 and list2, both in memory, and list3, on disk only
-    store.put("list1", list1.iterator, StorageLevel.MEMORY_ONLY_SER, tellMaster = true)
-    store.put("list2", list2.iterator, StorageLevel.MEMORY_ONLY_SER, tellMaster = true)
-    store.put("list3", list3.iterator, StorageLevel.DISK_ONLY, tellMaster = true)
-    // At this point LRU should not kick in because list3 is only on disk
-    assert(store.get("list1") != None, "list1 was not in store")
-    assert(store.get("list1").get.size === 2)
-    assert(store.get("list2") != None, "list2 was not in store")
-    assert(store.get("list2").get.size === 2)
-    assert(store.get("list3") != None, "list3 was not in store")
-    assert(store.get("list3").get.size === 2)
-    assert(store.get("list1") != None, "list1 was not in store")
-    assert(store.get("list1").get.size === 2)
-    assert(store.get("list2") != None, "list2 was not in store")
-    assert(store.get("list2").get.size === 2)
-    assert(store.get("list3") != None, "list3 was not in store")
-    assert(store.get("list3").get.size === 2)
-    // Now let's add in list4, which uses both disk and memory; list1 should drop out
-    store.put("list4", list4.iterator, StorageLevel.MEMORY_AND_DISK_SER, tellMaster = true)
-    assert(store.get("list1") === None, "list1 was in store")
-    assert(store.get("list2") != None, "list2 was not in store")
-    assert(store.get("list2").get.size === 2)
-    assert(store.get("list3") != None, "list3 was not in store")
-    assert(store.get("list3").get.size === 2)
-    assert(store.get("list4") != None, "list4 was not in store")
-    assert(store.get("list4").get.size === 2)
-  }
-
-  test("negative byte values in ByteBufferInputStream") {
-    val buffer = ByteBuffer.wrap(Array[Int](254, 255, 0, 1, 2).map(_.toByte).toArray)
-    val stream = new ByteBufferInputStream(buffer)
-    val temp = new Array[Byte](10)
-    assert(stream.read() === 254, "unexpected byte read")
-    assert(stream.read() === 255, "unexpected byte read")
-    assert(stream.read() === 0, "unexpected byte read")
-    assert(stream.read(temp, 0, temp.length) === 2, "unexpected number of bytes read")
-    assert(stream.read() === -1, "end of stream not signalled")
-    assert(stream.read(temp, 0, temp.length) === -1, "end of stream not signalled")
-  }
-
-  test("overly large block") {
-    store = new BlockManager("<driver>", actorSystem, master, serializer, 500)
-    store.putSingle("a1", new Array[Byte](1000), StorageLevel.MEMORY_ONLY)
-    assert(store.getSingle("a1") === None, "a1 was in store")
-    store.putSingle("a2", new Array[Byte](1000), StorageLevel.MEMORY_AND_DISK)
-    assert(store.memoryStore.getValues("a2") === None, "a2 was in memory store")
-    assert(store.getSingle("a2") != None, "a2 was not in store")
-  }
-
-  test("block compression") {
-    try {
-      System.setProperty("spark.shuffle.compress", "true")
-      store = new BlockManager("exec1", actorSystem, master, serializer, 2000)
-      store.putSingle("shuffle_0_0_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER)
-      assert(store.memoryStore.getSize("shuffle_0_0_0") <= 100, "shuffle_0_0_0 was not compressed")
-      store.stop()
-      store = null
-
-      System.setProperty("spark.shuffle.compress", "false")
-      store = new BlockManager("exec2", actorSystem, master, serializer, 2000)
-      store.putSingle("shuffle_0_0_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER)
-      assert(store.memoryStore.getSize("shuffle_0_0_0") >= 1000, "shuffle_0_0_0 was compressed")
-      store.stop()
-      store = null
-
-      System.setProperty("spark.broadcast.compress", "true")
-      store = new BlockManager("exec3", actorSystem, master, serializer, 2000)
-      store.putSingle("broadcast_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER)
-      assert(store.memoryStore.getSize("broadcast_0") <= 100, "broadcast_0 was not compressed")
-      store.stop()
-      store = null
-
-      System.setProperty("spark.broadcast.compress", "false")
-      store = new BlockManager("exec4", actorSystem, master, serializer, 2000)
-      store.putSingle("broadcast_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER)
-      assert(store.memoryStore.getSize("broadcast_0") >= 1000, "broadcast_0 was compressed")
-      store.stop()
-      store = null
-
-      System.setProperty("spark.rdd.compress", "true")
-      store = new BlockManager("exec5", actorSystem, master, serializer, 2000)
-      store.putSingle("rdd_0_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER)
-      assert(store.memoryStore.getSize("rdd_0_0") <= 100, "rdd_0_0 was not compressed")
-      store.stop()
-      store = null
-
-      System.setProperty("spark.rdd.compress", "false")
-      store = new BlockManager("exec6", actorSystem, master, serializer, 2000)
-      store.putSingle("rdd_0_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER)
-      assert(store.memoryStore.getSize("rdd_0_0") >= 1000, "rdd_0_0 was compressed")
-      store.stop()
-      store = null
-
-      // Check that any other block types are also kept uncompressed
-      store = new BlockManager("exec7", actorSystem, master, serializer, 2000)
-      store.putSingle("other_block", new Array[Byte](1000), StorageLevel.MEMORY_ONLY)
-      assert(store.memoryStore.getSize("other_block") >= 1000, "other_block was compressed")
-      store.stop()
-      store = null
-    } finally {
-      System.clearProperty("spark.shuffle.compress")
-      System.clearProperty("spark.broadcast.compress")
-      System.clearProperty("spark.rdd.compress")
-    }
-  }
-
-  test("block store put failure") {
-    // Use Java serializer so we can create an unserializable error.
-    store = new BlockManager("<driver>", actorSystem, master, new JavaSerializer, 1200)
-
-    // The put should fail since a1 is not serializable.
-    class UnserializableClass
-    val a1 = new UnserializableClass
-    intercept[java.io.NotSerializableException] {
-      store.putSingle("a1", a1, StorageLevel.DISK_ONLY)
-    }
-
-    // Make sure get a1 doesn't hang and returns None.
-    failAfter(1 second) {
-      assert(store.getSingle("a1") == None, "a1 should not be in store")
-    }
-  }
-}
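
For context: the subject line of this message says the series renames the old spark.* packages to org.apache.spark, and the [partial] tag means the renamed copy of this suite is re-added elsewhere in the patch. Assuming a purely mechanical prefix (the actual re-added file may also move classes between sub-packages), the suite's header would presumably look roughly like the sketch below; this is an illustration, not a quote from the new file.

    // Hypothetical post-rename header (mechanical spark.* -> org.apache.spark.* prefix assumed)
    package org.apache.spark.storage

    import org.apache.spark.JavaSerializer
    import org.apache.spark.KryoSerializer
    import org.apache.spark.SizeEstimator
    import org.apache.spark.util.AkkaUtils
    import org.apache.spark.util.ByteBufferInputStream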

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/core/src/test/scala/spark/ui/UISuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/spark/ui/UISuite.scala b/core/src/test/scala/spark/ui/UISuite.scala
deleted file mode 100644
index 735a794..0000000
--- a/core/src/test/scala/spark/ui/UISuite.scala
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.ui
-
-import scala.util.{Failure, Success, Try}
-import java.net.ServerSocket
-import org.scalatest.FunSuite
-import org.eclipse.jetty.server.Server
-
-class UISuite extends FunSuite {
-  test("jetty port increases under contention") {
-    val startPort = 3030
-    val server = new Server(startPort)
-    server.start()
-    val (jettyServer1, boundPort1) = JettyUtils.startJettyServer("localhost", startPort, Seq())
-    val (jettyServer2, boundPort2) = JettyUtils.startJettyServer("localhost", startPort, Seq())
-
-    // Allow some wiggle room in case ports on the machine are under contention
-    assert(boundPort1 > startPort && boundPort1 < startPort + 10)
-    assert(boundPort2 > boundPort1 && boundPort2 < boundPort1 + 10)
-  }
-
-  test("jetty binds to port 0 correctly") {
-    val (jettyServer, boundPort) = JettyUtils.startJettyServer("localhost", 0, Seq())
-    assert(jettyServer.getState === "STARTED")
-    assert(boundPort != 0)
-    Try {new ServerSocket(boundPort)} match {
-      case Success(s) => fail("Port %s doesn't seem to be used by the Jetty server".format(boundPort))
-      case Failure(e) =>
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/core/src/test/scala/spark/util/DistributionSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/spark/util/DistributionSuite.scala b/core/src/test/scala/spark/util/DistributionSuite.scala
deleted file mode 100644
index 6578b55..0000000
--- a/core/src/test/scala/spark/util/DistributionSuite.scala
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.util
-
-import org.scalatest.FunSuite
-import org.scalatest.matchers.ShouldMatchers
-
-/**
- *
- */
-
-class DistributionSuite extends FunSuite with ShouldMatchers {
-  test("summary") {
-    val d = new Distribution((1 to 100).toArray.map{_.toDouble})
-    val stats = d.statCounter
-    stats.count should be (100)
-    stats.mean should be (50.5)
-    stats.sum should be (50 * 101)
-
-    val quantiles = d.getQuantiles()
-    quantiles(0) should be (1)
-    quantiles(1) should be (26)
-    quantiles(2) should be (51)
-    quantiles(3) should be (76)
-    quantiles(4) should be (100)
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/core/src/test/scala/spark/util/FakeClock.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/spark/util/FakeClock.scala b/core/src/test/scala/spark/util/FakeClock.scala
deleted file mode 100644
index 2367063..0000000
--- a/core/src/test/scala/spark/util/FakeClock.scala
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.util
-
-class FakeClock extends Clock {
-  private var time = 0L
-
-  def advance(millis: Long): Unit = time += millis
-
-  def getTime(): Long = time
-}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/core/src/test/scala/spark/util/NextIteratorSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/spark/util/NextIteratorSuite.scala b/core/src/test/scala/spark/util/NextIteratorSuite.scala
deleted file mode 100644
index fdbd43d..0000000
--- a/core/src/test/scala/spark/util/NextIteratorSuite.scala
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.util
-
-import org.scalatest.FunSuite
-import org.scalatest.matchers.ShouldMatchers
-import scala.collection.mutable.Buffer
-import java.util.NoSuchElementException
-
-class NextIteratorSuite extends FunSuite with ShouldMatchers {
-  test("one iteration") {
-    val i = new StubIterator(Buffer(1))
-    i.hasNext should be === true
-    i.next should be === 1
-    i.hasNext should be === false
-    intercept[NoSuchElementException] { i.next() }
-  }
-  
-  test("two iterations") {
-    val i = new StubIterator(Buffer(1, 2))
-    i.hasNext should be === true
-    i.next should be === 1
-    i.hasNext should be === true
-    i.next should be === 2
-    i.hasNext should be === false
-    intercept[NoSuchElementException] { i.next() }
-  }
-
-  test("empty iteration") {
-    val i = new StubIterator(Buffer())
-    i.hasNext should be === false
-    intercept[NoSuchElementException] { i.next() }
-  }
-
-  test("close is called once for empty iterations") {
-    val i = new StubIterator(Buffer())
-    i.hasNext should be === false
-    i.hasNext should be === false
-    i.closeCalled should be === 1
-  }
-
-  test("close is called once for non-empty iterations") {
-    val i = new StubIterator(Buffer(1, 2))
-    i.next should be === 1
-    i.next should be === 2
-    // close isn't called until we check for the next element
-    i.closeCalled should be === 0
-    i.hasNext should be === false
-    i.closeCalled should be === 1
-    i.hasNext should be === false
-    i.closeCalled should be === 1
-  }
-
-  class StubIterator(ints: Buffer[Int])  extends NextIterator[Int] {
-    var closeCalled = 0
-    
-    override def getNext() = {
-      if (ints.size == 0) {
-        finished = true
-        0
-      } else {
-        ints.remove(0)
-      }
-    }
-
-    override def close() {
-      closeCalled += 1
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala b/core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala
deleted file mode 100644
index 4c00442..0000000
--- a/core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.util
-
-import org.scalatest.FunSuite
-import java.io.ByteArrayOutputStream
-import java.util.concurrent.TimeUnit._
-
-class RateLimitedOutputStreamSuite extends FunSuite {
-
-  private def benchmark[U](f: => U): Long = {
-    val start = System.nanoTime
-    f
-    System.nanoTime - start
-  }
-
-  test("write") {
-    val underlying = new ByteArrayOutputStream
-    val data = "X" * 41000
-    val stream = new RateLimitedOutputStream(underlying, 10000)
-    val elapsedNs = benchmark { stream.write(data.getBytes("UTF-8")) }
-    assert(SECONDS.convert(elapsedNs, NANOSECONDS) == 4)
-    assert(underlying.toString("UTF-8") == data)
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/docs/_layouts/global.html
----------------------------------------------------------------------
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html
index 84749fd..349eb92 100755
--- a/docs/_layouts/global.html
+++ b/docs/_layouts/global.html
@@ -100,7 +100,7 @@
                                 <li><a href="tuning.html">Tuning Guide</a></li>
                                 <li><a href="hardware-provisioning.html">Hardware Provisioning</a></li>
                                 <li><a href="building-with-maven.html">Building Spark with Maven</a></li>
-                                <li><a href="contributing-to-spark.html">Contributing to Spark</a></li>
+                                <li><a href="contributing-to-org.apache.spark.html">Contributing to Spark</a></li>
                             </ul>
                         </li>
                     </ul>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index 687fbcc..13b5531 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -19,13 +19,13 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <parent>
-    <groupId>org.spark-project</groupId>
+    <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
     <version>0.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <groupId>org.spark-project</groupId>
+  <groupId>org.apache.spark</groupId>
   <artifactId>spark-examples</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Examples</name>
@@ -33,25 +33,25 @@
 
   <dependencies>
     <dependency>
-      <groupId>org.spark-project</groupId>
+      <groupId>org.apache.spark</groupId>
       <artifactId>spark-core</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
-      <groupId>org.spark-project</groupId>
+      <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
-      <groupId>org.spark-project</groupId>
+      <groupId>org.apache.spark</groupId>
       <artifactId>spark-mllib</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
-      <groupId>org.spark-project</groupId>
+      <groupId>org.apache.spark</groupId>
       <artifactId>spark-bagel</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
@@ -132,7 +132,7 @@
       <id>hadoop2-yarn</id>
       <dependencies>
         <dependency>
-          <groupId>org.spark-project</groupId>
+          <groupId>org.apache.spark</groupId>
           <artifactId>spark-yarn</artifactId>
           <version>${project.version}</version>
           <scope>provided</scope>
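
Side note on the groupId change in this pom: a downstream sbt build consuming the renamed artifacts would presumably declare something like the line below (hypothetical; it assumes the spark-core artifactId and the 0.8.0-SNAPSHOT version that appear earlier in this pom).

    // Hypothetical sbt dependency using the new Maven coordinates
    libraryDependencies += "org.apache.spark" % "spark-core" % "0.8.0-SNAPSHOT"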

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java
new file mode 100644
index 0000000..be0d385
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.StringTokenizer;
+import java.util.Random;
+
+/**
+ * Logistic regression based classification.
+ */
+public class JavaHdfsLR {
+
+  static int D = 10;   // Number of dimensions
+  static Random rand = new Random(42);
+
+  static class DataPoint implements Serializable {
+    public DataPoint(double[] x, double y) {
+      this.x = x;
+      this.y = y;
+    }
+
+    double[] x;
+    double y;
+  }
+
+  static class ParsePoint extends Function<String, DataPoint> {
+    public DataPoint call(String line) {
+      StringTokenizer tok = new StringTokenizer(line, " ");
+      double y = Double.parseDouble(tok.nextToken());
+      double[] x = new double[D];
+      int i = 0;
+      while (i < D) {
+        x[i] = Double.parseDouble(tok.nextToken());
+        i += 1;
+      }
+      return new DataPoint(x, y);
+    }
+  }
+
+  static class VectorSum extends Function2<double[], double[], double[]> {
+    public double[] call(double[] a, double[] b) {
+      double[] result = new double[D];
+      for (int j = 0; j < D; j++) {
+        result[j] = a[j] + b[j];
+      }
+      return result;
+    }
+  }
+
+  static class ComputeGradient extends Function<DataPoint, double[]> {
+    double[] weights;
+
+    public ComputeGradient(double[] weights) {
+      this.weights = weights;
+    }
+
+    public double[] call(DataPoint p) {
+      double[] gradient = new double[D];
+      for (int i = 0; i < D; i++) {
+        double dot = dot(weights, p.x);
+        gradient[i] = (1 / (1 + Math.exp(-p.y * dot)) - 1) * p.y * p.x[i];
+      }
+      return gradient;
+    }
+  }
+
+  public static double dot(double[] a, double[] b) {
+    double x = 0;
+    for (int i = 0; i < D; i++) {
+      x += a[i] * b[i];
+    }
+    return x;
+  }
+
+  public static void printWeights(double[] a) {
+    System.out.println(Arrays.toString(a));
+  }
+
+  public static void main(String[] args) {
+
+    if (args.length < 3) {
+      System.err.println("Usage: JavaHdfsLR <master> <file> <iters>");
+      System.exit(1);
+    }
+
+    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaHdfsLR",
+        System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+    JavaRDD<String> lines = sc.textFile(args[1]);
+    JavaRDD<DataPoint> points = lines.map(new ParsePoint()).cache();
+    int ITERATIONS = Integer.parseInt(args[2]);
+
+    // Initialize w to a random value
+    double[] w = new double[D];
+    for (int i = 0; i < D; i++) {
+      w[i] = 2 * rand.nextDouble() - 1;
+    }
+
+    System.out.print("Initial w: ");
+    printWeights(w);
+
+    for (int i = 1; i <= ITERATIONS; i++) {
+      System.out.println("On iteration " + i);
+
+      double[] gradient = points.map(
+        new ComputeGradient(w)
+      ).reduce(new VectorSum());
+
+      for (int j = 0; j < D; j++) {
+        w[j] -= gradient[j];
+      }
+
+    }
+
+    System.out.print("Final w: ");
+    printWeights(w);
+    System.exit(0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java
new file mode 100644
index 0000000..5a6afe7
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import scala.Tuple2;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.util.Vector;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * K-means clustering using Java API.
+ */
+public class JavaKMeans {
+
+  /** Parses numbers split by whitespace to a vector */
+  static Vector parseVector(String line) {
+    String[] splits = line.split(" ");
+    double[] data = new double[splits.length];
+    int i = 0;
+    for (String s : splits)
+      data[i] = Double.parseDouble(splits[i++]);
+    return new Vector(data);
+  }
+
+  /** Computes the vector to which the input vector is closest using squared distance */
+  static int closestPoint(Vector p, List<Vector> centers) {
+    int bestIndex = 0;
+    double closest = Double.POSITIVE_INFINITY;
+    for (int i = 0; i < centers.size(); i++) {
+      double tempDist = p.squaredDist(centers.get(i));
+      if (tempDist < closest) {
+        closest = tempDist;
+        bestIndex = i;
+      }
+    }
+    return bestIndex;
+  }
+
+  /** Computes the mean across all vectors in the input set of vectors */
+  static Vector average(List<Vector> ps) {
+    int numVectors = ps.size();
+    Vector out = new Vector(ps.get(0).elements());
+    // start from i = 1 since we already copied index 0 above
+    for (int i = 1; i < numVectors; i++) {
+      out.addInPlace(ps.get(i));
+    }
+    return out.divide(numVectors);
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 4) {
+      System.err.println("Usage: JavaKMeans <master> <file> <k> <convergeDist>");
+      System.exit(1);
+    }
+    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaKMeans",
+      System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+    String path = args[1];
+    int K = Integer.parseInt(args[2]);
+    double convergeDist = Double.parseDouble(args[3]);
+
+    JavaRDD<Vector> data = sc.textFile(path).map(
+      new Function<String, Vector>() {
+        @Override
+        public Vector call(String line) throws Exception {
+          return parseVector(line);
+        }
+      }
+    ).cache();
+
+    final List<Vector> centroids = data.takeSample(false, K, 42);
+
+    double tempDist;
+    do {
+      // allocate each vector to closest centroid
+      JavaPairRDD<Integer, Vector> closest = data.map(
+        new PairFunction<Vector, Integer, Vector>() {
+          @Override
+          public Tuple2<Integer, Vector> call(Vector vector) throws Exception {
+            return new Tuple2<Integer, Vector>(
+              closestPoint(vector, centroids), vector);
+          }
+        }
+      );
+
+      // group by cluster id and average the vectors within each cluster to compute centroids
+      JavaPairRDD<Integer, List<Vector>> pointsGroup = closest.groupByKey();
+      Map<Integer, Vector> newCentroids = pointsGroup.mapValues(
+        new Function<List<Vector>, Vector>() {
+          public Vector call(List<Vector> ps) throws Exception {
+            return average(ps);
+          }
+        }).collectAsMap();
+      tempDist = 0.0;
+      for (int i = 0; i < K; i++) {
+        tempDist += centroids.get(i).squaredDist(newCentroids.get(i));
+      }
+      for (Map.Entry<Integer, Vector> t: newCentroids.entrySet()) {
+        centroids.set(t.getKey(), t.getValue());
+      }
+      System.out.println("Finished iteration (delta = " + tempDist + ")");
+    } while (tempDist > convergeDist);
+
+    System.out.println("Final centers:");
+    for (Vector c : centroids)
+      System.out.println(c);
+
+    System.exit(0);
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
new file mode 100644
index 0000000..152f029
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import com.google.common.collect.Lists;
+import scala.Tuple2;
+import scala.Tuple3;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Executes a roll-up style query against Apache logs.
+ */
+public class JavaLogQuery {
+
+  public static List<String> exampleApacheLogs = Lists.newArrayList(
+    "10.10.10.10 - \"FRED\" [18/Jan/2013:17:56:07 +1100] \"GET http://images.com/2013/Generic.jpg " +
+      "HTTP/1.1\" 304 315 \"http://referall.com/\" \"Mozilla/4.0 (compatible; MSIE 7.0; " +
+      "Windows NT 5.1; GTB7.4; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648; " +
+      ".NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 1.0.3705; .NET CLR 1.1.4322; .NET CLR " +
+      "3.5.30729; Release=ARP)\" \"UD-1\" - \"image/jpeg\" \"whatever\" 0.350 \"-\" - \"\" 265 923 934 \"\" " +
+      "62.24.11.25 images.com 1358492167 - Whatup",
+    "10.10.10.10 - \"FRED\" [18/Jan/2013:18:02:37 +1100] \"GET http://images.com/2013/Generic.jpg " +
+      "HTTP/1.1\" 304 306 \"http:/referall.com\" \"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; " +
+      "GTB7.4; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648; .NET CLR " +
+      "3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 1.0.3705; .NET CLR 1.1.4322; .NET CLR  " +
+      "3.5.30729; Release=ARP)\" \"UD-1\" - \"image/jpeg\" \"whatever\" 0.352 \"-\" - \"\" 256 977 988 \"\" " +
+      "0 73.23.2.15 images.com 1358492557 - Whatup");
+
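+  /** Access-log pattern; capture groups are IP, client id, user, timestamp, request, status, bytes, referer and user agent. */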
+  public static Pattern apacheLogRegex = Pattern.compile(
+    "^([\\d.]+) (\\S+) (\\S+) \\[([\\w\\d:/]+\\s[+\\-]\\d{4})\\] \"(.+?)\" (\\d{3}) ([\\d\\-]+) \"([^\"]+)\" \"([^\"]+)\".*");
+
+  /** Tracks the total query count and number of aggregate bytes for a particular group. */
+  public static class Stats implements Serializable {
+
+    private int count;
+    private int numBytes;
+
+    public Stats(int count, int numBytes) {
+      this.count = count;
+      this.numBytes = numBytes;
+    }
+    public Stats merge(Stats other) {
+      return new Stats(count + other.count, numBytes + other.numBytes);
+    }
+
+    public String toString() {
+      return String.format("bytes=%s\tn=%s", numBytes, count);
+    }
+  }
+
+  public static Tuple3<String, String, String> extractKey(String line) {
+    Matcher m = apacheLogRegex.matcher(line);
+    if (m.find()) {
+      String ip = m.group(1);
+      String user = m.group(3);
+      String query = m.group(5);
+      if (!user.equalsIgnoreCase("-")) {
+        return new Tuple3<String, String, String>(ip, user, query);
+      }
+    }
+    return new Tuple3<String, String, String>(null, null, null);
+  }
+
+  public static Stats extractStats(String line) {
+    Matcher m = apacheLogRegex.matcher(line);
+    if (m.find()) {
+      int bytes = Integer.parseInt(m.group(7));
+      return new Stats(1, bytes);
+    } else {
+      return new Stats(1, 0);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length == 0) {
+      System.err.println("Usage: JavaLogQuery <master> [logFile]");
+      System.exit(1);
+    }
+
+    JavaSparkContext jsc = new JavaSparkContext(args[0], "JavaLogQuery",
+      System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+
+    JavaRDD<String> dataSet = (args.length == 2) ? jsc.textFile(args[1]) : jsc.parallelize(exampleApacheLogs);
+
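+    // Key each log line by its (ip, user, request) tuple and attach the per-line byte stats.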
+    JavaPairRDD<Tuple3<String, String, String>, Stats> extracted = dataSet.map(new PairFunction<String, Tuple3<String, String, String>, Stats>() {
+      @Override
+      public Tuple2<Tuple3<String, String, String>, Stats> call(String s) throws Exception {
+        return new Tuple2<Tuple3<String, String, String>, Stats>(extractKey(s), extractStats(s));
+      }
+    });
+
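+    // Merge the stats for all lines that share the same (ip, user, request) key.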
+    JavaPairRDD<Tuple3<String, String, String>, Stats> counts = extracted.reduceByKey(new Function2<Stats, Stats, Stats>() {
+      @Override
+      public Stats call(Stats stats, Stats stats2) throws Exception {
+        return stats.merge(stats2);
+      }
+    });
+
+    List<Tuple2<Tuple3<String, String, String>, Stats>> output = counts.collect();
+    for (Tuple2 t : output) {
+      System.out.println(t._1 + "\t" + t._2);
+    }
+    System.exit(0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
new file mode 100644
index 0000000..c5603a6
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import scala.Tuple2;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFlatMapFunction;
+import org.apache.spark.api.java.function.PairFunction;
+
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * Computes the PageRank of URLs from an input file. The input file should
+ * be in the format:
+ * URL         neighbor URL
+ * URL         neighbor URL
+ * URL         neighbor URL
+ * ...
+ * where a URL and its neighbors are separated by space(s).
+ */
+public class JavaPageRank {
+  private static class Sum extends Function2<Double, Double, Double> {
+    @Override
+    public Double call(Double a, Double b) {
+      return a + b;
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 3) {
+      System.err.println("Usage: JavaPageRank <master> <file> <number_of_iterations>");
+      System.exit(1);
+    }
+
+    JavaSparkContext ctx = new JavaSparkContext(args[0], "JavaPageRank",
+      System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+
+    // Loads the input file. It should be in the format:
+    //     URL         neighbor URL
+    //     URL         neighbor URL
+    //     URL         neighbor URL
+    //     ...
+    JavaRDD<String> lines = ctx.textFile(args[1], 1);
+
+    // Loads all URLs from the input file and initializes their neighbor lists.
+    JavaPairRDD<String, List<String>> links = lines.map(new PairFunction<String, String, String>() {
+      @Override
+      public Tuple2<String, String> call(String s) {
+        String[] parts = s.split("\\s+");
+        return new Tuple2<String, String>(parts[0], parts[1]);
+      }
+    }).distinct().groupByKey().cache();
+
+    // Initializes the rank of each URL loaded above to one.
+    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<List<String>, Double>() {
+      @Override
+      public Double call(List<String> rs) throws Exception {
+        return 1.0;
+      }
+    });
+
+    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
+    for (int current = 0; current < Integer.parseInt(args[2]); current++) {
+      // Calculates URL contributions to the rank of other URLs.
+      JavaPairRDD<String, Double> contribs = links.join(ranks).values()
+        .flatMap(new PairFlatMapFunction<Tuple2<List<String>, Double>, String, Double>() {
+          @Override
+          public Iterable<Tuple2<String, Double>> call(Tuple2<List<String>, Double> s) {
+            List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
+            for (String n : s._1) {
+              results.add(new Tuple2<String, Double>(n, s._2 / s._1.size()));
+            }
+            return results;
+          }
+      });
+
+      // Re-calculates URL ranks based on neighbor contributions.
+      ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
+        @Override
+        public Double call(Double sum) throws Exception {
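+          // Apply the damping factor: new rank = 0.15 + 0.85 * (sum of contributions).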
+          return 0.15 + sum * 0.85;
+        }
+      });
+    }
+
+    // Collects all URL ranks and dumps them to the console.
+    List<Tuple2<String, Double>> output = ranks.collect();
+    for (Tuple2 tuple : output) {
+      System.out.println(tuple._1 + " has rank: " + tuple._2 + ".");
+    }
+
+    System.exit(0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
new file mode 100644
index 0000000..4a2380c
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Computes an approximation to pi */
+public class JavaSparkPi {
+
+  public static void main(String[] args) throws Exception {
+    if (args.length == 0) {
+      System.err.println("Usage: JavaLogQuery <master> [slices]");
+      System.exit(1);
+    }
+
+    JavaSparkContext jsc = new JavaSparkContext(args[0], "JavaLogQuery",
+      System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+
+    int slices = (args.length == 2) ? Integer.parseInt(args[1]) : 2;
+    int n = 100000 * slices;
+    List<Integer> l = new ArrayList<Integer>(n);
+    for (int i = 0; i < n; i++)
+      l.add(i);
+
+    JavaRDD<Integer> dataSet = jsc.parallelize(l, slices);
+
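+    // Monte Carlo estimate: draw points uniformly from the square [-1, 1] x [-1, 1] and count
+    // how many land inside the unit circle; that fraction approximates pi / 4.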
+    int count = dataSet.map(new Function<Integer, Integer>() {
+      @Override
+      public Integer call(Integer integer) throws Exception {
+        double x = Math.random() * 2 - 1;
+        double y = Math.random() * 2 - 1;
+        return (x * x + y * y < 1) ? 1 : 0;
+      }
+    }).reduce(new Function2<Integer, Integer, Integer>() {
+      @Override
+      public Integer call(Integer integer, Integer integer2) throws Exception {
+        return integer + integer2;
+      }
+    });
+
+    System.out.println("Pi is roughly " + 4.0 * count / n);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/examples/JavaTC.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaTC.java b/examples/src/main/java/org/apache/spark/examples/JavaTC.java
new file mode 100644
index 0000000..17f21f6
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/JavaTC.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import scala.Tuple2;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.PairFunction;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transitive closure on a graph, implemented in Java.
+ */
+public class JavaTC {
+
+  static int numEdges = 200;
+  static int numVertices = 100;
+  static Random rand = new Random(42);
+
+  static List<Tuple2<Integer, Integer>> generateGraph() {
+    Set<Tuple2<Integer, Integer>> edges = new HashSet<Tuple2<Integer, Integer>>(numEdges);
+    while (edges.size() < numEdges) {
+      int from = rand.nextInt(numVertices);
+      int to = rand.nextInt(numVertices);
+      Tuple2<Integer, Integer> e = new Tuple2<Integer, Integer>(from, to);
+      if (from != to) edges.add(e);
+    }
+    return new ArrayList<Tuple2<Integer, Integer>>(edges);
+  }
+
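+  /** Projects a joined record (y, (z, x)) down to the extended path (x, z). */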
+  static class ProjectFn extends PairFunction<Tuple2<Integer, Tuple2<Integer, Integer>>,
+      Integer, Integer> {
+    static ProjectFn INSTANCE = new ProjectFn();
+
+    public Tuple2<Integer, Integer> call(Tuple2<Integer, Tuple2<Integer, Integer>> triple) {
+      return new Tuple2<Integer, Integer>(triple._2()._2(), triple._2()._1());
+    }
+  }
+
+  public static void main(String[] args) {
+    if (args.length == 0) {
+      System.err.println("Usage: JavaTC <host> [<slices>]");
+      System.exit(1);
+    }
+
+    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaTC",
+        System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+    int slices = (args.length > 1) ? Integer.parseInt(args[1]) : 2;
+    JavaPairRDD<Integer, Integer> tc = sc.parallelizePairs(generateGraph(), slices).cache();
+
+    // Linear transitive closure: each round grows paths by one edge,
+    // by joining the graph's edges with the already-discovered paths.
+    // e.g. join the path (y, z) from the TC with the edge (x, y) from
+    // the graph to obtain the path (x, z).
+
+    // Because join() joins on keys, the edges are stored in reversed order.
+    JavaPairRDD<Integer, Integer> edges = tc.map(
+      new PairFunction<Tuple2<Integer, Integer>, Integer, Integer>() {
+        public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> e) {
+          return new Tuple2<Integer, Integer>(e._2(), e._1());
+        }
+    });
+
+    long oldCount = 0;
+    long nextCount = tc.count();
+    do {
+      oldCount = nextCount;
+      // Perform the join, obtaining an RDD of (y, (z, x)) pairs,
+      // then project the result to obtain the new (x, z) paths.
+      tc = tc.union(tc.join(edges).map(ProjectFn.INSTANCE)).distinct().cache();
+      nextCount = tc.count();
+    } while (nextCount != oldCount);
+
+    System.out.println("TC has " + tc.count() + " edges.");
+    System.exit(0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java
new file mode 100644
index 0000000..07d32ad
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import scala.Tuple2;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+
+import java.util.Arrays;
+import java.util.List;
+
+public class JavaWordCount {
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err.println("Usage: JavaWordCount <master> <file>");
+      System.exit(1);
+    }
+
+    JavaSparkContext ctx = new JavaSparkContext(args[0], "JavaWordCount",
+        System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+    JavaRDD<String> lines = ctx.textFile(args[1], 1);
+
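+    // Split each line into words on single spaces.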
+    JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
+      public Iterable<String> call(String s) {
+        return Arrays.asList(s.split(" "));
+      }
+    });
+    
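+    // Pair each word with a count of one.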
+    JavaPairRDD<String, Integer> ones = words.map(new PairFunction<String, String, Integer>() {
+      public Tuple2<String, Integer> call(String s) {
+        return new Tuple2<String, Integer>(s, 1);
+      }
+    });
+    
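+    // Sum the counts for each distinct word.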
+    JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
+      public Integer call(Integer i1, Integer i2) {
+        return i1 + i2;
+      }
+    });
+
+    List<Tuple2<String, Integer>> output = counts.collect();
+    for (Tuple2 tuple : output) {
+      System.out.println(tuple._1 + ": " + tuple._2);
+    }
+    System.exit(0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java
new file mode 100644
index 0000000..628cb89
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.recommendation.ALS;
+import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
+import org.apache.spark.mllib.recommendation.Rating;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+import scala.Tuple2;
+
+/**
+ * Example using MLlib ALS from Java.
+ */
+public class JavaALS {
+
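+  /** Parses a "user,product,rating" line into a Rating. */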
+  static class ParseRating extends Function<String, Rating> {
+    public Rating call(String line) {
+      StringTokenizer tok = new StringTokenizer(line, ",");
+      int x = Integer.parseInt(tok.nextToken());
+      int y = Integer.parseInt(tok.nextToken());
+      double rating = Double.parseDouble(tok.nextToken());
+      return new Rating(x, y, rating);
+    }
+  }
+
+  static class FeaturesToString extends Function<Tuple2<Object, double[]>, String> {
+    public String call(Tuple2<Object, double[]> element) {
+      return element._1().toString() + "," + Arrays.toString(element._2());
+    }
+  }
+
+  public static void main(String[] args) {
+
+    if (args.length != 5 && args.length != 6) {
+      System.err.println(
+          "Usage: JavaALS <master> <ratings_file> <rank> <iterations> <output_dir> [<blocks>]");
+      System.exit(1);
+    }
+
+    int rank = Integer.parseInt(args[2]);
+    int iterations = Integer.parseInt(args[3]);
+    String outputDir = args[4];
+    int blocks = -1;
+    if (args.length == 6) {
+      blocks = Integer.parseInt(args[5]);
+    }
+
+    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaALS",
+        System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+    JavaRDD<String> lines = sc.textFile(args[1]);
+
+    JavaRDD<Rating> ratings = lines.map(new ParseRating());
+
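+    // Train the factorization model with the given rank and iteration count (lambda fixed at 0.01).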
+    MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);
+
+    model.userFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
+        outputDir + "/userFeatures");
+    model.productFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
+        outputDir + "/productFeatures");
+    System.out.println("Final user/product features written to " + outputDir);
+
+    System.exit(0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java
new file mode 100644
index 0000000..cd59a13
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.clustering.KMeans;
+import org.apache.spark.mllib.clustering.KMeansModel;
+
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+/**
+ * Example using MLlib KMeans from Java.
+ */
+public class JavaKMeans {
+
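+  /** Parses a line of space-separated numbers into a point (double[]). */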
+  static class ParsePoint extends Function<String, double[]> {
+    public double[] call(String line) {
+      StringTokenizer tok = new StringTokenizer(line, " ");
+      int numTokens = tok.countTokens();
+      double[] point = new double[numTokens];
+      for (int i = 0; i < numTokens; ++i) {
+        point[i] = Double.parseDouble(tok.nextToken());
+      }
+      return point;
+    }
+  }
+
+  public static void main(String[] args) {
+
+    if (args.length < 4) {
+      System.err.println(
+          "Usage: JavaKMeans <master> <input_file> <k> <max_iterations> [<runs>]");
+      System.exit(1);
+    }
+
+    String inputFile = args[1];
+    int k = Integer.parseInt(args[2]);
+    int iterations = Integer.parseInt(args[3]);
+    int runs = 1;
+
+    if (args.length >= 5) {
+      runs = Integer.parseInt(args[4]);
+    }
+
+    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaKMeans",
+        System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+    JavaRDD<String> lines = sc.textFile(inputFile);
+
+    JavaRDD<double[]> points = lines.map(new ParsePoint());
+
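+    // Cluster the parsed points into k clusters over at most the given number of iterations.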
+    KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs);
+
+    System.out.println("Cluster centers:");
+    for (double[] center : model.clusterCenters()) {
+      System.out.println(" " + Arrays.toString(center));
+    }
+    double cost = model.computeCost(points.rdd());
+    System.out.println("Cost: " + cost);
+
+    System.exit(0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/46eecd11/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java
new file mode 100644
index 0000000..258061c
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.examples;
+
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.classification.LogisticRegressionWithSGD;
+import org.apache.spark.mllib.classification.LogisticRegressionModel;
+import org.apache.spark.mllib.regression.LabeledPoint;
+
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+/**
+ * Logistic regression-based classification using MLlib.
+ */
+public class JavaLR {
+
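+  /** Parses a "label,f1 f2 f3 ..." line into a LabeledPoint. */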
+  static class ParsePoint extends Function<String, LabeledPoint> {
+    public LabeledPoint call(String line) {
+      String[] parts = line.split(",");
+      double y = Double.parseDouble(parts[0]);
+      StringTokenizer tok = new StringTokenizer(parts[1], " ");
+      int numTokens = tok.countTokens();
+      double[] x = new double[numTokens];
+      for (int i = 0; i < numTokens; ++i) {
+        x[i] = Double.parseDouble(tok.nextToken());
+      }
+      return new LabeledPoint(y, x);
+    }
+  }
+
+  public static void printWeights(double[] a) {
+    System.out.println(Arrays.toString(a));
+  }
+
+  public static void main(String[] args) {
+    if (args.length != 4) {
+      System.err.println("Usage: JavaLR <master> <input_dir> <step_size> <niters>");
+      System.exit(1);
+    }
+
+    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaLR",
+        System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+    JavaRDD<String> lines = sc.textFile(args[1]);
+    JavaRDD<LabeledPoint> points = lines.map(new ParsePoint()).cache();
+    double stepSize = Double.parseDouble(args[2]);
+    int iterations = Integer.parseInt(args[3]);
+
+    // Another way to configure LogisticRegression
+    //
+    // LogisticRegressionWithSGD lr = new LogisticRegressionWithSGD();
+    // lr.optimizer().setNumIterations(iterations)
+    //               .setStepSize(stepSize)
+    //               .setMiniBatchFraction(1.0);
+    // lr.setIntercept(true);
+    // LogisticRegressionModel model = lr.train(points.rdd());
+
+    LogisticRegressionModel model = LogisticRegressionWithSGD.train(points.rdd(),
+        iterations, stepSize);
+
+    System.out.print("Final w: ");
+    printWeights(model.weights());
+
+    System.exit(0);
+  }
+}

