horn-dev mailing list archives

From zjaf...@apache.org
Subject [1/3] incubator-horn git commit: 1. Separate master worker from slave workers. 2. Make master worker a dedicated Merger. 3. Fails when peer count < 2.
Date Tue, 02 Feb 2016 23:59:03 GMT
Repository: incubator-horn
Updated Branches:
  refs/heads/master 99c3f4e8a -> 91c0c796e


1. Separate master worker from slave workers. 2. Make master worker a dedicated Merger. 3. Fails when peer count < 2.


Project: http://git-wip-us.apache.org/repos/asf/incubator-horn/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-horn/commit/a8e7ff85
Tree: http://git-wip-us.apache.org/repos/asf/incubator-horn/tree/a8e7ff85
Diff: http://git-wip-us.apache.org/repos/asf/incubator-horn/diff/a8e7ff85

Branch: refs/heads/master
Commit: a8e7ff85e395855e4e292979e11732f207430e38
Parents: 99c3f4e
Author: Lee Dongjin <dongjin.lee.kr@gmail.com>
Authored: Sun Jan 31 22:11:56 2016 +0900
Committer: Lee Dongjin <dongjin.lee.kr@gmail.com>
Committed: Mon Feb 1 00:07:37 2016 +0900

----------------------------------------------------------------------
 .../bsp/SmallLayeredNeuralNetworkTrainer.java   | 31 +++++++++++++++-----
 1 file changed, 24 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-horn/blob/a8e7ff85/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java b/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java
index 132ec8c..002a9e5 100644
--- a/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java
+++ b/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java
@@ -17,7 +17,7 @@
  */
 package org.apache.horn.bsp;
 
-import java.io.IOException;
+import com.google.common.base.Preconditions;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.LongWritable;
@@ -31,6 +31,8 @@ import org.apache.hama.commons.math.DoubleMatrix;
 import org.apache.hama.commons.math.DoubleVector;
 import org.mortbay.log.Log;
 
+import java.io.IOException;
+
 /**
  * The trainer that trains the {@link SmallLayeredNeuralNetwork} based on the
  * BSP framework.
@@ -55,13 +57,26 @@ public final class SmallLayeredNeuralNetworkTrainer
 
   private String modelPath;
 
+  /**
+   * Returns true if this worker is the master worker.
+   *
+   * @param peer the BSP peer for this worker
+   */
+  private boolean isMaster(
+      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, SmallLayeredNeuralNetworkMessage> peer) {
+    return peer.getPeerIndex() == 0;
+  }
+
   @Override
  /**
   * If the model path is specified, load the existing model from the storage location.
   */
   public void setup(
      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, SmallLayeredNeuralNetworkMessage> peer) {
-    if (peer.getPeerIndex() == 0) {
+    // At least one master and one slave worker must exist.
+    Preconditions.checkArgument(peer.getNumPeers() >= 2);
+
+    if (isMaster(peer)) {
       Log.info("Begin to train");
     }
     this.isConverge = false;
@@ -84,7 +99,7 @@ public final class SmallLayeredNeuralNetworkTrainer
   public void cleanup(
      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, SmallLayeredNeuralNetworkMessage> peer) {
     // write model to modelPath
-    if (peer.getPeerIndex() == 0) {
+    if (isMaster(peer)) {
       try {
         Log.info(String.format("End of training, number of iterations: %d.\n",
             this.iterations));
@@ -102,12 +117,14 @@ public final class SmallLayeredNeuralNetworkTrainer
      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, SmallLayeredNeuralNetworkMessage> peer)
       throws IOException, SyncException, InterruptedException {
     while (this.iterations++ < maxIterations) {
-      // each groom calculate the matrices updates according to local data
-      calculateUpdates(peer);
+      // each slave worker calculates the matrix updates according to its local data
+      if (!isMaster(peer)) {
+        calculateUpdates(peer);
+      }
       peer.sync();
 
      // master merges the model updates
-      if (peer.getPeerIndex() == 0) {
+      if (isMaster(peer)) {
         mergeUpdates(peer);
       }
       peer.sync();
@@ -188,7 +205,7 @@ public final class SmallLayeredNeuralNetworkTrainer
     int numMessages = peer.getNumCurrentMessages();
     boolean isConverge = false;
     if (numMessages == 0) { // converges
-      isConverge = true;
+      this.isConverge = true;
       return;
     }
 


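----------------------------------------------------------------------

For readers following the change, below is a minimal sketch of the
master/slave superstep pattern this commit introduces. It is an
illustration only: SimplePeer, train, calculateUpdates, and mergeUpdates
are hypothetical stand-ins, not Hama's BSPPeer API or the project's
actual trainer code.

import com.google.common.base.Preconditions;

/** Simplified, hypothetical stand-in for a BSP peer. */
interface SimplePeer {
  int getPeerIndex();
  int getNumPeers();
  void sync() throws InterruptedException; // superstep barrier
}

final class MasterSlaveTrainingSketch {

  /** Same convention as the commit: peer 0 is the dedicated merger. */
  static boolean isMaster(SimplePeer peer) {
    return peer.getPeerIndex() == 0;
  }

  static void train(SimplePeer peer, int maxIterations) throws InterruptedException {
    // The commit's guard: the split only makes sense with at least
    // one master and one slave.
    Preconditions.checkArgument(peer.getNumPeers() >= 2,
        "need at least one master and one slave worker");

    int iterations = 0;
    while (iterations++ < maxIterations) {
      if (!isMaster(peer)) {
        calculateUpdates(peer); // slaves compute updates from their local data
      }
      peer.sync(); // barrier: slave updates are delivered to the master

      if (isMaster(peer)) {
        mergeUpdates(peer); // the master merges the received updates
      }
      peer.sync(); // barrier: the merged model is visible to the slaves
    }
  }

  private static void calculateUpdates(SimplePeer peer) { /* local gradient step */ }

  private static void mergeUpdates(SimplePeer peer) { /* combine received updates */ }
}

One design note: Preconditions.checkArgument also accepts an error-message
argument, which the commit omits; supplying one would make the >= 2
requirement self-explaining when a job is misconfigured with a single peer.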