Return-Path: X-Original-To: apmail-horn-dev-archive@minotaur.apache.org Delivered-To: apmail-horn-dev-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id E772B184FC for ; Tue, 2 Feb 2016 23:59:06 +0000 (UTC) Received: (qmail 56551 invoked by uid 500); 2 Feb 2016 23:59:06 -0000 Delivered-To: apmail-horn-dev-archive@horn.apache.org Received: (qmail 56513 invoked by uid 500); 2 Feb 2016 23:59:06 -0000 Mailing-List: contact dev-help@horn.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@horn.incubator.apache.org Delivered-To: mailing list dev@horn.incubator.apache.org Received: (qmail 56502 invoked by uid 99); 2 Feb 2016 23:59:06 -0000 Received: from Unknown (HELO spamd2-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 02 Feb 2016 23:59:06 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd2-us-west.apache.org (ASF Mail Server at spamd2-us-west.apache.org) with ESMTP id 6AA3F1A0317 for ; Tue, 2 Feb 2016 23:59:06 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd2-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -2.121 X-Spam-Level: X-Spam-Status: No, score=-2.121 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_BRBL_LASTEXT=1.644, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-0.545] autolearn=disabled Received: from mx1-eu-west.apache.org ([10.40.0.8]) by localhost (spamd2-us-west.apache.org [10.40.0.9]) (amavisd-new, port 10024) with ESMTP id J-FiQahveV4u for ; Tue, 2 Feb 2016 23:59:05 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx1-eu-west.apache.org (ASF Mail Server at mx1-eu-west.apache.org) with SMTP id 1EAF431AA9 for ; Tue, 2 Feb 2016 23:59:03 +0000 (UTC) Received: (qmail 56394 invoked by uid 99); 2 Feb 2016 23:59:03 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 02 Feb 2016 23:59:03 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 1B91ADFC90; Tue, 2 Feb 2016 23:59:03 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: zjaffee@apache.org To: dev@horn.incubator.apache.org Date: Tue, 02 Feb 2016 23:59:03 -0000 Message-Id: <3852446eb0384f76aa748df0f93c9640@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/3] incubator-horn git commit: 1. Separate master worker from slave workers. 2. Make master worker a dedicated Merger. 3. Fails when peer count < 2. Repository: incubator-horn Updated Branches: refs/heads/master 99c3f4e8a -> 91c0c796e 1. Separate master worker from slave workers. 2. Make master worker a dedicated Merger. 3. Fails when peer count < 2. Project: http://git-wip-us.apache.org/repos/asf/incubator-horn/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-horn/commit/a8e7ff85 Tree: http://git-wip-us.apache.org/repos/asf/incubator-horn/tree/a8e7ff85 Diff: http://git-wip-us.apache.org/repos/asf/incubator-horn/diff/a8e7ff85 Branch: refs/heads/master Commit: a8e7ff85e395855e4e292979e11732f207430e38 Parents: 99c3f4e Author: Lee Dongjin Authored: Sun Jan 31 22:11:56 2016 +0900 Committer: Lee Dongjin Committed: Mon Feb 1 00:07:37 2016 +0900 ---------------------------------------------------------------------- .../bsp/SmallLayeredNeuralNetworkTrainer.java | 31 +++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-horn/blob/a8e7ff85/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java b/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java index 132ec8c..002a9e5 100644 --- a/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java +++ b/src/main/java/org/apache/horn/bsp/SmallLayeredNeuralNetworkTrainer.java @@ -17,7 +17,7 @@ */ package org.apache.horn.bsp; -import java.io.IOException; +import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; @@ -31,6 +31,8 @@ import org.apache.hama.commons.math.DoubleMatrix; import org.apache.hama.commons.math.DoubleVector; import org.mortbay.log.Log; +import java.io.IOException; + /** * The trainer that train the {@link SmallLayeredNeuralNetwork} based on BSP * framework. @@ -55,13 +57,26 @@ public final class SmallLayeredNeuralNetworkTrainer private String modelPath; + /** + * Returns true if this worker is master worker. + * + * @param peer + * */ + private boolean isMaster( + BSPPeer peer) { + return peer.getPeerIndex() == 0; + } + @Override /** * If the model path is specified, load the existing from storage location. */ public void setup( BSPPeer peer) { - if (peer.getPeerIndex() == 0) { + // At least one master & slave worker exist. + Preconditions.checkArgument(peer.getNumPeers() >= 2); + + if (isMaster(peer)) { Log.info("Begin to train"); } this.isConverge = false; @@ -84,7 +99,7 @@ public final class SmallLayeredNeuralNetworkTrainer public void cleanup( BSPPeer peer) { // write model to modelPath - if (peer.getPeerIndex() == 0) { + if (isMaster(peer)) { try { Log.info(String.format("End of training, number of iterations: %d.\n", this.iterations)); @@ -102,12 +117,14 @@ public final class SmallLayeredNeuralNetworkTrainer BSPPeer peer) throws IOException, SyncException, InterruptedException { while (this.iterations++ < maxIterations) { - // each groom calculate the matrices updates according to local data - calculateUpdates(peer); + // each slave-worker calculate the matrices updates according to local data + if (!isMaster(peer)) { + calculateUpdates(peer); + } peer.sync(); // master merge the updates model - if (peer.getPeerIndex() == 0) { + if (isMaster(peer)) { mergeUpdates(peer); } peer.sync(); @@ -188,7 +205,7 @@ public final class SmallLayeredNeuralNetworkTrainer int numMessages = peer.getNumCurrentMessages(); boolean isConverge = false; if (numMessages == 0) { // converges - isConverge = true; + this.isConverge = true; return; }