singa-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wang...@apache.org
Subject [2/2] incubator-singa git commit: SINGA-33 Automatically launch a number of processes in the cluster
Date Tue, 14 Jul 2015 11:26:17 GMT
SINGA-33 Automatically launch a number of processes in the cluster

1. add conf/hostfile
  This is the global list of hosts that singa can use.
2. add tool/gen_hosts.py
  This script generates a host list for a specific singa job.
3. all hostfiles in examples are removed

When the singa-run.sh script is run, gen_hosts.py reads
cluster.conf and generates a job.hosts file in the same directory.
The job.hosts file contains the list of hosts that run this job.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/acb96d4c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/acb96d4c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/acb96d4c

Branch: refs/heads/master
Commit: acb96d4cd9682c4a9f2b85d529ca5d4ce97a0605
Parents: 9a6e09f
Author: wang sheng <wangsheng1001@gmail.com>
Authored: Wed Jul 15 00:56:16 2015 +0800
Committer: wang sheng <wangsheng1001@gmail.com>
Committed: Wed Jul 15 02:55:01 2015 +0800

----------------------------------------------------------------------
 .gitignore                |  9 +++++---
 bin/singa-run.sh          | 22 ++++++++----------
 bin/singa-stop.sh         |  4 ++--
 conf/hostfile             |  1 +
 examples/cifar10/hostfile |  1 -
 examples/mnist/hostfile   |  8 -------
 tool/gen_hosts.py         | 52 ++++++++++++++++++++++++++++++++++++++++++
 tool/plot/__init__.py     |  0
 8 files changed, 71 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 962f6b9..a419725 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,14 +14,17 @@
 *.cproject
 *.log
 *.nfs*
+*_pb2.py
+*.pyc
+*.pb.h
+*.pb.cc
+*.hosts
+*.out
 src/test/data/*
 tmp
 log*
 build/
 tmp/
-include/proto/*.h
-src/proto/*.cc
-src/proto/*.pb.h
 .sync
 *lmdb
 *.binaryproto

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/bin/singa-run.sh
----------------------------------------------------------------------
diff --git a/bin/singa-run.sh b/bin/singa-run.sh
index 46ed715..45be0a1 100755
--- a/bin/singa-run.sh
+++ b/bin/singa-run.sh
@@ -39,7 +39,7 @@ if [ $# = 1 ] ; then
   if [[ $1 = "-conf="* ]] ; then
     valid_args=true
     conf_path=${1:6}
-    host_path=$conf_path/hostfile
+    host_path=$conf_path/job.hosts
   fi
 elif [ $# = 2 ] ; then
   if [[ $1 = "-cluster="* ]] && [[ $2 = "-model="*  ]] ; then
@@ -62,11 +62,7 @@ BASE=`cd "$BIN/..">/dev/null; pwd`
 cd $BASE
 
 # clenup singa data
-if [ -z $host_path ] ; then
-  $BIN/singa-stop.sh 
-else
-  $BIN/singa-stop.sh $host_path
-fi
+$BIN/singa-stop.sh conf/hostfile
 
 # start zookeeper
 $BIN/zk-service.sh start 2>/dev/null
@@ -76,12 +72,18 @@ sleep 3
 
 # check mode
 if [ $# = 2 ] ; then
-  # start singa process
+  # start single singa process
   cmd="./singa "$@
   echo starting singa ...
   echo executing : $cmd
   $cmd
 elif [ $# = 1 ] ; then
+  # start multiple singa processes
+  # generate host file
+  cmd=" python tool/gen_hosts.py -conf=$conf_path/cluster.conf \
+    -src=conf/hostfile -dst=$host_path"
+  echo $cmd
+  $cmd
   # ssh and start singa processes
   ssh_options="-oStrictHostKeyChecking=no \
   -oUserKnownHostsFile=/dev/null \
@@ -102,8 +104,4 @@ elif [ $# = 1 ] ; then
 fi
 
 # cleanup singa data
-if [ -z $host_path ] ; then
-  $BIN/singa-stop.sh
-else
-  $BIN/singa-stop.sh $host_path
-fi
+$BIN/singa-stop.sh conf/hostfile

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/bin/singa-stop.sh
----------------------------------------------------------------------
diff --git a/bin/singa-stop.sh b/bin/singa-stop.sh
index ebd74e8..1b36675 100755
--- a/bin/singa-stop.sh
+++ b/bin/singa-stop.sh
@@ -37,7 +37,7 @@ BIN=`cd "$BIN">/dev/null; pwd`
 BASE=`cd "$BIN/..">/dev/null; pwd`
 ZKDATA_DIR="/tmp/zookeeper"
 
-PROC_NAME="lt-singa"
+PROC_NAME="*singa"
 HOST_FILE=$1
 
 
@@ -52,7 +52,7 @@ elif [ $# = 1 ] ; then
   -oLogLevel=quiet"
   hosts=(`cat $HOST_FILE |cut -d ' ' -f 1`)
   for i in ${hosts[@]} ; do
-    cmd="killall -s SIGKILL "$PROC_NAME
+    cmd="killall -s SIGKILL -r "$PROC_NAME
     echo kill singa @ $i ...
     if [ $i == localhost ] ; then
       $cmd

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/conf/hostfile
----------------------------------------------------------------------
diff --git a/conf/hostfile b/conf/hostfile
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/conf/hostfile
@@ -0,0 +1 @@
+localhost

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/examples/cifar10/hostfile
----------------------------------------------------------------------
diff --git a/examples/cifar10/hostfile b/examples/cifar10/hostfile
deleted file mode 100644
index 2fbb50c..0000000
--- a/examples/cifar10/hostfile
+++ /dev/null
@@ -1 +0,0 @@
-localhost

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/examples/mnist/hostfile
----------------------------------------------------------------------
diff --git a/examples/mnist/hostfile b/examples/mnist/hostfile
deleted file mode 100644
index 1781444..0000000
--- a/examples/mnist/hostfile
+++ /dev/null
@@ -1,8 +0,0 @@
-192.168.26.10
-192.168.26.11
-192.168.26.12
-192.168.26.13
-192.168.26.15
-192.168.26.16
-192.168.26.17
-192.168.26.18

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/tool/gen_hosts.py
----------------------------------------------------------------------
diff --git a/tool/gen_hosts.py b/tool/gen_hosts.py
new file mode 100755
index 0000000..e2ed29d
--- /dev/null
+++ b/tool/gen_hosts.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+import argparse
+import os
+import sys
+from google.protobuf import text_format
+from plot.cluster_pb2 import ClusterProto
+
+# parse command line
+parser = argparse.ArgumentParser(description='Generate host list from host file for a SINGA job')
+parser.add_argument('-conf', dest='conf', metavar='CONF_FILE', required=True, help='cluster.conf file')
+parser.add_argument('-src', dest='src', metavar='SRC_FILE', required=True, help='global host file')
+parser.add_argument('-dst', dest='dst', metavar='DST_FILE', required=True, help='generated list')
+args = parser.parse_args();
+
+# change to SINGA_HOME
+abspath = os.path.abspath(__file__)
+dname = os.path.dirname(abspath)
+os.chdir(dname+'/..')
+
+# read from .conf file
+fd_conf = open(args.conf, 'r')
+cluster = ClusterProto()
+text_format.Merge(str(fd_conf.read()), cluster)
+nworker_procs = cluster.nworker_groups * cluster.nworkers_per_group / cluster.nworkers_per_procs
+nserver_procs = cluster.nserver_groups * cluster.nservers_per_group / cluster.nservers_per_procs
+nprocs = 0
+if (cluster.server_worker_separate) :
+  nprocs = nworker_procs+nserver_procs
+else:
+  nprocs = max(nworker_procs, nserver_procs)
+fd_conf.close()
+
+# read from source host file
+fd_src = open(args.src, 'r')
+hosts = []
+for line in fd_src:
+  line = line.strip()
+  if len(line) == 0 or line[0] == '#':
+    continue
+  hosts.append(line)
+fd_src.close()
+
+# write to dst file
+num_hosts = len(hosts)
+if (num_hosts == 0):
+  print 'source host file is empty'
+  sys.exit()
+fd_dst = open(args.dst, 'w')
+for i in range(nprocs):
+  fd_dst.write(hosts[i % num_hosts] + '\n')
+fd_dst.close()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/tool/plot/__init__.py
----------------------------------------------------------------------
diff --git a/tool/plot/__init__.py b/tool/plot/__init__.py
new file mode 100644
index 0000000..e69de29


Mime
View raw message