spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From andrewo...@apache.org
Subject spark git commit: [Spark-4848] Allow different Worker configurations in standalone cluster
Date Tue, 14 Apr 2015 01:21:31 GMT
Repository: spark
Updated Branches:
  refs/heads/master 4898dfa46 -> 435b8779d


[Spark-4848] Allow different Worker configurations in standalone cluster

This refixes #3699 with the latest code.
This fixes SPARK-4848

I've changed the stand-alone cluster scripts to allow different workers to have different
numbers of instances, with both port and web-ui port following allong appropriately.

I did this by moving the loop over instances from start-slaves and stop-slaves (on the master)
to start-slave and stop-slave (on the worker).

Wile I was at it, I changed SPARK_WORKER_PORT to work the same way as SPARK_WORKER_WEBUI_PORT,
since the new methods work fine for both.

Author: Nathan Kronenfeld <nkronenfeld@oculusinfo.com>

Closes #5140 from nkronenfeld/feature/spark-4848 and squashes the following commits:

cf5f47e [Nathan Kronenfeld] Merge remote branch 'upstream/master' into feature/spark-4848
044ca6f [Nathan Kronenfeld] Documentation and formatting as requested by by andrewor14
d739640 [Nathan Kronenfeld] Move looping through instances from the master to the workers,
so that each worker respects its own number of instances and web-ui port


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/435b8779
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/435b8779
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/435b8779

Branch: refs/heads/master
Commit: 435b8779df01a7477addecb1023605957bca4e9b
Parents: 4898dfa
Author: Nathan Kronenfeld <nkronenfeld@oculusinfo.com>
Authored: Mon Apr 13 18:21:16 2015 -0700
Committer: Andrew Or <andrew@databricks.com>
Committed: Mon Apr 13 18:21:16 2015 -0700

----------------------------------------------------------------------
 sbin/start-slave.sh  | 59 ++++++++++++++++++++++++++++++++++++++++++++---
 sbin/start-slaves.sh | 11 +--------
 sbin/stop-slave.sh   | 43 ++++++++++++++++++++++++++++++++++
 sbin/stop-slaves.sh  | 12 +++-------
 4 files changed, 103 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/435b8779/sbin/start-slave.sh
----------------------------------------------------------------------
diff --git a/sbin/start-slave.sh b/sbin/start-slave.sh
index 5a6de11..4c919ff 100755
--- a/sbin/start-slave.sh
+++ b/sbin/start-slave.sh
@@ -18,15 +18,68 @@
 #
 
 # Starts a slave on the machine this script is executed on.
+#
+# Environment Variables
+#
+#   SPARK_WORKER_INSTANCES  The number of worker instances to run on this 
+#                           slave.  Default is 1.
+#   SPARK_WORKER_PORT       The base port number for the first worker. If set, 
+#                           subsequent workers will increment this number.  If
+#                           unset, Spark will find a valid port number, but
+#                           with no guarantee of a predictable pattern.
+#   SPARK_WORKER_WEBUI_PORT The base port for the web interface of the first
+#                           worker.  Subsequent workers will increment this 
+#                           number.  Default is 8081.
 
-usage="Usage: start-slave.sh <worker#> <spark-master-URL> where <spark-master-URL>
is like spark://localhost:7077"
+usage="Usage: start-slave.sh <spark-master-URL> where <spark-master-URL> is like
spark://localhost:7077"
 
-if [ $# -lt 2 ]; then
+if [ $# -lt 1 ]; then
   echo $usage
+  echo Called as start-slave.sh $*
   exit 1
 fi
 
 sbin="`dirname "$0"`"
 sbin="`cd "$sbin"; pwd`"
 
-"$sbin"/spark-daemon.sh start org.apache.spark.deploy.worker.Worker "$@"
+. "$sbin/spark-config.sh"
+
+. "$SPARK_PREFIX/bin/load-spark-env.sh"
+
+# First argument should be the master; we need to store it aside because we may
+# need to insert arguments between it and the other arguments
+MASTER=$1
+shift
+
+# Determine desired worker port
+if [ "$SPARK_WORKER_WEBUI_PORT" = "" ]; then
+  SPARK_WORKER_WEBUI_PORT=8081
+fi
+
+# Start up the appropriate number of workers on this machine.
+# quick local function to start a worker
+function start_instance {
+  WORKER_NUM=$1
+  shift
+
+  if [ "$SPARK_WORKER_PORT" = "" ]; then
+    PORT_FLAG=
+    PORT_NUM=
+  else
+    PORT_FLAG="--port"
+    PORT_NUM=$(( $SPARK_WORKER_PORT + $WORKER_NUM - 1 ))
+  fi
+  WEBUI_PORT=$(( $SPARK_WORKER_WEBUI_PORT + $WORKER_NUM - 1 ))
+
+  "$sbin"/spark-daemon.sh start org.apache.spark.deploy.worker.Worker $WORKER_NUM \
+     --webui-port "$WEBUI_PORT" $PORT_FLAG $PORT_NUM $MASTER "$@"
+}
+
+if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
+  start_instance 1 "$@"
+else
+  for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
+    start_instance $(( 1 + $i )) "$@"
+  done
+fi
+

http://git-wip-us.apache.org/repos/asf/spark/blob/435b8779/sbin/start-slaves.sh
----------------------------------------------------------------------
diff --git a/sbin/start-slaves.sh b/sbin/start-slaves.sh
index 4356c03..24d6268 100755
--- a/sbin/start-slaves.sh
+++ b/sbin/start-slaves.sh
@@ -59,13 +59,4 @@ if [ "$START_TACHYON" == "true" ]; then
 fi
 
 # Launch the slaves
-if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
-  exec "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin/start-slave.sh" 1 "spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT"
-else
-  if [ "$SPARK_WORKER_WEBUI_PORT" = "" ]; then
-    SPARK_WORKER_WEBUI_PORT=8081
-  fi
-  for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
-    "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin/start-slave.sh" $(( $i + 1 )) --webui-port
$(( $SPARK_WORKER_WEBUI_PORT + $i )) "spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT"
-  done
-fi
+"$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin/start-slave.sh" "spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT"

http://git-wip-us.apache.org/repos/asf/spark/blob/435b8779/sbin/stop-slave.sh
----------------------------------------------------------------------
diff --git a/sbin/stop-slave.sh b/sbin/stop-slave.sh
new file mode 100755
index 0000000..3d1da5b
--- /dev/null
+++ b/sbin/stop-slave.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# A shell script to stop all workers on a single slave
+#
+# Environment variables
+#
+#   SPARK_WORKER_INSTANCES The number of worker instances that should be 
+#                          running on this slave.  Default is 1.
+
+# Usage: stop-slave.sh
+#   Stops all slaves on this worker machine
+
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
+
+. "$sbin/spark-config.sh"
+
+. "$SPARK_PREFIX/bin/load-spark-env.sh"
+
+if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
+  "$sbin"/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker 1
+else
+  for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
+    "$sbin"/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker $(( $i + 1 ))
+  done
+fi

http://git-wip-us.apache.org/repos/asf/spark/blob/435b8779/sbin/stop-slaves.sh
----------------------------------------------------------------------
diff --git a/sbin/stop-slaves.sh b/sbin/stop-slaves.sh
index 7c22011..54c9bd4 100755
--- a/sbin/stop-slaves.sh
+++ b/sbin/stop-slaves.sh
@@ -17,8 +17,8 @@
 # limitations under the License.
 #
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 . "$sbin/spark-config.sh"
 
@@ -29,10 +29,4 @@ if [ -e "$sbin"/../tachyon/bin/tachyon ]; then
   "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin"/../tachyon/bin/tachyon killAll tachyon.worker.Worker
 fi
 
-if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
-  "$sbin"/spark-daemons.sh stop org.apache.spark.deploy.worker.Worker 1
-else
-  for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
-    "$sbin"/spark-daemons.sh stop org.apache.spark.deploy.worker.Worker $(( $i + 1 ))
-  done
-fi
+"$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin"/stop-slave.sh


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message