hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a.@apache.org
Subject [2/2] hadoop git commit: HADOOP-13209. replace slaves with workers (John Smith via aw)
Date Tue, 28 Jun 2016 12:54:09 GMT
HADOOP-13209. replace slaves with workers (John Smith via aw)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/23c3ff85
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/23c3ff85
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/23c3ff85

Branch: refs/heads/trunk
Commit: 23c3ff85a9e73d8f0755e14f12cc7c89b72acddd
Parents: 2a0082c
Author: Allen Wittenauer <aw@apache.org>
Authored: Tue Jun 28 05:53:03 2016 -0700
Committer: Allen Wittenauer <aw@apache.org>
Committed: Tue Jun 28 05:53:03 2016 -0700

----------------------------------------------------------------------
 .../hadoop-common/src/main/bin/hadoop           |   6 +-
 .../src/main/bin/hadoop-config.cmd              |   4 +-
 .../hadoop-common/src/main/bin/hadoop-config.sh |   4 +
 .../src/main/bin/hadoop-daemons.sh              |   6 +-
 .../src/main/bin/hadoop-functions.sh            |  75 ++++++------
 .../hadoop-common/src/main/bin/slaves.sh        |  60 ----------
 .../hadoop-common/src/main/bin/workers.sh       |  60 ++++++++++
 .../hadoop-common/src/main/conf/hadoop-env.sh   |   4 +-
 .../main/conf/hadoop-user-functions.sh.example  |   4 +-
 .../hadoop-common/src/main/conf/workers         |   1 +
 .../hadoop-common/src/main/java/overview.html   |  46 +++----
 .../src/site/markdown/ClusterSetup.md           |  18 +--
 .../src/site/markdown/CommandsManual.md         |   6 +-
 .../src/site/markdown/RackAwareness.md          |   4 +-
 .../src/test/scripts/hadoop_slaves.bats         |  37 ------
 .../src/test/scripts/hadoop_ssh.bats            |  18 +--
 .../src/test/scripts/hadoop_workers.bats        |  37 ++++++
 .../hadoop-hdfs/src/main/bin/hdfs               |  10 +-
 .../hadoop-hdfs/src/main/bin/start-dfs.sh       |  12 +-
 .../src/main/bin/start-secure-dns.sh            |   2 +-
 .../hadoop-hdfs/src/main/bin/stop-dfs.sh        |  12 +-
 .../hadoop-hdfs/src/main/bin/stop-secure-dns.sh |   2 +-
 .../token/block/BlockTokenSecretManager.java    |  47 ++++----
 .../hadoop-hdfs/src/main/java/overview.html     |  46 +++----
 .../hadoop-hdfs/src/site/markdown/Federation.md |   2 +-
 .../markdown/HDFSHighAvailabilityWithQJM.md     |   2 +-
 .../hadoop/filecache/DistributedCache.java      |  70 +++++------
 .../mapreduce/filecache/DistributedCache.java   | 108 ++++++++---------
 .../src/site/markdown/MapReduceTutorial.md      |  22 ++--
 .../apache/hadoop/mapred/ReliabilityTest.java   | 119 ++++++++++---------
 .../apache/hadoop/mapred/TestLazyOutput.java    |  31 ++---
 .../apache/hadoop/mapred/pipes/TestPipes.java   | 105 ++++++++--------
 .../mapreduce/TestMapReduceLazyOutput.java      |  44 ++++---
 .../mapreduce/security/TestBinaryTokenFile.java |  68 ++++++-----
 .../mapreduce/security/TestMRCredentials.java   |   6 +-
 .../hadoop-yarn/bin/start-yarn.sh               |   6 +-
 .../hadoop-yarn/bin/stop-yarn.sh                |   6 +-
 hadoop-yarn-project/hadoop-yarn/bin/yarn        |  12 +-
 .../hadoop-yarn/bin/yarn-config.cmd             |   2 +-
 .../hadoop-yarn/bin/yarn-config.sh              |  20 ++--
 .../hadoop-yarn/bin/yarn-daemons.sh             |   6 +-
 hadoop-yarn-project/hadoop-yarn/conf/slaves     |   1 -
 hadoop-yarn-project/hadoop-yarn/pom.xml         |   2 +-
 43 files changed, 588 insertions(+), 565 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/bin/hadoop
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
index 6cf872c..b57a4c1 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
@@ -28,7 +28,7 @@ function hadoop_usage
   hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in slave mode"
   hadoop_add_option "loglevel level" "set the log4j level for this command"
   hadoop_add_option "hosts filename" "list of hosts to use in slave mode"
-  hadoop_add_option "slaves" "turn on slave mode"
+  hadoop_add_option "workers" "turn on worker mode"
 
   hadoop_add_subcommand "checknative" "check native Hadoop and compression libraries availability"
   hadoop_add_subcommand "classpath" "prints the class path needed to get the Hadoop jar and the required libraries"
@@ -205,8 +205,8 @@ fi
 
 hadoop_verify_user "${HADOOP_SUBCMD}"
 
-if [[ ${HADOOP_SLAVE_MODE} = true ]]; then
-  hadoop_common_slave_mode_execute "${HADOOP_COMMON_HOME}/bin/hadoop" "${HADOOP_USER_PARAMS[@]}"
+if [[ ${HADOOP_WORKER_MODE} = true ]]; then
+  hadoop_common_worker_mode_execute "${HADOOP_COMMON_HOME}/bin/hadoop" "${HADOOP_USER_PARAMS[@]}"
   exit $?
 fi
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd
index 8d4b897..d77dc53 100644
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd
@@ -80,12 +80,12 @@ if "%1" == "--config" (
 )
 
 @rem
-@rem check to see it is specified whether to use the slaves or the
+@rem check to see if it is specified whether to use the workers or the
 @rem masters file
 @rem
 
 if "%1" == "--hosts" (
-  set HADOOP_SLAVES=%HADOOP_CONF_DIR%\%2
+  set HADOOP_WORKERS=%HADOOP_CONF_DIR%\%2
   shift
   shift
 )

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
index 104247a..ba8d69d 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
@@ -113,6 +113,10 @@ hadoop_exec_userfuncs
 hadoop_exec_user_hadoopenv
 hadoop_verify_confdir
 
+hadoop_deprecate_envvar HADOOP_SLAVES HADOOP_WORKERS
+hadoop_deprecate_envvar HADOOP_SLAVE_NAMES HADOOP_WORKER_NAMES
+hadoop_deprecate_envvar HADOOP_SLAVE_SLEEP HADOOP_WORKER_SLEEP
+
 # do all the OS-specific startup bits here
 # this allows us to get a decent JAVA_HOME,
 # call crle for LD_LIBRARY_PATH, etc.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
index ae1e324..5530491 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
@@ -57,13 +57,13 @@ else
 fi
 
 hadoop_error "WARNING: Use of this script to ${daemonmode} HDFS daemons is deprecated."
-hadoop_error "WARNING: Attempting to execute replacement \"hdfs --slaves --daemon ${daemonmode}\" instead."
+hadoop_error "WARNING: Attempting to execute replacement \"hdfs --workers --daemon ${daemonmode}\" instead."
 
 #
 # Original input was usually:
 #  hadoop-daemons.sh (shell options) (start|stop) (datanode|...) (daemon options)
 # we're going to turn this into
-#  hdfs --slaves --daemon (start|stop) (rest of options)
+#  hdfs --workers --daemon (start|stop) (rest of options)
 #
 for (( i = 0; i < ${#HADOOP_USER_PARAMS[@]}; i++ ))
 do
@@ -74,4 +74,4 @@ do
   fi
 done
 
-${hdfsscript} --slaves --daemon "${daemonmode}" "${HADOOP_USER_PARAMS[@]}"
+${hdfsscript} --workers --daemon "${daemonmode}" "${HADOOP_USER_PARAMS[@]}"

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
index 99c4738..c380599 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
@@ -602,25 +602,25 @@ function hadoop_basic_init
   HADOOP_SSH_PARALLEL=${HADOOP_SSH_PARALLEL:-10}
 }
 
-## @description  Set the slave support information to the contents
+## @description  Set the worker support information to the contents
 ## @description  of `filename`
 ## @audience     public
 ## @stability    stable
 ## @replaceable  no
 ## @param        filename
 ## @return       will exit if file does not exist
-function hadoop_populate_slaves_file
+function hadoop_populate_workers_file
 {
-  local slavesfile=$1
+  local workersfile=$1
   shift
-  if [[ -f "${slavesfile}" ]]; then
+  if [[ -f "${workersfile}" ]]; then
     # shellcheck disable=2034
-    HADOOP_SLAVES="${slavesfile}"
-  elif [[ -f "${HADOOP_CONF_DIR}/${slavesfile}" ]]; then
+    HADOOP_WORKERS="${workersfile}"
+  elif [[ -f "${HADOOP_CONF_DIR}/${workersfile}" ]]; then
     # shellcheck disable=2034
-    HADOOP_SLAVES="${HADOOP_CONF_DIR}/${slavesfile}"
+    HADOOP_WORKERS="${HADOOP_CONF_DIR}/${workersfile}"
   else
-    hadoop_error "ERROR: Cannot find hosts file \"${slavesfile}\""
+    hadoop_error "ERROR: Cannot find hosts file \"${workersfile}\""
     hadoop_exit_with_usage 1
   fi
 }
@@ -669,14 +669,14 @@ function hadoop_actual_ssh
 {
   # we are passing this function to xargs
   # should get hostname followed by rest of command line
-  local slave=$1
+  local worker=$1
   shift
 
   # shellcheck disable=SC2086
-  ssh ${HADOOP_SSH_OPTS} ${slave} $"${@// /\\ }" 2>&1 | sed "s/^/$slave: /"
+  ssh ${HADOOP_SSH_OPTS} ${worker} $"${@// /\\ }" 2>&1 | sed "s/^/$worker: /"
 }
 
-## @description  Connect to ${HADOOP_SLAVES} or ${HADOOP_SLAVE_NAMES}
+## @description  Connect to ${HADOOP_WORKERS} or ${HADOOP_WORKER_NAMES}
 ## @description  and execute command.
 ## @audience     private
 ## @stability    evolving
@@ -687,45 +687,52 @@ function hadoop_connect_to_hosts
 {
   # shellcheck disable=SC2124
   local params="$@"
-  local slave_file
+  local worker_file
   local tmpslvnames
 
   #
   # ssh (or whatever) to a host
   #
   # User can specify hostnames or a file where the hostnames are (not both)
-  if [[ -n "${HADOOP_SLAVES}" && -n "${HADOOP_SLAVE_NAMES}" ]] ; then
-    hadoop_error "ERROR: Both HADOOP_SLAVES and HADOOP_SLAVE_NAME were defined. Aborting."
+  if [[ -n "${HADOOP_WORKERS}" && -n "${HADOOP_WORKER_NAMES}" ]] ; then
+    hadoop_error "ERROR: Both HADOOP_WORKERS and HADOOP_WORKER_NAME were defined. Aborting."
     exit 1
-  elif [[ -z "${HADOOP_SLAVE_NAMES}" ]]; then
-    slave_file=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
+  elif [[ -z "${HADOOP_WORKER_NAMES}" ]]; then
+    if [[ -n "${HADOOP_WORKERS}" ]]; then
+      worker_file=${HADOOP_WORKERS}
+    elif [[ -f "${HADOOP_CONF_DIR}/workers" ]]; then
+      worker_file=${HADOOP_CONF_DIR}/workers
+    elif [[ -f "${HADOOP_CONF_DIR}/slaves" ]]; then
+      hadoop_error "WARNING: 'slaves' file has been deprecated. Please use 'workers' file instead."
+      worker_file=${HADOOP_CONF_DIR}/slaves
+    fi
   fi
 
   # if pdsh is available, let's use it.  otherwise default
   # to a loop around ssh.  (ugh)
   if [[ -e '/usr/bin/pdsh' ]]; then
-    if [[ -z "${HADOOP_SLAVE_NAMES}" ]] ; then
+    if [[ -z "${HADOOP_WORKER_NAMES}" ]] ; then
       # if we were given a file, just let pdsh deal with it.
       # shellcheck disable=SC2086
       PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \
-      -f "${HADOOP_SSH_PARALLEL}" -w ^"${slave_file}" $"${@// /\\ }" 2>&1
+      -f "${HADOOP_SSH_PARALLEL}" -w ^"${worker_file}" $"${@// /\\ }" 2>&1
     else
       # no spaces allowed in the pdsh arg host list
       # shellcheck disable=SC2086
-      tmpslvnames=$(echo ${SLAVE_NAMES} | tr -s ' ' ,)
+      tmpslvnames=$(echo ${HADOOP_WORKER_NAMES} | tr -s ' ' ,)
       PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \
         -f "${HADOOP_SSH_PARALLEL}" \
         -w "${tmpslvnames}" $"${@// /\\ }" 2>&1
     fi
   else
-    if [[ -z "${HADOOP_SLAVE_NAMES}" ]]; then
-      HADOOP_SLAVE_NAMES=$(sed 's/#.*$//;/^$/d' "${slave_file}")
+    if [[ -z "${HADOOP_WORKER_NAMES}" ]]; then
+      HADOOP_WORKER_NAMES=$(sed 's/#.*$//;/^$/d' "${worker_file}")
     fi
     hadoop_connect_to_hosts_without_pdsh "${params}"
   fi
 }
 
-## @description  Connect to ${SLAVE_NAMES} and execute command
+## @description  Connect to ${HADOOP_WORKER_NAMES} and execute command
 ## @description  under the environment which does not support pdsh.
 ## @audience     private
 ## @stability    evolving
@@ -736,24 +743,24 @@ function hadoop_connect_to_hosts_without_pdsh
 {
   # shellcheck disable=SC2124
   local params="$@"
-  local slaves=(${HADOOP_SLAVE_NAMES})
-  for (( i = 0; i < ${#slaves[@]}; i++ ))
+  local workers=(${HADOOP_WORKER_NAMES})
+  for (( i = 0; i < ${#workers[@]}; i++ ))
   do
     if (( i != 0 && i % HADOOP_SSH_PARALLEL == 0 )); then
       wait
     fi
     # shellcheck disable=SC2086
-    hadoop_actual_ssh "${slaves[$i]}" ${params} &
+    hadoop_actual_ssh "${workers[$i]}" ${params} &
   done
   wait
 }
 
-## @description  Utility routine to handle --slaves mode
+## @description  Utility routine to handle --workers mode
 ## @audience     private
 ## @stability    evolving
 ## @replaceable  yes
 ## @param        commandarray
-function hadoop_common_slave_mode_execute
+function hadoop_common_worker_mode_execute
 {
   #
   # input should be the command line as given by the user
@@ -761,13 +768,13 @@ function hadoop_common_slave_mode_execute
   #
   local argv=("$@")
 
-  # if --slaves is still on the command line, remove it
+  # if --workers is still on the command line, remove it
   # to prevent loops
   # Also remove --hostnames and --hosts along with arg values
   local argsSize=${#argv[@]};
   for (( i = 0; i < argsSize; i++ ))
   do
-    if [[ "${argv[$i]}" =~ ^--slaves$ ]]; then
+    if [[ "${argv[$i]}" =~ ^--workers$ ]]; then
       unset argv[$i]
     elif [[ "${argv[$i]}" =~ ^--hostnames$ ]] ||
       [[ "${argv[$i]}" =~ ^--hosts$ ]]; then
@@ -2051,13 +2058,13 @@ function hadoop_parse_args
       --hostnames)
         shift
         # shellcheck disable=SC2034
-        HADOOP_SLAVE_NAMES="$1"
+        HADOOP_WORKER_NAMES="$1"
         shift
         ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2))
       ;;
       --hosts)
         shift
-        hadoop_populate_slaves_file "$1"
+        hadoop_populate_workers_file "$1"
         shift
         ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2))
       ;;
@@ -2068,10 +2075,10 @@ function hadoop_parse_args
         shift
         ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2))
       ;;
-      --slaves)
+      --workers)
         shift
         # shellcheck disable=SC2034
-        HADOOP_SLAVE_MODE=true
+        HADOOP_WORKER_MODE=true
         ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+1))
       ;;
       *)
@@ -2104,4 +2111,4 @@ function hadoop_xml_escape
 function hadoop_sed_escape
 {
   sed -e 's/[\/&]/\\&/g' <<< "$1"
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh b/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh
deleted file mode 100755
index 34bf0eb..0000000
--- a/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Run a shell command on all slave hosts.
-#
-# Environment Variables
-#
-#   HADOOP_SLAVES    File naming remote hosts.
-#     Default is ${HADOOP_CONF_DIR}/slaves.
-#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
-#   HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
-#   HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
-##
-
-function hadoop_usage
-{
-  echo "Usage: slaves.sh [--config confdir] command..."
-}
-
-# let's locate libexec...
-if [[ -n "${HADOOP_HOME}" ]]; then
-  HADOOP_DEFAULT_LIBEXEC_DIR="${HADOOP_HOME}/libexec"
-else
-  this="${BASH_SOURCE-$0}"
-  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
-  HADOOP_DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
-fi
-
-HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
-# shellcheck disable=SC2034
-HADOOP_NEW_CONFIG=true
-if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
-  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
-else
-  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
-  exit 1
-fi
-
-# if no args specified, show usage
-if [[ $# -le 0 ]]; then
-  hadoop_exit_with_usage 1
-fi
-
-hadoop_connect_to_hosts "$@"
-

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/bin/workers.sh
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/workers.sh b/hadoop-common-project/hadoop-common/src/main/bin/workers.sh
new file mode 100755
index 0000000..84ffabd
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/bin/workers.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Run a shell command on all worker hosts.
+#
+# Environment Variables
+#
+#   HADOOP_WORKERS    File naming remote hosts.
+#     Default is ${HADOOP_CONF_DIR}/workers.
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#   HADOOP_WORKER_SLEEP Seconds to sleep between spawning remote commands.
+#   HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
+##
+
+function hadoop_usage
+{
+  echo "Usage: workers.sh [--config confdir] command..."
+}
+
+# let's locate libexec...
+if [[ -n "${HADOOP_HOME}" ]]; then
+  HADOOP_DEFAULT_LIBEXEC_DIR="${HADOOP_HOME}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  HADOOP_DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+  exit 1
+fi
+
+# if no args specified, show usage
+if [[ $# -le 0 ]]; then
+  hadoop_exit_with_usage 1
+fi
+
+hadoop_connect_to_hosts "$@"
+

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
index 3f19e45..6565d1d 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
+++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
@@ -169,8 +169,8 @@ esac
 # export HADOOP_SSH_PARALLEL=10
 
 # Filename which contains all of the hosts for any remote execution
-# helper scripts # such as slaves.sh, start-dfs.sh, etc.
-# export HADOOP_SLAVES="${HADOOP_CONF_DIR}/slaves"
+# helper scripts such as workers.sh, start-dfs.sh, etc.
+# export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers"
 
 ###
 # Options for all daemons

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
index 91a2d8b..0eeae3c 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
+++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
@@ -87,7 +87,7 @@
 #}
 
 #
-# Example:  efficient command execution for the slaves
+# Example:  efficient command execution for the workers
 #
 # To improve performance, you can use xargs -P
 # instead of the for loop, if supported.
@@ -108,7 +108,7 @@
 #  # list with each hostname read from stdin/pipe. But it consider one
 #  # line as one argument while reading from stdin/pipe. So place each
 #  # hostname in different lines while passing via pipe.
-#  tmpslvnames=$(echo "${HADOOP_SLAVE_NAMES}" | tr ' ' '\n' )
+#  tmpslvnames=$(echo "${HADOOP_WORKER_NAMES}" | tr ' ' '\n' )
 #    echo "${tmpslvnames}" | \
 #    xargs -n 1 -P"${HADOOP_SSH_PARALLEL}" \
 #    -I {} bash -c --  "hadoop_actual_ssh {} ${params}"

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/conf/workers
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/workers b/hadoop-common-project/hadoop-common/src/main/conf/workers
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/conf/workers
@@ -0,0 +1 @@
+localhost

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/main/java/overview.html
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/overview.html b/hadoop-common-project/hadoop-common/src/main/java/overview.html
index 5868617..2c64121 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/overview.html
+++ b/hadoop-common-project/hadoop-common/src/main/java/overview.html
@@ -23,33 +23,33 @@
 
 Hadoop is a distributed computing platform.
 
-<p>Hadoop primarily consists of the <a 
-href="http://hadoop.apache.org/hdfs/">Hadoop Distributed FileSystem 
-(HDFS)</a> and an 
+<p>Hadoop primarily consists of the <a
+href="http://hadoop.apache.org/hdfs/">Hadoop Distributed FileSystem
+(HDFS)</a> and an
 implementation of the <a href="http://hadoop.apache.org/mapreduce/">
 Map-Reduce</a> programming paradigm.</p>
 
 
-<p>Hadoop is a software framework that lets one easily write and run applications 
+<p>Hadoop is a software framework that lets one easily write and run applications
 that process vast amounts of data. Here's what makes Hadoop especially useful:</p>
 <ul>
   <li>
     <b>Scalable</b>: Hadoop can reliably store and process petabytes.
   </li>
   <li>
-    <b>Economical</b>: It distributes the data and processing across clusters 
-    of commonly available computers. These clusters can number into the thousands 
+    <b>Economical</b>: It distributes the data and processing across clusters
+    of commonly available computers. These clusters can number into the thousands
     of nodes.
   </li>
   <li>
-    <b>Efficient</b>: By distributing the data, Hadoop can process it in parallel 
+    <b>Efficient</b>: By distributing the data, Hadoop can process it in parallel
     on the nodes where the data is located. This makes it extremely rapid.
   </li>
   <li>
-    <b>Reliable</b>: Hadoop automatically maintains multiple copies of data and 
+    <b>Reliable</b>: Hadoop automatically maintains multiple copies of data and
     automatically redeploys computing tasks based on failures.
   </li>
-</ul>  
+</ul>
 
 <h2>Requirements</h2>
 
@@ -61,15 +61,15 @@ that process vast amounts of data. Here's what makes Hadoop especially useful:</
   </li>
   <li>
     Windows is also a supported platform.
-  </li>  
+  </li>
 </ul>
-  
+
 <h3>Requisite Software</h3>
 
 <ol>
   <li>
-    Java 1.6.x, preferably from 
-    <a href="http://java.sun.com/javase/downloads/">Sun</a>. 
+    Java 1.6.x, preferably from
+    <a href="http://java.sun.com/javase/downloads/">Sun</a>.
     Set <tt>JAVA_HOME</tt> to the root of your Java installation.
   </li>
   <li>
@@ -141,8 +141,8 @@ host and port.  This is specified with the configuration property
 href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>.
 </li>
 
-<li>A <em>slaves</em> file that lists the names of all the hosts in
-the cluster.  The default slaves file is <tt>conf/slaves</tt>.
+<li>A <em>workers</em> file that lists the names of all the hosts in
+the cluster.  The default workers file is <tt>conf/workers</tt>.
 
 </ol>
 
@@ -242,31 +242,31 @@ as <tt><em>master.example.com</em>:<em>port</em></tt> in <tt>conf/mapred-site.xm
 
 <li>Directories for <tt><a
 href="../hdfs-default.html#dfs.name.dir">dfs.name.dir</a></tt> and
-<tt><a href="../hdfs-default.html#dfs.data.dir">dfs.data.dir</a> 
+<tt><a href="../hdfs-default.html#dfs.data.dir">dfs.data.dir</a>
 in <tt>conf/hdfs-site.xml</tt>.
 </tt>These are local directories used to hold distributed filesystem
-data on the master node and slave nodes respectively.  Note
+data on the master node and worker nodes respectively.  Note
 that <tt>dfs.data.dir</tt> may contain a space- or comma-separated
 list of directory names, so that data may be stored on multiple local
 devices.</li>
 
 <li><tt><a href="../mapred-default.html#mapred.local.dir">mapred.local.dir</a></tt>
-  in <tt>conf/mapred-site.xml</tt>, the local directory where temporary 
+  in <tt>conf/mapred-site.xml</tt>, the local directory where temporary
   MapReduce data is stored.  It also may be a list of directories.</li>
 
 <li><tt><a
 href="../mapred-default.html#mapred.map.tasks">mapred.map.tasks</a></tt>
 and <tt><a
-href="../mapred-default.html#mapred.reduce.tasks">mapred.reduce.tasks</a></tt> 
+href="../mapred-default.html#mapred.reduce.tasks">mapred.reduce.tasks</a></tt>
 in <tt>conf/mapred-site.xml</tt>.
 As a rule of thumb, use 10x the
-number of slave processors for <tt>mapred.map.tasks</tt>, and 2x the
-number of slave processors for <tt>mapred.reduce.tasks</tt>.</li>
+number of worker processors for <tt>mapred.map.tasks</tt>, and 2x the
+number of worker processors for <tt>mapred.reduce.tasks</tt>.</li>
 
 </ol>
 
-<p>Finally, list all slave hostnames or IP addresses in your
-<tt>conf/slaves</tt> file, one per line.  Then format your filesystem
+<p>Finally, list all worker hostnames or IP addresses in your
+<tt>conf/workers</tt> file, one per line.  Then format your filesystem
 and start your cluster on your master node, as above.
 
 </body>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md
index 7d5040c..0d551b1 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md
@@ -27,7 +27,7 @@ This document does not cover advanced topics such as [Security](./SecureMode.htm
 Prerequisites
 -------------
 
-* Install Java. See the [Hadoop Wiki](http://wiki.apache.org/hadoop/HadoopJavaVersions) for known good versions. 
+* Install Java. See the [Hadoop Wiki](http://wiki.apache.org/hadoop/HadoopJavaVersions) for known good versions.
 * Download a stable version of Hadoop from Apache mirrors.
 
 Installation
@@ -37,7 +37,7 @@ Installing a Hadoop cluster typically involves unpacking the software on all the
 
 Typically one machine in the cluster is designated as the NameNode and another machine the as ResourceManager, exclusively. These are the masters. Other services (such as Web App Proxy Server and MapReduce Job History server) are usually run either on dedicated hardware or on shared infrastrucutre, depending upon the load.
 
-The rest of the machines in the cluster act as both DataNode and NodeManager. These are the slaves.
+The rest of the machines in the cluster act as both DataNode and NodeManager. These are the workers.
 
 Configuring Hadoop in Non-Secure Mode
 -------------------------------------
@@ -216,12 +216,12 @@ The health checker script is not supposed to give ERROR if only some of the loca
 Slaves File
 -----------
 
-List all slave hostnames or IP addresses in your `etc/hadoop/slaves` file, one per line. Helper scripts (described below) will use the `etc/hadoop/slaves` file to run commands on many hosts at once. It is not used for any of the Java-based Hadoop configuration. In order to use this functionality, ssh trusts (via either passphraseless ssh or some other means, such as Kerberos) must be established for the accounts used to run Hadoop.
+List all worker hostnames or IP addresses in your `etc/hadoop/workers` file, one per line. Helper scripts (described below) will use the `etc/hadoop/workers` file to run commands on many hosts at once. It is not used for any of the Java-based Hadoop configuration. In order to use this functionality, ssh trusts (via either passphraseless ssh or some other means, such as Kerberos) must be established for the accounts used to run Hadoop.
 
 Hadoop Rack Awareness
 ---------------------
 
-Many Hadoop components are rack-aware and take advantage of the network topology for performance and safety. Hadoop daemons obtain the rack information of the slaves in the cluster by invoking an administrator configured module. See the [Rack Awareness](./RackAwareness.html) documentation for more specific information.
+Many Hadoop components are rack-aware and take advantage of the network topology for performance and safety. Hadoop daemons obtain the rack information of the workers in the cluster by invoking an administrator configured module. See the [Rack Awareness](./RackAwareness.html) documentation for more specific information.
 
 It is highly recommended configuring rack awareness prior to starting HDFS.
 
@@ -253,7 +253,7 @@ Start a HDFS DataNode with the following command on each designated node as *hdf
 
     [hdfs]$ $HADOOP_HOME/bin/hdfs --daemon start datanode
 
-If `etc/hadoop/slaves` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the HDFS processes can be started with a utility script. As *hdfs*:
+If `etc/hadoop/workers` and ssh trusted access are configured (see [Single Node Setup](./SingleCluster.html)), all of the HDFS processes can be started with a utility script. As *hdfs*:
 
     [hdfs]$ $HADOOP_HOME/sbin/start-dfs.sh
 
@@ -269,7 +269,7 @@ Start a standalone WebAppProxy server. Run on the WebAppProxy server as *yarn*.
 
     [yarn]$ $HADOOP_HOME/bin/yarn --daemon start proxyserver
 
-If `etc/hadoop/slaves` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the YARN processes can be started with a utility script. As *yarn*:
+If `etc/hadoop/workers` and ssh trusted access are configured (see [Single Node Setup](./SingleCluster.html)), all of the YARN processes can be started with a utility script. As *yarn*:
 
     [yarn]$ $HADOOP_HOME/sbin/start-yarn.sh
 
@@ -287,7 +287,7 @@ Run a script to stop a DataNode as *hdfs*:
 
     [hdfs]$ $HADOOP_HOME/bin/hdfs --daemon stop datanode
 
-If `etc/hadoop/slaves` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the HDFS processes may be stopped with a utility script. As *hdfs*:
+If `etc/hadoop/workers` and ssh trusted access are configured (see [Single Node Setup](./SingleCluster.html)), all of the HDFS processes may be stopped with a utility script. As *hdfs*:
 
     [hdfs]$ $HADOOP_HOME/sbin/stop-dfs.sh
 
@@ -295,11 +295,11 @@ Stop the ResourceManager with the following command, run on the designated Resou
 
     [yarn]$ $HADOOP_HOME/bin/yarn --daemon stop resourcemanager
 
-Run a script to stop a NodeManager on a slave as *yarn*:
+Run a script to stop a NodeManager on a worker as *yarn*:
 
     [yarn]$ $HADOOP_HOME/bin/yarn --daemon stop nodemanager
 
-If `etc/hadoop/slaves` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the YARN processes can be stopped with a utility script. As *yarn*:
+If `etc/hadoop/workers` and ssh trusted access are configured (see [Single Node Setup](./SingleCluster.html)), all of the YARN processes can be stopped with a utility script. As *yarn*:
 
     [yarn]$ $HADOOP_HOME/sbin/stop-yarn.sh
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md b/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md
index 44ba5ea..4d7d504 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md
@@ -43,10 +43,10 @@ All of the shell commands will accept a common set of options. For some commands
 | `--daemon mode` | If the command supports daemonization (e.g., `hdfs namenode`), execute in the appropriate mode. Supported modes are `start` to start the process in daemon mode, `stop` to stop the process, and `status` to determine the active status of the process. `status` will return an [LSB-compliant](http://refspecs.linuxbase.org/LSB_3.0.0/LSB-generic/LSB-generic/iniscrptact.html) result code. If no option is provided, commands that support daemonization will run in the foreground. For commands that do not support daemonization, this option is ignored. |
 | `--debug` | Enables shell level configuration debugging information |
 | `--help` | Shell script usage information. |
-| `--hostnames` | When `--slaves` is used, override the slaves file with a space delimited list of hostnames where to execute a multi-host subcommand. If `--slaves` is not used, this option is ignored. |
-| `--hosts` | When `--slaves` is used, override the slaves file with another file that contains a list of hostnames where to execute a multi-host subcommand.  If `--slaves` is not used, this option is ignored. |
+| `--hostnames` | When `--workers` is used, override the workers file with a space delimited list of hostnames where to execute a multi-host subcommand. If `--workers` is not used, this option is ignored. |
+| `--hosts` | When `--workers` is used, override the workers file with another file that contains a list of hostnames where to execute a multi-host subcommand.  If `--workers` is not used, this option is ignored. |
 | `--loglevel loglevel` | Overrides the log level. Valid log levels are FATAL, ERROR, WARN, INFO, DEBUG, and TRACE. Default is INFO. |
-| `--slaves` | If possible, execute this command on all hosts in the `slaves` file. |
+| `--workers` | If possible, execute this command on all hosts in the `workers` file. |
 
 ### Generic Options
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/site/markdown/RackAwareness.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/RackAwareness.md b/hadoop-common-project/hadoop-common/src/site/markdown/RackAwareness.md
index f440686..6a52f51 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/RackAwareness.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/RackAwareness.md
@@ -22,7 +22,7 @@ will use rack awareness for fault tolerance by placing one block
 replica on a different rack. This provides data availability in the
 event of a network switch failure or partition within the cluster.
 
-Hadoop master daemons obtain the rack id of the cluster slaves by
+Hadoop master daemons obtain the rack id of the cluster workers by
 invoking either an external script or java class as specified by
 configuration files. Using either the java class or external script
 for topology, output must adhere to the java
@@ -40,7 +40,7 @@ in the configuration file. An example, NetworkTopology.java, is
 included with the hadoop distribution and can be customized by the
 Hadoop administrator. Using a Java class instead of an external script
 has a performance benefit in that Hadoop doesn't need to fork an
-external process when a new slave node registers itself.
+external process when a new worker node registers itself.
 
 If implementing an external script, it will be specified with the
 **net.topology.script.file.name** parameter in the configuration

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_slaves.bats
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_slaves.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_slaves.bats
deleted file mode 100644
index cc33f0e..0000000
--- a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_slaves.bats
+++ /dev/null
@@ -1,37 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-load hadoop-functions_test_helper
-
-@test "hadoop_populate_slaves_file (specific file)" {
-  touch "${TMP}/file"
-  hadoop_populate_slaves_file "${TMP}/file"
-  [ "${HADOOP_SLAVES}" = "${TMP}/file" ]
-}
-
-@test "hadoop_populate_slaves_file (specific conf dir file)" {
-  HADOOP_CONF_DIR=${TMP}/1
-  mkdir -p "${HADOOP_CONF_DIR}"
-  touch "${HADOOP_CONF_DIR}/file"
-  hadoop_populate_slaves_file "file"
-  echo "${HADOOP_SLAVES}"
-  [ "${HADOOP_SLAVES}" = "${HADOOP_CONF_DIR}/file" ]
-}
-
-@test "hadoop_populate_slaves_file (no file)" {
-  HADOOP_CONF_DIR=${TMP}
-  run hadoop_populate_slaves_file "foo"
-  [ "${status}" -eq 1 ]
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_ssh.bats
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_ssh.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_ssh.bats
index 2520a1c..50959f9 100644
--- a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_ssh.bats
+++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_ssh.bats
@@ -30,22 +30,22 @@ load hadoop-functions_test_helper
   hadoop_connect_to_hosts_without_pdsh
 }
 
-@test "hadoop_common_slave_mode_execute (--slaves 1)" {
-  run  hadoop_common_slave_mode_execute --slaves command
+@test "hadoop_common_worker_mode_execute (--workers 1)" {
+  run  hadoop_common_worker_mode_execute --workers command
   [ "${output}" = "command" ]
 }
 
-@test "hadoop_common_slave_mode_execute (--slaves 2)" {
-  run  hadoop_common_slave_mode_execute --slaves command1 command2
+@test "hadoop_common_worker_mode_execute (--workers 2)" {
+  run  hadoop_common_worker_mode_execute --workers command1 command2
   [ "${output}" = "command1 command2" ]
 }
 
-@test "hadoop_common_slave_mode_execute (--hosts)" {
-  run  hadoop_common_slave_mode_execute --hosts filename command
+@test "hadoop_common_worker_mode_execute (--hosts)" {
+  run  hadoop_common_worker_mode_execute --hosts filename command
   [ "${output}" = "command" ]
 }
 
-@test "hadoop_common_slave_mode_execute (--hostnames 2)" {
-  run  hadoop_common_slave_mode_execute --hostnames "host1,host2" command1 command2
+@test "hadoop_common_worker_mode_execute (--hostnames 2)" {
+  run  hadoop_common_worker_mode_execute --hostnames "host1,host2" command1 command2
   [ "${output}" = "command1 command2" ]
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_workers.bats
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_workers.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_workers.bats
new file mode 100644
index 0000000..123bf04
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_workers.bats
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load hadoop-functions_test_helper
+
+@test "hadoop_populate_workers_file (specific file)" {
+  touch "${TMP}/file"
+  hadoop_populate_workers_file "${TMP}/file"
+  [ "${HADOOP_WORKERS}" = "${TMP}/file" ]
+}
+
+@test "hadoop_populate_workers_file (specific conf dir file)" {
+  HADOOP_CONF_DIR=${TMP}/1
+  mkdir -p "${HADOOP_CONF_DIR}"
+  touch "${HADOOP_CONF_DIR}/file"
+  hadoop_populate_workers_file "file"
+  echo "${HADOOP_WORKERS}"
+  [ "${HADOOP_WORKERS}" = "${HADOOP_CONF_DIR}/file" ]
+}
+
+@test "hadoop_populate_workers_file (no file)" {
+  HADOOP_CONF_DIR=${TMP}
+  run hadoop_populate_workers_file "foo"
+  [ "${status}" -eq 1 ]
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
index 7952560..5059528 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
@@ -26,10 +26,10 @@ function hadoop_usage
 {
   hadoop_add_option "--buildpaths" "attempt to add class files from build tree"
   hadoop_add_option "--daemon (start|status|stop)" "operate on a daemon"
-  hadoop_add_option "--hostnames list[,of,host,names]" "hosts to use in slave mode"
+  hadoop_add_option "--hostnames list[,of,host,names]" "hosts to use in worker mode"
   hadoop_add_option "--loglevel level" "set the log4j level for this command"
-  hadoop_add_option "--hosts filename" "list of hosts to use in slave mode"
-  hadoop_add_option "--slaves" "turn on slave mode"
+  hadoop_add_option "--hosts filename" "list of hosts to use in worker mode"
+  hadoop_add_option "--workers" "turn on worker mode"
 
   hadoop_add_subcommand "balancer" "run a cluster balancing utility"
   hadoop_add_subcommand "cacheadmin" "configure the HDFS cache"
@@ -293,8 +293,8 @@ fi
 
 hadoop_verify_user "${HADOOP_SUBCMD}"
 
-if [[ ${HADOOP_SLAVE_MODE} = true ]]; then
-  hadoop_common_slave_mode_execute "${HADOOP_HDFS_HOME}/bin/hdfs" "${HADOOP_USER_PARAMS[@]}"
+if [[ ${HADOOP_WORKER_MODE} = true ]]; then
+  hadoop_common_worker_mode_execute "${HADOOP_HDFS_HOME}/bin/hdfs" "${HADOOP_USER_PARAMS[@]}"
   exit $?
 fi
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh
index 1e35e7d..fc46740 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh
@@ -79,14 +79,14 @@ fi
 echo "Starting namenodes on [${NAMENODES}]"
 
 "${HADOOP_HDFS_HOME}/bin/hdfs" \
-    --slaves \
+    --workers \
     --config "${HADOOP_CONF_DIR}" \
     --hostnames "${NAMENODES}" \
     --daemon start \
     namenode ${nameStartOpt}
 
 #---------------------------------------------------------
-# datanodes (using default slaves file)
+# datanodes (using default workers file)
 
 if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
    [[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
@@ -98,7 +98,7 @@ else
   echo "Starting datanodes"
 
   "${HADOOP_HDFS_HOME}/bin/hdfs" \
-    --slaves \
+    --workers \
     --config "${HADOOP_CONF_DIR}" \
     --daemon start \
     datanode ${dataStartOpt}
@@ -125,7 +125,7 @@ if [[ -n "${SECONDARY_NAMENODES}" ]]; then
     echo "Starting secondary namenodes [${SECONDARY_NAMENODES}]"
 
     "${HADOOP_HDFS_HOME}/bin/hdfs" \
-      --slaves \
+      --workers \
       --config "${HADOOP_CONF_DIR}" \
       --hostnames "${SECONDARY_NAMENODES}" \
       --daemon start \
@@ -144,7 +144,7 @@ case "${SHARED_EDITS_DIR}" in
     echo "Starting journal nodes [${JOURNAL_NODES}]"
 
     "${HADOOP_HDFS_HOME}/bin/hdfs" \
-      --slaves \
+      --workers \
       --config "${HADOOP_CONF_DIR}" \
       --hostnames "${JOURNAL_NODES}" \
       --daemon start \
@@ -159,7 +159,7 @@ if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
   echo "Starting ZK Failover Controllers on NN hosts [${NAMENODES}]"
 
   "${HADOOP_HDFS_HOME}/bin/hdfs" \
-    --slaves \
+    --workers \
     --config "${HADOOP_CONF_DIR}" \
     --hostnames "${NAMENODES}" \
     --daemon start \

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh
index 3fce345..7dcbba8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh
@@ -45,7 +45,7 @@ fi
 if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
   exec "${HADOOP_HDFS_HOME}/bin/hdfs" \
      --config "${HADOOP_CONF_DIR}" \
-     --slaves \
+     --workers \
      --daemon start \
      datanode
 else

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh
index e693374..797b95b 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh
@@ -56,14 +56,14 @@ fi
 echo "Stopping namenodes on [${NAMENODES}]"
 
   "${HADOOP_HDFS_HOME}/bin/hdfs" \
-    --slaves \
+    --workers \
     --config "${HADOOP_CONF_DIR}" \
     --hostnames "${NAMENODES}" \
     --daemon stop \
     namenode
 
 #---------------------------------------------------------
-# datanodes (using default slaves file)
+# datanodes (using default workers file)
 
 if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
 [[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
@@ -75,7 +75,7 @@ else
   echo "Stopping datanodes"
 
   "${HADOOP_HDFS_HOME}/bin/hdfs" \
-    --slaves \
+    --workers \
     --config "${HADOOP_CONF_DIR}" \
     --daemon stop \
     datanode
@@ -94,7 +94,7 @@ if [[ -n "${SECONDARY_NAMENODES}" ]]; then
   echo "Stopping secondary namenodes [${SECONDARY_NAMENODES}]"
 
   "${HADOOP_HDFS_HOME}/bin/hdfs" \
-    --slaves \
+    --workers \
     --config "${HADOOP_CONF_DIR}" \
     --hostnames "${SECONDARY_NAMENODES}" \
     --daemon stop \
@@ -112,7 +112,7 @@ case "${SHARED_EDITS_DIR}" in
     echo "Stopping journal nodes [${JOURNAL_NODES}]"
 
     "${HADOOP_HDFS_HOME}/bin/hdfs" \
-      --slaves \
+      --workers \
       --config "${HADOOP_CONF_DIR}" \
       --hostnames "${JOURNAL_NODES}" \
       --daemon stop \
@@ -127,7 +127,7 @@ if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
   echo "Stopping ZK Failover Controllers on NN hosts [${NAMENODES}]"
 
   "${HADOOP_HDFS_HOME}/bin/hdfs" \
-    --slaves \
+    --workers \
     --config "${HADOOP_CONF_DIR}" \
     --hostnames "${NAMENODES}" \
     --daemon stop \

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh
index 2a973b1..be96836 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh
@@ -45,7 +45,7 @@ fi
 if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
   exec "${HADOOP_HDFS_HOME}/bin/hdfs" \
      --config "${HADOOP_CONF_DIR}" \
-     --slaves \
+     --workers \
      --daemon stop \
      datanode
 else

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java
index 53da44c..b55a2c4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java
@@ -43,11 +43,12 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 
 /**
- * BlockTokenSecretManager can be instantiated in 2 modes, master mode and slave
- * mode. Master can generate new block keys and export block keys to slaves,
- * while slaves can only import and use block keys received from master. Both
- * master and slave can generate and verify block tokens. Typically, master mode
- * is used by NN and slave mode is used by DN.
+ * BlockTokenSecretManager can be instantiated in 2 modes, master mode
+ * and worker mode. Master can generate new block keys and export block
+ * keys to workers, while workers can only import and use block keys
+ * received from master. Both master and worker can generate and verify
+ * block tokens. Typically, master mode is used by NN and worker mode
+ * is used by DN.
  */
 @InterfaceAudience.Private
 public class BlockTokenSecretManager extends
@@ -57,7 +58,7 @@ public class BlockTokenSecretManager extends
   public static final Token<BlockTokenIdentifier> DUMMY_TOKEN = new Token<BlockTokenIdentifier>();
 
   private final boolean isMaster;
-  
+
   /**
    * keyUpdateInterval is the interval that NN updates its block keys. It should
    * be set long enough so that all live DN's and Balancer should have sync'ed
@@ -78,7 +79,7 @@ public class BlockTokenSecretManager extends
   private final SecureRandom nonceGenerator = new SecureRandom();
 
   /**
-   * Constructor for slaves.
+   * Constructor for workers.
    *
    * @param keyUpdateInterval how often a new key will be generated
    * @param tokenLifetime how long an individual token is valid
@@ -88,10 +89,10 @@ public class BlockTokenSecretManager extends
     this(false, keyUpdateInterval, tokenLifetime, blockPoolId,
         encryptionAlgorithm, 0, 1);
   }
-  
+
   /**
    * Constructor for masters.
-   * 
+   *
    * @param keyUpdateInterval how often a new key will be generated
    * @param tokenLifetime how long an individual token is valid
    * @param nnIndex namenode index of the namenode for which we are creating the manager
@@ -108,7 +109,7 @@ public class BlockTokenSecretManager extends
     setSerialNo(new SecureRandom().nextInt());
     generateKeys();
   }
-  
+
   private BlockTokenSecretManager(boolean isMaster, long keyUpdateInterval,
       long tokenLifetime, String blockPoolId, String encryptionAlgorithm, int nnIndex, int numNNs) {
     this.intRange = Integer.MAX_VALUE / numNNs;
@@ -121,13 +122,13 @@ public class BlockTokenSecretManager extends
     this.encryptionAlgorithm = encryptionAlgorithm;
     generateKeys();
   }
-  
+
   @VisibleForTesting
   public synchronized void setSerialNo(int serialNo) {
     // we mod the serial number by the range and then add that times the index
     this.serialNo = (serialNo % intRange) + (nnRangeStart);
   }
-  
+
   public void setBlockPoolId(String blockPoolId) {
     this.blockPoolId = blockPoolId;
   }
@@ -180,7 +181,7 @@ public class BlockTokenSecretManager extends
   }
 
   /**
-   * Set block keys, only to be used in slave mode
+   * Set block keys, only to be used in worker mode
    */
   public synchronized void addKeys(ExportedBlockKeys exportedKeys)
       throws IOException {
@@ -324,7 +325,7 @@ public class BlockTokenSecretManager extends
 
   /**
    * Create an empty block token identifier
-   * 
+   *
    * @return a newly created empty block token identifier
    */
   @Override
@@ -334,7 +335,7 @@ public class BlockTokenSecretManager extends
 
   /**
    * Create a new password/secret for the given block token identifier.
-   * 
+   *
    * @param identifier
    *          the block token identifier
    * @return token password/secret
@@ -357,7 +358,7 @@ public class BlockTokenSecretManager extends
 
   /**
    * Look up the token password/secret for the given block token identifier.
-   * 
+   *
    * @param identifier
    *          the block token identifier to look up
    * @return token password/secret as byte[]
@@ -381,11 +382,11 @@ public class BlockTokenSecretManager extends
     }
     return createPassword(identifier.getBytes(), key.getKey());
   }
-  
+
   /**
    * Generate a data encryption key for this block pool, using the current
    * BlockKey.
-   * 
+   *
    * @return a data encryption key which may be used to encrypt traffic
    *         over the DataTransferProtocol
    */
@@ -401,10 +402,10 @@ public class BlockTokenSecretManager extends
         encryptionKey, Time.now() + tokenLifetime,
         encryptionAlgorithm);
   }
-  
+
   /**
    * Recreate an encryption key based on the given key id and nonce.
-   * 
+   *
    * @param keyId identifier of the secret key used to generate the encryption key.
    * @param nonce random value used to create the encryption key
    * @return the encryption key which corresponds to this (keyId, blockPoolId, nonce)
@@ -423,7 +424,7 @@ public class BlockTokenSecretManager extends
     }
     return createPassword(nonce, key.getKey());
   }
-  
+
   @VisibleForTesting
   public synchronized void setKeyUpdateIntervalForTesting(long millis) {
     this.keyUpdateInterval = millis;
@@ -433,10 +434,10 @@ public class BlockTokenSecretManager extends
   public void clearAllKeysForTesting() {
     allKeys.clear();
   }
-  
+
   @VisibleForTesting
   public synchronized int getSerialNoForTesting() {
     return serialNo;
   }
-  
+
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html
index 759c093..e6636d7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html
@@ -23,33 +23,33 @@
 
 Hadoop is a distributed computing platform.
 
-<p>Hadoop primarily consists of the <a 
-href="http://hadoop.apache.org/hdfs/">Hadoop Distributed FileSystem 
-(HDFS)</a> and an 
+<p>Hadoop primarily consists of the <a
+href="http://hadoop.apache.org/hdfs/">Hadoop Distributed FileSystem
+(HDFS)</a> and an
 implementation of the <a href="http://hadoop.apache.org/mapreduce/">
 Map-Reduce</a> programming paradigm.</p>
 
 
-<p>Hadoop is a software framework that lets one easily write and run applications 
+<p>Hadoop is a software framework that lets one easily write and run applications
 that process vast amounts of data. Here's what makes Hadoop especially useful:</p>
 <ul>
   <li>
     <b>Scalable</b>: Hadoop can reliably store and process petabytes.
   </li>
   <li>
-    <b>Economical</b>: It distributes the data and processing across clusters 
-    of commonly available computers. These clusters can number into the thousands 
+    <b>Economical</b>: It distributes the data and processing across clusters
+    of commonly available computers. These clusters can number into the thousands
     of nodes.
   </li>
   <li>
-    <b>Efficient</b>: By distributing the data, Hadoop can process it in parallel 
+    <b>Efficient</b>: By distributing the data, Hadoop can process it in parallel
     on the nodes where the data is located. This makes it extremely rapid.
   </li>
   <li>
-    <b>Reliable</b>: Hadoop automatically maintains multiple copies of data and 
+    <b>Reliable</b>: Hadoop automatically maintains multiple copies of data and
     automatically redeploys computing tasks based on failures.
   </li>
-</ul>  
+</ul>
 
 <h2>Requirements</h2>
 
@@ -61,15 +61,15 @@ that process vast amounts of data. Here's what makes Hadoop especially useful:</
   </li>
   <li>
     Windows is also a supported platform.
-  </li>  
+  </li>
 </ul>
-  
+
 <h3>Requisite Software</h3>
 
 <ol>
   <li>
-    Java 1.6.x, preferably from 
-    <a href="http://java.sun.com/javase/downloads/">Sun</a>. 
+    Java 1.6.x, preferably from
+    <a href="http://java.sun.com/javase/downloads/">Sun</a>.
     Set <tt>JAVA_HOME</tt> to the root of your Java installation.
   </li>
   <li>
@@ -141,8 +141,8 @@ host and port.  This is specified with the configuration property
 href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>.
 </li>
 
-<li>A <em>slaves</em> file that lists the names of all the hosts in
-the cluster.  The default slaves file is <tt>conf/slaves</tt>.
+<li>A <em>workers</em> file that lists the names of all the hosts in
+the cluster.  The default workers file is <tt>conf/workers</tt>.
 
 </ol>
 
@@ -242,31 +242,31 @@ as <tt><em>master.example.com</em>:<em>port</em></tt> in <tt>conf/mapred-site.xm
 
 <li>Directories for <tt><a
 href="../hdfs-default.html#dfs.name.dir">dfs.name.dir</a></tt> and
-<tt><a href="../hdfs-default.html#dfs.data.dir">dfs.data.dir</a> 
+<tt><a href="../hdfs-default.html#dfs.data.dir">dfs.data.dir</a>
 in <tt>conf/hdfs-site.xml</tt>.
 </tt>These are local directories used to hold distributed filesystem
-data on the master node and slave nodes respectively.  Note
+data on the master node and worker nodes respectively.  Note
 that <tt>dfs.data.dir</tt> may contain a space- or comma-separated
 list of directory names, so that data may be stored on multiple local
 devices.</li>
 
 <li><tt><a href="../mapred-default.html#mapred.local.dir">mapred.local.dir</a></tt>
-  in <tt>conf/mapred-site.xml</tt>, the local directory where temporary 
+  in <tt>conf/mapred-site.xml</tt>, the local directory where temporary
   MapReduce data is stored.  It also may be a list of directories.</li>
 
 <li><tt><a
 href="../mapred-default.html#mapred.map.tasks">mapred.map.tasks</a></tt>
 and <tt><a
-href="../mapred-default.html#mapred.reduce.tasks">mapred.reduce.tasks</a></tt> 
+href="../mapred-default.html#mapred.reduce.tasks">mapred.reduce.tasks</a></tt>
 in <tt>conf/mapred-site.xml</tt>.
 As a rule of thumb, use 10x the
-number of slave processors for <tt>mapred.map.tasks</tt>, and 2x the
-number of slave processors for <tt>mapred.reduce.tasks</tt>.</li>
+number of worker processors for <tt>mapred.map.tasks</tt>, and 2x the
+number of worker processors for <tt>mapred.reduce.tasks</tt>.</li>
 
 </ol>
 
-<p>Finally, list all slave hostnames or IP addresses in your
-<tt>conf/slaves</tt> file, one per line.  Then format your filesystem
+<p>Finally, list all worker hostnames or IP addresses in your
+<tt>conf/workers</tt> file, one per line.  Then format your filesystem
 and start your cluster on your master node, as above.
 
 </body>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md
index 99a41a2..0ed5df8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md
@@ -202,7 +202,7 @@ To stop the cluster run the following command:
 
     [hdfs]$ $HADOOP_HOME/sbin/stop-dfs.sh
 
-These commands can be run from any node where the HDFS configuration is available. The command uses the configuration to determine the Namenodes in the cluster and then starts the Namenode process on those nodes. The Datanodes are started on the nodes specified in the `slaves` file. The script can be used as a reference for building your own scripts to start and stop the cluster.
+These commands can be run from any node where the HDFS configuration is available. The command uses the configuration to determine the Namenodes in the cluster and then starts the Namenode process on those nodes. The Datanodes are started on the nodes specified in the `workers` file. The script can be used as a reference for building your own scripts to start and stop the cluster.
 
 ### Balancer
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
index 4549824..df2db49 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
@@ -70,7 +70,7 @@ The HDFS High Availability feature addresses the above problems by providing the
 Architecture
 ------------
 
-In a typical HA cluster, two or more separate machines are configured as NameNodes. At any point in time, exactly one of the NameNodes is in an *Active* state, and the others are in a *Standby* state. The Active NameNode is responsible for all client operations in the cluster, while the Standbys are simply acting as slaves, maintaining enough state to provide a fast failover if necessary.
+In a typical HA cluster, two or more separate machines are configured as NameNodes. At any point in time, exactly one of the NameNodes is in an *Active* state, and the others are in a *Standby* state. The Active NameNode is responsible for all client operations in the cluster, while the Standbys are simply acting as workers, maintaining enough state to provide a fast failover if necessary.
 
 In order for the Standby node to keep its state synchronized with the Active node, both nodes communicate with a group of separate daemons called "JournalNodes" (JNs). When any namespace modification is performed by the Active node, it durably logs a record of the modification to a majority of these JNs. The Standby node is capable of reading the edits from the JNs, and is constantly watching them for changes to the edit log. As the Standby Node sees the edits, it applies them to its own namespace. In the event of a failover, the Standby will ensure that it has read all of the edits from the JournalNodes before promoting itself to the Active state. This ensures that the namespace state is fully synchronized before a failover occurs.
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java
index 0783eb5..65d4329 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java
@@ -32,79 +32,79 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
 
 /**
  * Distribute application-specific large, read-only files efficiently.
- * 
+ *
  * <p><code>DistributedCache</code> is a facility provided by the Map-Reduce
  * framework to cache files (text, archives, jars etc.) needed by applications.
  * </p>
- * 
- * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached 
+ *
+ * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached
  * via the {@link org.apache.hadoop.mapred.JobConf}. The
  * <code>DistributedCache</code> assumes that the files specified via urls are
  * already present on the {@link FileSystem} at the path specified by the url
  * and are accessible by every machine in the cluster.</p>
- * 
- * <p>The framework will copy the necessary files on to the slave node before 
- * any tasks for the job are executed on that node. Its efficiency stems from 
- * the fact that the files are only copied once per job and the ability to 
- * cache archives which are un-archived on the slaves.</p> 
+ *
+ * <p>The framework will copy the necessary files on to the worker node before
+ * any tasks for the job are executed on that node. Its efficiency stems from
+ * the fact that the files are only copied once per job and the ability to
+ * cache archives which are un-archived on the workers.</p>
  *
  * <p><code>DistributedCache</code> can be used to distribute simple, read-only
- * data/text files and/or more complex types such as archives, jars etc. 
- * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. 
- * Jars may be optionally added to the classpath of the tasks, a rudimentary 
+ * data/text files and/or more complex types such as archives, jars etc.
+ * Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes.
+ * Jars may be optionally added to the classpath of the tasks, a rudimentary
  * software distribution mechanism.  Files have execution permissions.
  * In older versions of Hadoop Map/Reduce users could optionally ask for symlinks
- * to be created in the working directory of the child task.  In the current 
- * version symlinks are always created.  If the URL does not have a fragment 
- * the name of the file or directory will be used. If multiple files or 
+ * to be created in the working directory of the child task.  In the current
+ * version symlinks are always created.  If the URL does not have a fragment
+ * the name of the file or directory will be used. If multiple files or
  * directories map to the same link name, the last one added, will be used.  All
  * others will not even be downloaded.</p>
- * 
- * <p><code>DistributedCache</code> tracks modification timestamps of the cache 
- * files. Clearly the cache files should not be modified by the application 
+ *
+ * <p><code>DistributedCache</code> tracks modification timestamps of the cache
+ * files. Clearly the cache files should not be modified by the application
  * or externally while the job is executing.</p>
- * 
- * <p>Here is an illustrative example on how to use the 
+ *
+ * <p>Here is an illustrative example on how to use the
  * <code>DistributedCache</code>:</p>
  * <p><blockquote><pre>
  *     // Setting up the cache for the application
- *     
+ *
  *     1. Copy the requisite files to the <code>FileSystem</code>:
- *     
- *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
- *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
+ *
+ *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
+ *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
  *     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
  *     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
  *     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
  *     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
- *     
+ *
  *     2. Setup the application's <code>JobConf</code>:
- *     
+ *
  *     JobConf job = new JobConf();
- *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
+ *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
  *                                   job);
  *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
  *     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
  *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
  *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
  *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
- *     
+ *
  *     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
  *     or {@link org.apache.hadoop.mapred.Reducer}:
- *     
- *     public static class MapClass extends MapReduceBase  
+ *
+ *     public static class MapClass extends MapReduceBase
  *     implements Mapper&lt;K, V, K, V&gt; {
- *     
+ *
  *       private Path[] localArchives;
  *       private Path[] localFiles;
- *       
+ *
  *       public void configure(JobConf job) {
  *         // Get the cached archives/files
  *         File f = new File("./map.zip/some/file/in/zip.txt");
  *       }
- *       
- *       public void map(K key, V value, 
- *                       OutputCollector&lt;K, V&gt; output, Reporter reporter) 
+ *
+ *       public void map(K key, V value,
+ *                       OutputCollector&lt;K, V&gt; output, Reporter reporter)
  *       throws IOException {
  *         // Use data from the cached archives/files here
  *         // ...
@@ -112,7 +112,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
  *         output.collect(k, v);
  *       }
  *     }
- *     
+ *
  * </pre></blockquote>
  *
  * It is also very common to use the DistributedCache by using

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23c3ff85/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
index d4d6c6e..0c43633 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
@@ -34,79 +34,79 @@ import java.net.URI;
 
 /**
  * Distribute application-specific large, read-only files efficiently.
- * 
+ *
  * <p><code>DistributedCache</code> is a facility provided by the Map-Reduce
  * framework to cache files (text, archives, jars etc.) needed by applications.
  * </p>
- * 
- * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached 
+ *
+ * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached
  * via the {@link org.apache.hadoop.mapred.JobConf}. The
  * <code>DistributedCache</code> assumes that the files specified via urls are
  * already present on the {@link FileSystem} at the path specified by the url
  * and are accessible by every machine in the cluster.</p>
- * 
- * <p>The framework will copy the necessary files on to the slave node before 
- * any tasks for the job are executed on that node. Its efficiency stems from 
- * the fact that the files are only copied once per job and the ability to 
- * cache archives which are un-archived on the slaves.</p> 
+ *
+ * <p>The framework will copy the necessary files on to the worker node before
+ * any tasks for the job are executed on that node. Its efficiency stems from
+ * the fact that the files are only copied once per job and the ability to
+ * cache archives which are un-archived on the workers.</p>
  *
  * <p><code>DistributedCache</code> can be used to distribute simple, read-only
- * data/text files and/or more complex types such as archives, jars etc. 
- * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. 
- * Jars may be optionally added to the classpath of the tasks, a rudimentary 
+ * data/text files and/or more complex types such as archives, jars etc.
+ * Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes.
+ * Jars may be optionally added to the classpath of the tasks, a rudimentary
  * software distribution mechanism.  Files have execution permissions.
  * In older versions of Hadoop Map/Reduce users could optionally ask for symlinks
- * to be created in the working directory of the child task.  In the current 
- * version symlinks are always created.  If the URL does not have a fragment 
- * the name of the file or directory will be used. If multiple files or 
+ * to be created in the working directory of the child task.  In the current
+ * version symlinks are always created.  If the URL does not have a fragment
+ * the name of the file or directory will be used. If multiple files or
  * directories map to the same link name, the last one added, will be used.  All
  * others will not even be downloaded.</p>
- * 
- * <p><code>DistributedCache</code> tracks modification timestamps of the cache 
- * files. Clearly the cache files should not be modified by the application 
+ *
+ * <p><code>DistributedCache</code> tracks modification timestamps of the cache
+ * files. Clearly the cache files should not be modified by the application
  * or externally while the job is executing.</p>
- * 
- * <p>Here is an illustrative example on how to use the 
+ *
+ * <p>Here is an illustrative example on how to use the
  * <code>DistributedCache</code>:</p>
  * <p><blockquote><pre>
  *     // Setting up the cache for the application
- *     
+ *
  *     1. Copy the requisite files to the <code>FileSystem</code>:
- *     
- *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
- *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
+ *
+ *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
+ *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
  *     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
  *     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
  *     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
  *     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
- *     
+ *
  *     2. Setup the application's <code>JobConf</code>:
- *     
+ *
  *     JobConf job = new JobConf();
- *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
+ *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
  *                                   job);
  *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
  *     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
  *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
  *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
  *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
- *     
+ *
  *     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
  *     or {@link org.apache.hadoop.mapred.Reducer}:
- *     
- *     public static class MapClass extends MapReduceBase  
+ *
+ *     public static class MapClass extends MapReduceBase
  *     implements Mapper&lt;K, V, K, V&gt; {
- *     
+ *
  *       private Path[] localArchives;
  *       private Path[] localFiles;
- *       
+ *
  *       public void configure(JobConf job) {
  *         // Get the cached archives/files
  *         File f = new File("./map.zip/some/file/in/zip.txt");
  *       }
- *       
- *       public void map(K key, V value, 
- *                       OutputCollector&lt;K, V&gt; output, Reporter reporter) 
+ *
+ *       public void map(K key, V value,
+ *                       OutputCollector&lt;K, V&gt; output, Reporter reporter)
  *       throws IOException {
  *         // Use data from the cached archives/files here
  *         // ...
@@ -114,7 +114,7 @@ import java.net.URI;
  *         output.collect(k, v);
  *       }
  *     }
- *     
+ *
  * </pre></blockquote>
  *
  * It is also very common to use the DistributedCache by using
@@ -281,7 +281,7 @@ public class DistributedCache {
     conf.set(MRJobConfig.CACHE_ARCHIVES, archives == null ? uri.toString()
              : archives + "," + uri.toString());
   }
-  
+
   /**
    * Add a file to be localized to the conf.  The localized file will be
    * downloaded to the execution node(s), and a link will be created to the
@@ -370,7 +370,7 @@ public class DistributedCache {
   /**
    * Get the file entries in classpath as an array of Path.
    * Used by internal DistributedCache code.
-   * 
+   *
    * @param conf Configuration that contains the classpath setting
    * @deprecated Use {@link JobContext#getFileClassPaths()} instead
    * @see JobContext#getFileClassPaths()
@@ -379,8 +379,8 @@ public class DistributedCache {
   public static Path[] getFileClassPaths(Configuration conf) {
     ArrayList<String> list = (ArrayList<String>)conf.getStringCollection(
                                 MRJobConfig.CLASSPATH_FILES);
-    if (list.size() == 0) { 
-      return null; 
+    if (list.size() == 0) {
+      return null;
     }
     Path[] paths = new Path[list.size()];
     for (int i = 0; i < list.size(); i++) {
@@ -392,7 +392,7 @@ public class DistributedCache {
   /**
    * Add an archive path to the current set of classpath entries. It adds the
    * archive to cache as well.  Intended to be used by user code.
-   * 
+   *
    * @param archive Path of the archive to be added
    * @param conf Configuration that contains the classpath setting
    * @deprecated Use {@link Job#addArchiveToClassPath(Path)} instead
@@ -426,7 +426,7 @@ public class DistributedCache {
   /**
    * Get the archive entries in classpath as an array of Path.
    * Used by internal DistributedCache code.
-   * 
+   *
    * @param conf Configuration that contains the classpath setting
    * @deprecated Use {@link JobContext#getArchiveClassPaths()} instead 
    * @see JobContext#getArchiveClassPaths()
@@ -435,8 +435,8 @@ public class DistributedCache {
   public static Path[] getArchiveClassPaths(Configuration conf) {
     ArrayList<String> list = (ArrayList<String>)conf.getStringCollection(
                                 MRJobConfig.CLASSPATH_ARCHIVES);
-    if (list.size() == 0) { 
-      return null; 
+    if (list.size() == 0) {
+      return null;
     }
     Path[] paths = new Path[list.size()];
     for (int i = 0; i < list.size(); i++) {
@@ -449,13 +449,13 @@ public class DistributedCache {
    * Originally intended to enable symlinks, but currently symlinks cannot be
    * disabled. This is a NO-OP.
    * @param conf the jobconf
-   * @deprecated This is a NO-OP. 
+   * @deprecated This is a NO-OP.
    */
   @Deprecated
   public static void createSymlink(Configuration conf){
     //NOOP
   }
-  
+
   /**
    * Originally intended to check if symlinks should be used, but currently
    * symlinks cannot be disabled.
@@ -480,29 +480,29 @@ public class DistributedCache {
   }
 
   /**
-   * Get the booleans on whether the files are public or not.  Used by 
+   * Get the booleans on whether the files are public or not.  Used by
    * internal DistributedCache and MapReduce code.
    * @param conf The configuration which stored the timestamps
-   * @return a string array of booleans 
+   * @return an array of booleans
    */
   public static boolean[] getFileVisibilities(Configuration conf) {
     return parseBooleans(conf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES));
   }
 
   /**
-   * Get the booleans on whether the archives are public or not.  Used by 
+   * Get the booleans on whether the archives are public or not.  Used by
    * internal DistributedCache and MapReduce code.
    * @param conf The configuration which stored the timestamps
-   * @return a string array of booleans 
+   * @return an array of booleans
    */
   public static boolean[] getArchiveVisibilities(Configuration conf) {
     return parseBooleans(conf.getStrings(MRJobConfig.CACHE_ARCHIVES_VISIBILITIES));
   }
 
   /**
-   * This method checks if there is a conflict in the fragment names 
-   * of the uris. Also makes sure that each uri has a fragment. It 
-   * is only to be called if you want to create symlinks for 
+   * This method checks if there is a conflict in the fragment names
+   * of the uris. Also makes sure that each uri has a fragment. It
+   * is only to be called if you want to create symlinks for
    * the various archives and files.  May be used by user code.
    * @param uriFiles The uri array of urifiles
    * @param uriArchives the uri array of uri archives
@@ -514,7 +514,7 @@ public class DistributedCache {
     // check if fragment is null for any uri
     // also check if there are any conflicts in fragment names
     Set<String> fragments = new HashSet<String>();
-    
+
     // iterate over file uris
     if (uriFiles != null) {
       for (int i = 0; i < uriFiles.length; i++) {
@@ -529,7 +529,7 @@ public class DistributedCache {
         fragments.add(lowerCaseFragment);
       }
     }
-    
+
     // iterate over archive uris
     if (uriArchives != null) {
       for (int i = 0; i < uriArchives.length; i++) {


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message