hadoop-common-commits mailing list archives

From tomwh...@apache.org
Subject svn commit: r644712 - in /hadoop/core/trunk: ./ src/contrib/ec2/bin/ src/contrib/ec2/bin/image/
Date Fri, 04 Apr 2008 13:46:56 GMT
Author: tomwhite
Date: Fri Apr  4 06:46:51 2008
New Revision: 644712

URL: http://svn.apache.org/viewvc?rev=644712&view=rev
Log:
HADOOP-2410.  Make EC2 cluster nodes more independent of each other.  Contributed by Chris K Wensel.
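
A minimal sketch of the per-cluster workflow these scripts enable (the cluster name "my-cluster" and the slave counts are hypothetical):

  bin/hadoop-ec2 launch-cluster my-cluster 2    # launch a master and 2 slaves in group "my-cluster"
  bin/hadoop-ec2 launch-slaves my-cluster 3     # grow the same cluster; new nodes start in the master's zone
  bin/hadoop-ec2 list                           # list all running Hadoop EC2 clusters
  bin/hadoop-ec2 login my-cluster               # ssh to the cluster master
  bin/hadoop-ec2 terminate-cluster my-cluster   # terminate every instance in the cluster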

Added:
    hadoop/core/trunk/src/contrib/ec2/bin/cmd-hadoop-cluster
    hadoop/core/trunk/src/contrib/ec2/bin/destroy-hadoop-cluster
    hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-master
    hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-slaves
    hadoop/core/trunk/src/contrib/ec2/bin/list-hadoop-clusters
Removed:
    hadoop/core/trunk/src/contrib/ec2/bin/run-hadoop-cluster
    hadoop/core/trunk/src/contrib/ec2/bin/start-hadoop
Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/contrib/ec2/bin/create-hadoop-image
    hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2
    hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template
    hadoop/core/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote
    hadoop/core/trunk/src/contrib/ec2/bin/image/hadoop-init
    hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-cluster
    hadoop/core/trunk/src/contrib/ec2/bin/login-hadoop-cluster
    hadoop/core/trunk/src/contrib/ec2/bin/terminate-hadoop-cluster

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=644712&r1=644711&r2=644712&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Fri Apr  4 06:46:51 2008
@@ -77,6 +77,12 @@
     HADOOP-2634. Deprecate ClientProtocol::exists.
     (lohit vijayarenu via cdouglas)
 
+    HADOOP-2410.  Make EC2 cluster nodes more independent of each other.
+    Multiple concurrent EC2 clusters are now supported, and nodes may be
+    added to a cluster on the fly with new nodes starting in the same EC2
+    availability zone as the cluster.  Ganglia monitoring and large
+    instance sizes have also been added.  (Chris K Wensel via tomwhite)
+
   NEW FEATURES
 
     HADOOP-1398.  Add HBase in-memory block cache.  (tomwhite)

Added: hadoop/core/trunk/src/contrib/ec2/bin/cmd-hadoop-cluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/cmd-hadoop-cluster?rev=644712&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/cmd-hadoop-cluster (added)
+++ hadoop/core/trunk/src/contrib/ec2/bin/cmd-hadoop-cluster Fri Apr  4 06:46:51 2008
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run commands on master or specified node of a running Hadoop EC2 cluster.
+
+# if no args specified, show usage
+if [ $# = 0 ]; then
+  echo "Command required!"
+  exit 1
+fi
+
+# get arguments
+COMMAND="$1"
+shift
+# get group
+CLUSTER="$1"
+shift
+
+if [ -z $CLUSTER ]; then
+  echo "Cluster name or instance id required!"
+  exit -1
+fi
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+if [[ $CLUSTER == i-* ]]; then
+  HOST=`ec2-describe-instances $CLUSTER | grep running | awk '{print $4}'`
+  [ -z $HOST ] && echo "Instance still pending or no longer running: $CLUSTER" && exit -1
+else
+  [ ! -f $MASTER_IP_PATH ] && echo "Wrong group name, or cluster not launched! $CLUSTER" && exit -1
+  HOST=`cat $MASTER_IP_PATH`
+fi
+
+if [ "$COMMAND" = "login" ] ; then
+  echo "Logging in to host $HOST."
+  ssh $SSH_OPTS "root@$HOST"
+elif [ "$COMMAND" = "proxy" ] ; then
+  echo "Proxying to host $HOST via local port 6666"
+  echo "Gangia:     http://$HOST/ganglia"
+  echo "JobTracker: http://$HOST:50030/"
+  echo "NameNode:   http://$HOST:50070/"
+  ssh $SSH_OPTS -D 6666 -N "root@$HOST"
+elif [ "$COMMAND" = "push" ] ; then
+  echo "Pushing $1 to host $HOST."
+  scp $SSH_OPTS -r $1 "root@$HOST:"
+elif [ "$COMMAND" = "screen" ] ; then
+  echo "Logging in and attaching screen on host $HOST."
+  ssh $SSH_OPTS -t "root@$HOST" 'screen -D -R'
+else
+  echo "Executing command on host $HOST."
+  ssh $SSH_OPTS -t "root@$HOST" "$COMMAND"
+fi
\ No newline at end of file
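
The cmd-hadoop-cluster script above backs the new login/screen/proxy/push subcommands and the pass-through of arbitrary commands. A sketch of how they might be invoked (the group name "my-cluster" and the jar name are hypothetical):

  bin/hadoop-ec2 proxy my-cluster           # SOCKS proxy on localhost:6666 (pair with foxyproxy)
  bin/hadoop-ec2 push my-cluster job.jar    # scp job.jar to the master's home directory
  bin/hadoop-ec2 uptime my-cluster          # any unrecognized word is run as a command on the master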

Modified: hadoop/core/trunk/src/contrib/ec2/bin/create-hadoop-image
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/create-hadoop-image?rev=644712&r1=644711&r2=644712&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/create-hadoop-image (original)
+++ hadoop/core/trunk/src/contrib/ec2/bin/create-hadoop-image Fri Apr  4 06:46:51 2008
@@ -15,7 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 # Create a Hadoop AMI.
 # Inspired by Jonathan Siegel's EC2 script (http://blogsiegel.blogspot.com/2006/08/sandboxing-amazon-ec2.html)
 
@@ -24,11 +23,12 @@
 bin=`cd "$bin"; pwd`
 . "$bin"/hadoop-ec2-env.sh
 
-# Use fedora core
-AMI_IMAGE=`ec2-describe-images -a | grep fedora-core4-base | awk '{print $2}'`
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
+
+[ ! -z $AMI_IMAGE ] && echo "AMI already registered, use: ec2-deregister $AMI_IMAGE" && exit -1
 
-echo "Starting a fedora core base AMI with ID $AMI_IMAGE."
-OUTPUT=`ec2-run-instances $AMI_IMAGE -k $KEY_NAME`
+echo "Starting a AMI with ID $BASE_AMI_IMAGE."
+OUTPUT=`ec2-run-instances $BASE_AMI_IMAGE -k $KEY_NAME -t $INSTANCE_TYPE`
 BOOTING_INSTANCE=`echo $OUTPUT | awk '{print $6}'`
 
 echo "Instance is $BOOTING_INSTANCE."
@@ -44,25 +44,35 @@
 done
 
 echo "The server is available at $HOSTNAME."
+while true; do
+  REPLY=`ssh $SSH_OPTS "root@$HOSTNAME" 'echo "hello"'`
+  if [ ! -z $REPLY ]; then
+   break;
+  fi
+  sleep 5
+done
+
+#read -p "Login first? [yes or no]: " answer
 
-echo "Waiting before trying to connect..."
-sleep 30
+if [ "$answer" == "yes" ]; then
+  ssh $SSH_OPTS "root@$HOSTNAME"
+fi
 
 echo "Copying scripts."
 
 # Copy setup scripts
-scp $SSH_OPTS "$bin"/hadoop-ec2-env.sh "root@$HOSTNAME:"
+scp $SSH_OPTS "$bin"/hadoop-ec2-env.sh "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS "$bin"/image/create-hadoop-image-remote "root@$HOSTNAME:/mnt"
 scp $SSH_OPTS "$bin"/image/hadoop-init "root@$HOSTNAME:"
-scp $SSH_OPTS "$bin"/image/create-hadoop-image-remote "root@$HOSTNAME:"
 
 # Copy private key and certificate (for bundling image)
-scp $SSH_OPTS $EC2_KEYDIR/pk-*.pem "root@$HOSTNAME:/mnt"
-scp $SSH_OPTS $EC2_KEYDIR/cert-*.pem "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS $EC2_KEYDIR/pk*.pem "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS $EC2_KEYDIR/cert*.pem "root@$HOSTNAME:/mnt"
 
 # Connect to it
-ssh $SSH_OPTS "root@$HOSTNAME" './create-hadoop-image-remote'
+ssh $SSH_OPTS "root@$HOSTNAME" '/mnt/create-hadoop-image-remote'
 
 # Register image
-ec2-register $S3_BUCKET/hadoop-$HADOOP_VERSION.manifest.xml
+ec2-register $S3_BUCKET/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml
 
 echo "Terminate with: ec2-terminate-instances $BOOTING_INSTANCE"

Added: hadoop/core/trunk/src/contrib/ec2/bin/destroy-hadoop-cluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/destroy-hadoop-cluster?rev=644712&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/destroy-hadoop-cluster (added)
+++ hadoop/core/trunk/src/contrib/ec2/bin/destroy-hadoop-cluster Fri Apr  4 06:46:51 2008
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Destroy the security groups and cached state for a Hadoop EC2 cluster.
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+CLUSTER=$1
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+rm -f $MASTER_IP_PATH
+rm -f $MASTER_PRIVATE_IP_PATH
+
+ec2-describe-group | egrep "[[:space:]]$CLUSTER_MASTER[[:space:]]" > /dev/null
+if [ $? -eq 0 ]; then
+  echo "Destroying group $CLUSTER_MASTER"
+  ec2-revoke $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID
+fi
+
+ec2-describe-group | egrep "[[:space:]]$CLUSTER[[:space:]]" > /dev/null
+if [ $? -eq 0 ]; then
+  echo "Destroying group $CLUSTER"
+  ec2-revoke $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID
+fi
+
+ec2-delete-group $CLUSTER_MASTER
+ec2-delete-group $CLUSTER

Modified: hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2?rev=644712&r1=644711&r2=644712&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2 (original)
+++ hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2 Fri Apr  4 06:46:51 2008
@@ -15,7 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
@@ -23,30 +22,37 @@
 if [ $# = 0 ]; then
   echo "Usage: hadoop-ec2 COMMAND"
   echo "where COMMAND is one of:"
-  echo "  create-image         create a Hadoop AMI"
-  echo "  launch-cluster       launch a cluster of Hadoop EC2 instances"
-  echo "  start-hadoop         start Hadoop daemons on a cluster"
-  echo "  login                login to the master node of the Hadoop EC2 cluster"
-  echo "  run                  'launch-cluster', 'start-hadoop', 'login'"
-  echo "  terminate-cluster    terminate all Hadoop EC2 instances"
+  echo "  list                                 list all running Hadoop EC2 clusters"
+  echo "  launch-cluster <group> <num slaves>  launch a cluster of Hadoop EC2
instances - launch-master then launch-slaves"
+  echo "  launch-master  <group>               launch or find a cluster master"
+  echo "  launch-slaves  <group> <num slaves>  launch the cluster slaves"
+  echo "  terminate-cluster                    terminate all Hadoop EC2 instances"
+  echo "  login  <group|instance id>           login to the master node of the Hadoop
EC2 cluster"
+  echo "  screen <group|instance id>           start or attach 'screen' on the master
node of the Hadoop EC2 cluster"
+  echo "  proxy  <group|instance id>           start a socks proxy on localhost:6666
(use w/foxyproxy)"
+  echo "  push   <group> <file>                scp a file to the master node
of the Hadoop EC2 cluster"
+  echo "  <shell cmd> <group|instance id>      execute any command remotely on
the master"
+  echo "  create-image                         create a Hadoop AMI"
   exit 1
 fi
 
 # get arguments
-COMMAND=$1
+COMMAND="$1"
 shift
 
 if [ "$COMMAND" = "create-image" ] ; then
-  . "$bin"/create-hadoop-image
+  . "$bin"/create-hadoop-image $*
 elif [ "$COMMAND" = "launch-cluster" ] ; then
-  . "$bin"/launch-hadoop-cluster
-elif [ "$COMMAND" = "start-hadoop" ] ; then
-  . "$bin"/start-hadoop
-elif [ "$COMMAND" = "run" ] ; then
-  . "$bin"/run-hadoop-cluster
-elif [ "$COMMAND" = "login" ] ; then
-  . "$bin"/login-hadoop-cluster
+  . "$bin"/launch-hadoop-cluster $*
+elif [ "$COMMAND" = "launch-master" ] ; then
+  . "$bin"/launch-hadoop-master $*
+elif [ "$COMMAND" = "launch-slaves" ] ; then
+  . "$bin"/launch-hadoop-slaves $*
 elif [ "$COMMAND" = "terminate-cluster" ] ; then
-  . "$bin"/terminate-hadoop-cluster
+  . "$bin"/terminate-hadoop-cluster $*
+elif [ "$COMMAND" = "list" ] ; then
+  . "$bin"/list-hadoop-clusters
+else
+  . "$bin"/cmd-hadoop-cluster "$COMMAND" $*
 fi
-  
+

Modified: hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template?rev=644712&r1=644711&r2=644712&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template (original)
+++ hadoop/core/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template Fri Apr  4 06:46:51 2008
@@ -15,7 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 # Your Amazon Account Number.
 AWS_ACCOUNT_ID=
 
@@ -38,10 +37,10 @@
 PRIVATE_KEY_PATH=`echo "$EC2_KEYDIR"/"id_rsa-$KEY_NAME"`
 
 # SSH options used when connecting to EC2 instances.
-SSH_OPTS=`echo -i "$PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no`
+SSH_OPTS=`echo -i "$PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o ServerAliveInterval=30`
 
 # The version of Hadoop to use.
-HADOOP_VERSION=0.14.1
+HADOOP_VERSION=0.17.0
 
 # The Amazon S3 bucket where the Hadoop AMI is stored.
 # The default value is for public images, so can be left if you are running a public image.
@@ -49,22 +48,42 @@
 # so you can store it in a bucket you own.
 S3_BUCKET=hadoop-ec2-images
 
-# The EC2 group to run your cluster in.
-GROUP=hadoop-cluster-group
-
-# The hostname of the master node in the cluster. You need to be able to set the DNS for this host to point to the master's IP address.
-# See http://www.dyndns.com/services/dns/dyndns/, for example.
-MASTER_HOST=
+# Enable public access to JobTracker and TaskTracker web interfaces
+ENABLE_WEB_PORTS=true
 
-# The number of nodes in your cluster.
-NO_INSTANCES=2
+# Boot parameters
+MAX_MAP_TASKS=2
+MAX_REDUCE_TASKS=2
+COMPRESS=true
+
+# The EC2 instance type: m1.small, m1.large, m1.xlarge
+INSTANCE_TYPE="m1.small"
+#INSTANCE_TYPE="m1.large"
+#INSTANCE_TYPE="m1.xlarge"
+
+# The EC2 group master name. CLUSTER is set by calling scripts
+CLUSTER_MASTER=$CLUSTER-master
+
+# Cached values for a given cluster
+MASTER_PRIVATE_IP_PATH=~/.hadoop-private-$CLUSTER_MASTER
+MASTER_IP_PATH=~/.hadoop-$CLUSTER_MASTER
+MASTER_ZONE_PATH=~/.hadoop-zone-$CLUSTER_MASTER
 
 #
 # The following variables are only used when creating an AMI.
 #
 
-# The download URL for the Sun JDK. Visit http://java.sun.com/javase/downloads/index.jsp and get the URL for the "Linux self-extracting file".
-JAVA_BINARY_URL=''
-
 # The version number of the installed JDK.
-JAVA_VERSION=1.6.0_02
+JAVA_VERSION=1.6.0_05
+
+# SUPPORTED_ARCHITECTURES = ['i386', 'x86_64']
+# The download URL for the Sun JDK. Visit http://java.sun.com/javase/downloads/index.jsp and get the URL for the "Linux self-extracting file".
+if [ "$INSTANCE_TYPE" == "m1.small" ]; then
+  ARCH='i386'
+  BASE_AMI_IMAGE="ami-f51aff9c"  # ec2-public-images/fedora-8-i386-base-v1.06.manifest.xml
+  JAVA_BINARY_URL=
+else
+  ARCH='x86_64'
+  BASE_AMI_IMAGE="ami-f21aff9b"  # ec2-public-images/fedora-8-x86_64-base-v1.06.manifest.xml
+  JAVA_BINARY_URL=
+fi
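
With the template above, INSTANCE_TYPE now drives both the build architecture and the base AMI, so moving to larger instances is a one-line change; a sketch (the values simply select the x86_64 branch of the if-block):

  INSTANCE_TYPE="m1.xlarge"      # ARCH becomes x86_64, BASE_AMI_IMAGE becomes ami-f21aff9b
  bin/hadoop-ec2 create-image    # registers $S3_BUCKET/hadoop-$HADOOP_VERSION-x86_64.manifest.xml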

Modified: hadoop/core/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote?rev=644712&r1=644711&r2=644712&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote (original)
+++ hadoop/core/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote Fri Apr  4 06:46:51 2008
@@ -27,17 +27,22 @@
 rm -f "$bin"/hadoop-ec2-env.sh
 
 # Install Java
+echo "Downloading and installing java binary."
 cd /usr/local
 wget -nv -O java.bin $JAVA_BINARY_URL
 sh java.bin
 rm -f java.bin
 
 # Install tools
-yum install rsync
+echo "Installing rpms."
+yum -y install rsync lynx screen ganglia-gmetad ganglia-gmond ganglia-web httpd php
+yum -y clean all
 
 # Install Hadoop
+echo "Installing Hadoop $HADOOP_VERSION."
 cd /usr/local
-wget -nv http://www.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+wget -nv http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+[ ! -f hadoop-$HADOOP_VERSION.tar.gz ] && wget -nv http://www.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
 tar xzf hadoop-$HADOOP_VERSION.tar.gz
 rm -f hadoop-$HADOOP_VERSION.tar.gz
 
@@ -45,12 +50,17 @@
 sed -i -e "s|# export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}|" \
        -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/mnt/hadoop/logs|' \
        -e 's|# export HADOOP_SLAVE_SLEEP=.*|export HADOOP_SLAVE_SLEEP=1|' \
+       -e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS=-server|' \
       /usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh
-mkdir -p /mnt/hadoop/logs
 
 # Do configuration on instance startup
 echo "/root/hadoop-init" >> /etc/rc.d/rc.local
 
+# Setup root user bash environment
+echo "export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}" >> /root/.bash_profile
+echo "export HADOOP_HOME=/usr/local/hadoop-${HADOOP_VERSION}" >> /root/.bash_profile
+echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> /root/.bash_profile
+
 # Configure networking.
 # Delete SSH authorized_keys since it includes the key it was launched with. (Note that it is re-populated when an instance starts.)
 rm -f /root/.ssh/authorized_keys
@@ -60,9 +70,10 @@
 # Bundle and upload image
 cd ~root
 # Don't need to delete .bash_history since it isn't written until exit.
-ec2-bundle-vol -d /mnt -k /mnt/pk-*.pem -c /mnt/cert-*.pem -u $AWS_ACCOUNT_ID -s 1536 -p hadoop-$HADOOP_VERSION
-rm /mnt/pk-*.pem /mnt/cert-*.pem
-ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY
+df -h
+ec2-bundle-vol -d /mnt -k /mnt/pk*.pem -c /mnt/cert*.pem -u $AWS_ACCOUNT_ID -s 3072 -p hadoop-$HADOOP_VERSION-$ARCH -r $ARCH
+
+ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY
 
 # End
 echo Done

Modified: hadoop/core/trunk/src/contrib/ec2/bin/image/hadoop-init
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/image/hadoop-init?rev=644712&r1=644711&r2=644712&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/image/hadoop-init (original)
+++ hadoop/core/trunk/src/contrib/ec2/bin/image/hadoop-init Fri Apr  4 06:46:51 2008
@@ -1,13 +1,30 @@
 # Use parameters passed in during launch to configure Hadoop
-USER_DATA=`wget -q -O - http://169.254.169.254/1.0/user-data`
-NO_INSTANCES=`python -c "print '$USER_DATA'.split(',')[0]"`
-MASTER_HOST=`python -c "print '$USER_DATA'.split(',')[1]"`
+# expects:
+# MASTER_HOST, MAX_MAP_TASKS, MAX_REDUCE_TASKS, COMPRESS, DFS_WRITE_RETRIES
+
+# set defaults
+MAX_TASKS=3
+[ "$INSTANCE_TYPE" == "m1.large" ] && MAX_TASKS=6
+[ "$INSTANCE_TYPE" == "m1.xlarge" ] && MAX_TASKS=12
+
+MAX_MAP_TASKS=$MAX_TASKS
+MAX_REDUCE_TASKS=$MAX_TASKS
+COMPRESS="true"
+DFS_WRITE_RETRIES=3
+
+wget -q -O - http://169.254.169.254/latest/user-data | tr ',' '\n' > /tmp/user-data
+source /tmp/user-data
+
 HADOOP_HOME=`ls -d /usr/local/hadoop-*`
-echo $NO_INSTANCES, $MASTER_HOST, $HADOOP_HOME
- 
-sed -i -e "s|# export HADOOP_MASTER=.*|export HADOOP_MASTER=$MASTER_HOST:$HADOOP_HOME|" \
-    $HADOOP_HOME/conf/hadoop-env.sh
-      
+
+IS_MASTER="false"
+if [ "$MASTER_HOST" == "master" ]; then
+ IS_MASTER="true"
+ MASTER_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname`
+fi
+
+echo $IS_MASTER $MASTER_HOST $MAX_MAP_TASKS $MAX_REDUCE_TASKS $COMPRESS
+
 cat > $HADOOP_HOME/conf/hadoop-site.xml <<EOF
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
@@ -30,14 +47,95 @@
 </property>
 
 <property>
-  <name>mapred.map.tasks</name>
-  <value>$(( NO_INSTANCES * 10 ))</value>
+  <name>tasktracker.http.threads</name>
+  <value>80</value>
+</property>
+
+<property>
+  <name>mapred.tasktracker.map.tasks.maximum</name>
+  <value>$MAX_MAP_TASKS</value>
+</property>
+
+<property>
+  <name>mapred.tasktracker.reduce.tasks.maximum</name>
+  <value>$MAX_REDUCE_TASKS</value>
 </property>
 
 <property>
-  <name>mapred.reduce.tasks</name>
-  <value>$(( NO_INSTANCES * 3 ))</value>
+  <name>mapred.output.compress</name>
+  <value>$COMPRESS</value>
+</property>
+
+<property>
+  <name>mapred.output.compression.type</name>
+  <value>BLOCK</value>
+</property>
+
+<property>
+  <name>dfs.client.block.write.retries</name>
+  <value>$DFS_WRITE_RETRIES</value>
 </property>
 
 </configuration>
 EOF
+
+# Configure Hadoop for Ganglia
+# overwrite hadoop-metrics.properties
+cat > $HADOOP_HOME/conf/hadoop-metrics.properties <<EOF
+
+# Ganglia
+# we push to the master gmond so hostnames show up properly
+dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+dfs.period=10
+dfs.servers=$MASTER_HOST:8649
+
+mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+mapred.period=10
+mapred.servers=$MASTER_HOST:8649
+
+jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+jvm.period=10
+jvm.servers=$MASTER_HOST:8649
+EOF
+
+[ ! -f /etc/hosts ] &&  echo "127.0.0.1 localhost" > /etc/hosts
+
+mkdir -p /mnt/hadoop/logs
+
+# not set on boot
+export USER="root"
+
+if [ "$IS_MASTER" == "true" ]; then
+  # MASTER
+  # Prep Ganglia
+  sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
+         -e "s|\( *bind *=.*\)|#\1|" \
+         -e "s|\( *mute *=.*\)|  mute = yes|" \
+         -e "s|\( *location *=.*\)|  location = \"master-node\"|" \
+         /etc/gmond.conf
+  mkdir -p /mnt/ganglia/rrds
+  chown -R ganglia:ganglia /mnt/ganglia/rrds
+  rm -rf /var/lib/ganglia; cd /var/lib; ln -s /mnt/ganglia ganglia; cd
+  service gmond start
+  service gmetad start
+  apachectl start
+
+  # Hadoop
+  # only format on first boot
+  [ ! -e /mnt/hadoop/dfs ] && "$HADOOP_HOME"/bin/hadoop namenode -format
+
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start namenode
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start jobtracker
+else
+  # SLAVE
+  # Prep Ganglia
+  sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
+         -e "s|\( *bind *=.*\)|#\1|" \
+         -e "s|\(udp_send_channel {\)|\1\n  host=$MASTER_HOST|" \
+         /etc/gmond.conf
+  service gmond start
+
+  # Hadoop
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start datanode
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start tasktracker
+fi
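
The boot-parameter hand-off above is just comma-separated shell assignments: the launch scripts pass them to ec2-run-instances via -d, and hadoop-init reads them back from the instance metadata service and sources them. The same transformation can be sketched locally (the values are illustrative):

  USER_DATA="MASTER_HOST=master,MAX_MAP_TASKS=2,MAX_REDUCE_TASKS=2,COMPRESS=true"
  echo "$USER_DATA" | tr ',' '\n' > /tmp/user-data
  source /tmp/user-data
  echo $MASTER_HOST    # "master", the sentinel hadoop-init replaces with the local hostname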

Modified: hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-cluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-cluster?rev=644712&r1=644711&r2=644712&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-cluster (original)
+++ hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-cluster Fri Apr  4 06:46:51 2008
@@ -15,44 +15,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 # Launch an EC2 cluster of Hadoop instances.
 
 # Import variables
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
-. "$bin"/hadoop-ec2-env.sh
 
-ec2-describe-group | grep $GROUP > /dev/null
-if [ ! $? -eq 0 ]; then
-  echo "Creating group $GROUP"
-  ec2-add-group $GROUP -d "Group for Hadoop clusters."
-  ec2-authorize $GROUP -p 22    # ssh
-  ec2-authorize $GROUP -p 50030 # JobTracker web interface
-  ec2-authorize $GROUP -p 50060 # TaskTracker web interface
-  ec2-authorize $GROUP -o $GROUP -u $AWS_ACCOUNT_ID 
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
 fi
 
-# Finding Hadoop image
-AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep available | awk '{print $2}'`
+if [ -z $2 ]; then
+  echo "Must specify the number of slaves to start."
+  exit -1
+fi
 
-# Start a cluster
-echo "Starting cluster with AMI $AMI_IMAGE"
-RUN_INSTANCES_OUTPUT=`ec2-run-instances $AMI_IMAGE -n $NO_INSTANCES -g $GROUP -k $KEY_NAME -d "$NO_INSTANCES,$MASTER_HOST" | grep INSTANCE | awk '{print $2}'`
-for instance in $RUN_INSTANCES_OUTPUT; do
-  echo "Waiting for instance $instance to start"
-  while true; do
-    printf "."
-    HOSTNAME=`ec2-describe-instances $instance | grep running | awk '{print $4}'`
-    if [ ! -z $HOSTNAME ]; then
-      echo "started as $HOSTNAME"
-      break;
-    fi
-    sleep 1
-  done
-done
+if ! "$bin"/launch-hadoop-master $1 ; then
+  exit $?
+fi
 
-echo "Appointing master"
-MASTER_EC2_HOST=`ec2-describe-instances | grep INSTANCE | grep running | awk '{if ($8 == 0 || $7 == 0) print $4}'`
-MASTER_IP=`dig +short $MASTER_EC2_HOST`
-echo "Master is $MASTER_EC2_HOST. Please set up DNS so $MASTER_HOST points to $MASTER_IP."
+if ! "$bin"/launch-hadoop-slaves $*; then
+  exit $?
+fi

Added: hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-master
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-master?rev=644712&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-master (added)
+++ hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-master Fri Apr  4 06:46:51 2008
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Launch an EC2 Hadoop master.
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+CLUSTER=$1
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+echo "Testing for existing master in group: $CLUSTER"
+MASTER_EC2_HOST=`ec2-describe-instances | awk '"RESERVATION" == $1 && "'$CLUSTER_MASTER'" == $4, "RESERVATION" == $1 && "'$CLUSTER_MASTER'" != $4'`
+MASTER_EC2_HOST=`echo "$MASTER_EC2_HOST" | awk '"INSTANCE" == $1 && "running" == $6 {print $4}'`
+
+if [ ! -z "$MASTER_EC2_HOST" ]; then
+  echo "Master already running on: $MASTER_EC2_HOST"
+  MASTER_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_EC2_HOST | awk '{print $5}'`
+  echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH
+  echo $MASTER_EC2_HOST > $MASTER_IP_PATH
+  exit 0
+fi
+
+ec2-describe-group | egrep "[[:space:]]$CLUSTER_MASTER[[:space:]]" > /dev/null
+if [ ! $? -eq 0 ]; then
+  echo "Creating group $CLUSTER_MASTER"
+  ec2-add-group $CLUSTER_MASTER -d "Group for Hadoop Master."
+  ec2-authorize $CLUSTER_MASTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID
+  ec2-authorize $CLUSTER_MASTER -p 22    # ssh
+
+  if [ $ENABLE_WEB_PORTS == "true" ]; then
+    ec2-authorize $CLUSTER_MASTER -p 50030 # JobTracker web interface
+    ec2-authorize $CLUSTER_MASTER -p 50060 # TaskTracker web interface
+  fi
+fi
+
+ec2-describe-group | egrep "[[:space:]]$CLUSTER[[:space:]]" > /dev/null
+if [ ! $? -eq 0 ]; then
+  echo "Creating group $CLUSTER"
+  ec2-add-group $CLUSTER -d "Group for Hadoop Slaves."
+  ec2-authorize $CLUSTER -o $CLUSTER -u $AWS_ACCOUNT_ID
+  ec2-authorize $CLUSTER -p 22    # ssh
+
+  if [ $ENABLE_WEB_PORTS == "true" ]; then
+    ec2-authorize $CLUSTER -p 50030 # JobTracker web interface
+    ec2-authorize $CLUSTER -p 50060 # TaskTracker web interface
+  fi
+
+  ec2-authorize $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID
+  ec2-authorize $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID
+fi
+
+# Finding Hadoop image
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
+
+# Start a master
+echo "Starting master with AMI $AMI_IMAGE"
+USER_DATA="MASTER_HOST=master,MAX_MAP_TASKS=$MAX_MAP_TASKS,MAX_REDUCE_TASKS=$MAX_REDUCE_TASKS,COMPRESS=$COMPRESS"
+INSTANCE=`ec2-run-instances $AMI_IMAGE -n 1 -g $CLUSTER_MASTER -k $KEY_NAME -d "$USER_DATA" -t $INSTANCE_TYPE | grep INSTANCE | awk '{print $2}'`
+echo "Waiting for instance $INSTANCE to start"
+while true; do
+  printf "."
+  # get private dns
+  MASTER_HOST=`ec2-describe-instances $INSTANCE | grep running | awk '{print $5}'`
+  if [ ! -z $MASTER_HOST ]; then
+    echo "Started as $MASTER_HOST"
+    break;
+  fi
+  sleep 1
+done
+
+MASTER_EC2_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $4}'`
+echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH
+echo $MASTER_EC2_HOST > $MASTER_IP_PATH
+MASTER_EC2_ZONE=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $11}'`
+echo $MASTER_EC2_ZONE > $MASTER_ZONE_PATH
+
+while true; do
+  REPLY=`ssh $SSH_OPTS "root@$MASTER_EC2_HOST" 'echo "hello"'`
+  if [ ! -z $REPLY ]; then
+   break;
+  fi
+  sleep 5
+done
+
+echo "Copying private key to master"
+scp $SSH_OPTS $PRIVATE_KEY_PATH "root@$MASTER_EC2_HOST:/root/.ssh/id_rsa"
+ssh $SSH_OPTS "root@$MASTER_EC2_HOST" "chmod 600 /root/.ssh/id_rsa"
+
+MASTER_IP=`dig +short $MASTER_EC2_HOST`
+echo "Master is $MASTER_EC2_HOST, ip is $MASTER_IP, zone is $MASTER_EC2_ZONE."
\ No newline at end of file
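
The awk range pattern used above (and again in terminate-hadoop-cluster below) selects everything from a RESERVATION line belonging to the cluster's group up to the next RESERVATION line that is not, so the INSTANCE lines in between can then be filtered. A sketch against hypothetical ec2-describe-instances output:

  ec2-describe-instances \
    | awk '"RESERVATION" == $1 && "my-cluster-master" == $4, "RESERVATION" == $1 && "my-cluster-master" != $4' \
    | awk '"INSTANCE" == $1 && "running" == $6 {print $4}'    # public DNS of the running master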

Added: hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-slaves
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-slaves?rev=644712&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-slaves (added)
+++ hadoop/core/trunk/src/contrib/ec2/bin/launch-hadoop-slaves Fri Apr  4 06:46:51 2008
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Launch EC2 Hadoop slaves.
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+if [ -z $2 ]; then
+  echo "Must specify the number of slaves to start."
+  exit -1
+fi
+
+CLUSTER=$1
+NO_INSTANCES=$2
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+if [ ! -f $MASTER_IP_PATH ]; then
+  echo "Must start Cluster Master first!"
+  exit -1
+fi
+
+# Finding Hadoop image
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
+MASTER_HOST=`cat $MASTER_PRIVATE_IP_PATH`
+MASTER_ZONE=`cat $MASTER_ZONE_PATH`
+
+# Start slaves
+echo "Adding $1 node(s) to cluster group $CLUSTER with AMI $AMI_IMAGE"
+USER_DATA="MASTER_HOST=$MASTER_HOST,MAX_MAP_TASKS=$MAX_MAP_TASKS,MAX_REDUCE_TASKS=$MAX_REDUCE_TASKS,COMPRESS=$COMPRESS"
+ec2-run-instances $AMI_IMAGE -n "$NO_INSTANCES" -g "$CLUSTER" -k "$KEY_NAME" -d "$USER_DATA" -t "$INSTANCE_TYPE" -z "$MASTER_ZONE" | grep INSTANCE | awk '{print $2}'
+
+

Added: hadoop/core/trunk/src/contrib/ec2/bin/list-hadoop-clusters
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/list-hadoop-clusters?rev=644712&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/list-hadoop-clusters (added)
+++ hadoop/core/trunk/src/contrib/ec2/bin/list-hadoop-clusters Fri Apr  4 06:46:51 2008
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# List running Hadoop EC2 clusters.
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+# Finding Hadoop clusters
+CLUSTERS=`ec2-describe-instances | awk '"RESERVATION" == $1 {print $4}' | grep -v -e "-master$" | sort | uniq`
+
+[ -z "$CLUSTERS" ] && echo "No running clusters." && exit 0
+
+echo "Running Hadoop clusters:"
+echo "$CLUSTERS"

Modified: hadoop/core/trunk/src/contrib/ec2/bin/login-hadoop-cluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/login-hadoop-cluster?rev=644712&r1=644711&r2=644712&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/login-hadoop-cluster (original)
+++ hadoop/core/trunk/src/contrib/ec2/bin/login-hadoop-cluster Fri Apr  4 06:46:51 2008
@@ -15,7 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 # Login to the master node of a running Hadoop EC2 cluster.
 
 # Import variables

Modified: hadoop/core/trunk/src/contrib/ec2/bin/terminate-hadoop-cluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/ec2/bin/terminate-hadoop-cluster?rev=644712&r1=644711&r2=644712&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/ec2/bin/terminate-hadoop-cluster (original)
+++ hadoop/core/trunk/src/contrib/ec2/bin/terminate-hadoop-cluster Fri Apr  4 06:46:51 2008
@@ -15,17 +15,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 # Terminate a cluster.
 
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+CLUSTER=$1
+
 # Import variables
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 . "$bin"/hadoop-ec2-env.sh
 
 # Finding Hadoop image
-AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep available | awk '{print $2}'`
-HADOOP_INSTANCES=`ec2-describe-instances | grep INSTANCE | grep $AMI_IMAGE`
+HADOOP_INSTANCES=`ec2-describe-instances | awk '"RESERVATION" == $1 && ("'$CLUSTER'" == $4 || "'$CLUSTER_MASTER'" == $4), "RESERVATION" == $1 && ("'$CLUSTER'" != $4 && "'$CLUSTER_MASTER'" != $4)'`
+HADOOP_INSTANCES=`echo "$HADOOP_INSTANCES" | grep INSTANCE | grep running`
+
+[ -z "$HADOOP_INSTANCES" ] && echo "No running instances in cluster $CLUSTER." && exit 0
 
 echo "Running Hadoop instances:"
 echo "$HADOOP_INSTANCES"


