hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cdoug...@apache.org
Subject svn commit: r636592 - in /hadoop/core/trunk: ./ src/test/gridmix/ src/test/gridmix/submissionScripts/
Date Thu, 13 Mar 2008 00:57:39 GMT
Author: cdouglas
Date: Wed Mar 12 17:57:32 2008
New Revision: 636592

URL: http://svn.apache.org/viewvc?rev=636592&view=rev
Log:
HADOOP-2888. Make gridmix scripts more readily configurable and amenable
to automated execution. Contributed by Mukund Madhugiri



Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/test/gridmix/generateData.sh
    hadoop/core/trunk/src/test/gridmix/gridmix-env
    hadoop/core/trunk/src/test/gridmix/submissionScripts/allToSameCluster
    hadoop/core/trunk/src/test/gridmix/submissionScripts/maxentToSameCluster
    hadoop/core/trunk/src/test/gridmix/submissionScripts/monsterQueriesToSameCluster
    hadoop/core/trunk/src/test/gridmix/submissionScripts/textSortToSameCluster
    hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataScanToSameCluster
    hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataSortToSameCluster

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=636592&r1=636591&r2=636592&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Mar 12 17:57:32 2008
@@ -79,6 +79,9 @@
 
     HADOOP-2765. Enables specifying ulimits for streaming/pipes tasks (ddas)
 
+    HADOOP-2888. Make gridmix scripts more readily configurable and amenable
+    to automated execution. (Mukund Madhugiri via cdouglas)
+
   OPTIMIZATIONS
 
     HADOOP-2790.  Fixed inefficient method hasSpeculativeTask by removing

Modified: hadoop/core/trunk/src/test/gridmix/generateData.sh
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/gridmix/generateData.sh?rev=636592&r1=636591&r2=636592&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/gridmix/generateData.sh (original)
+++ hadoop/core/trunk/src/test/gridmix/generateData.sh Wed Mar 12 17:57:32 2008
@@ -4,18 +4,27 @@
 GRID_DIR=`cd "$GRID_DIR"; pwd`
 source $GRID_DIR/gridmix-env
 
-# 2TB data compressing to approx 500GB
-#COMPRESSED_DATA_BYTES=2147483648000
+# Smaller data set is used by default.
 COMPRESSED_DATA_BYTES=2147483648
-# 500GB
-#UNCOMPRESSED_DATA_BYTES=536870912000
 UNCOMPRESSED_DATA_BYTES=536870912
-# Number of partitions for output data
-NUM_MAPS=100
-# Default approx 70MB per data file, compressed
-#INDIRECT_DATA_BYTES=58720256000
 INDIRECT_DATA_BYTES=58720256
+
+# Number of partitions for output data
+if [ -z ${NUM_MAPS} ] ; then
+  NUM_MAPS=100
+fi
 INDIRECT_DATA_FILES=200
+
+# If the env var USE_REAL_DATASET is set, then use the params to generate the bigger (real) dataset.
+if [ ! -z ${USE_REAL_DATASET} ] ; then
+  echo "Using real dataset"
+  # 2TB data compressing to approx 500GB
+  COMPRESSED_DATA_BYTES=2147483648000
+  # 500GB
+  UNCOMPRESSED_DATA_BYTES=536870912000
+  # Default approx 70MB per data file, compressed
+  INDIRECT_DATA_BYTES=58720256000 
+fi
 
 ${HADOOP_HOME}/bin/hadoop jar \
   ${EXAMPLE_JAR} randomtextwriter \

Modified: hadoop/core/trunk/src/test/gridmix/gridmix-env
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/gridmix/gridmix-env?rev=636592&r1=636591&r2=636592&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/gridmix/gridmix-env (original)
+++ hadoop/core/trunk/src/test/gridmix/gridmix-env Wed Mar 12 17:57:32 2008
@@ -3,21 +3,42 @@
 
 ## Environment configuration
 # Hadoop installation
-export HADOOP_HOME=
+# set var only if it has not already been set externally
+if [ -z "${HADOOP_HOME}" ] ; then
+  export HADOOP_HOME=
+fi
 # Base directory for gridmix install
-export GRID_MIX_HOME=${GRID_DIR}
+# set var only if it has not already been set externally
+if [ -z "${GRID_MIX_HOME}" ] ; then
+  export GRID_MIX_HOME=${GRID_DIR}
+fi
 # Hadoop example jar
-export EXAMPLE_JAR=${HADOOP_HOME}/hadoop-0.15.2-dev-examples.jar
+# set var only if it has not already been set externally
+if [ -z "${EXAMPLE_JAR}" ] ; then
+  export EXAMPLE_JAR="${HADOOP_HOME}/hadoop-*-examples.jar"
+fi
 # Hadoop test jar
-export APP_JAR=${HADOOP_HOME}/hadoop-0.15.2-dev-test.jar
+# set var only if it has not already been set externally
+if [ -z "${APP_JAR}" ] ; then
+  export APP_JAR="${HADOOP_HOME}/hadoop-*-test.jar"
+fi
 # Hadoop streaming jar
-export STREAM_JAR=${HADOOP_HOME}/contrib/hadoop-0.15.2-streaming.jar
+# set var only if it has not already been set externally
+if [ -z "${STREAM_JAR}" ] ; then
+  export STREAM_JAR="${HADOOP_HOME}/contrib/hadoop-*-streaming.jar"
+fi
 # Location on default filesystem for writing gridmix data (usually HDFS)
 # Default: /gridmix/data
-export GRID_MIX_DATA=/gridmix/data
+# set var only if it has not already been set externally
+if [ -z "${GRID_MIX_DATA}" ] ; then
+  export GRID_MIX_DATA=/gridmix/data
+fi
 # Location of executables in default filesystem (usually HDFS)
 # Default: /gridmix/programs
-export GRID_MIX_PROG=/gridmix/programs
+# set var only if it has not already been set externally
+if [ -z "${GRID_MIX_PROG}" ] ; then
+  export GRID_MIX_PROG=/gridmix/programs
+fi
 
 ## Data sources
 # Variable length key, value compressed SequenceFile

Modified: hadoop/core/trunk/src/test/gridmix/submissionScripts/allToSameCluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/gridmix/submissionScripts/allToSameCluster?rev=636592&r1=636591&r2=636592&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/gridmix/submissionScripts/allToSameCluster (original)
+++ hadoop/core/trunk/src/test/gridmix/submissionScripts/allToSameCluster Wed Mar 12 17:57:32 2008
@@ -3,14 +3,25 @@
 GRID_DIR=`dirname "$0"`
 GRID_DIR=`cd "$GRID_DIR"; pwd`
 source $GRID_DIR/../gridmix-env
+PROCESSES=""
 
 $GRID_MIX_HOME/submissionScripts/maxentToSameCluster 2>&1 > maxentToSameCluster.out &
+PROCESSES="${PROCESSES} $!"
 sleep 20
 $GRID_MIX_HOME/submissionScripts/textSortToSameCluster 2>&1 > textSortToSameCluster.out  &
+PROCESSES="${PROCESSES} $!"
 sleep 20
 $GRID_MIX_HOME/submissionScripts/monsterQueriesToSameCluster 2>&1 > monsterQueriesToSameCluster.out &
+PROCESSES="${PROCESSES} $!"
 sleep 20
 $GRID_MIX_HOME/submissionScripts/webdataScanToSameCluster 2>&1 > webdataScanToSameCluster.out &
+PROCESSES="${PROCESSES} $!"
 sleep 20
 $GRID_MIX_HOME/submissionScripts/webdataSortToSameCluster  2>&1 > webdataSortToSameCluster.out &
+PROCESSES="${PROCESSES} $!"
+
+echo "Waiting for processes: ${PROCESSES}"
+for APROC in ${PROCESSES}; do
+  wait ${APROC}
+done
 

Modified: hadoop/core/trunk/src/test/gridmix/submissionScripts/maxentToSameCluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/gridmix/submissionScripts/maxentToSameCluster?rev=636592&r1=636591&r2=636592&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/gridmix/submissionScripts/maxentToSameCluster (original)
+++ hadoop/core/trunk/src/test/gridmix/submissionScripts/maxentToSameCluster Wed Mar 12 17:57:32 2008
@@ -3,10 +3,17 @@
 GRID_DIR=`dirname "$0"`
 GRID_DIR=`cd "$GRID_DIR"; pwd`
 source $GRID_DIR/../gridmix-env
+PROCESSES=""
 
 for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
 do
     echo $i
     $GRID_MIX_HOME/maxent/maxent.large  2>&1 > maxent.large.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
+
+for APROC in ${PROCESSES}; do
+  wait ${APROC}
+done
+

Modified: hadoop/core/trunk/src/test/gridmix/submissionScripts/monsterQueriesToSameCluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/gridmix/submissionScripts/monsterQueriesToSameCluster?rev=636592&r1=636591&r2=636592&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/gridmix/submissionScripts/monsterQueriesToSameCluster (original)
+++ hadoop/core/trunk/src/test/gridmix/submissionScripts/monsterQueriesToSameCluster Wed Mar 12 17:57:32 2008
@@ -3,11 +3,13 @@
 GRID_DIR=`dirname "$0"`
 GRID_DIR=`cd "$GRID_DIR"; pwd`
 source $GRID_DIR/../gridmix-env
+PROCESSES=""
 
 for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
 do
     echo $i
     $GRID_MIX_HOME/monsterQuery/monster_query.small  2>&1 > monster_query.medium.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
@@ -15,6 +17,7 @@
 do
     echo $i
     $GRID_MIX_HOME/monsterQuery/monster_query.medium  2>&1 > monster_query.medium.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
 
@@ -22,6 +25,10 @@
 do
     echo $i
     $GRID_MIX_HOME/monsterQuery/monster_query.large  2>&1 > monster_query.large.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
+for APROC in ${PROCESSES}; do
+  wait ${APROC}
+done

Modified: hadoop/core/trunk/src/test/gridmix/submissionScripts/textSortToSameCluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/gridmix/submissionScripts/textSortToSameCluster?rev=636592&r1=636591&r2=636592&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/gridmix/submissionScripts/textSortToSameCluster (original)
+++ hadoop/core/trunk/src/test/gridmix/submissionScripts/textSortToSameCluster Wed Mar 12 17:57:32 2008
@@ -3,15 +3,19 @@
 GRID_DIR=`dirname "$0"`
 GRID_DIR=`cd "$GRID_DIR"; pwd`
 source $GRID_DIR/../gridmix-env
+PROCESSES=""
 
 for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
 do
     echo $i
     $GRID_MIX_HOME/pipesort/text-sort.small  2>&1 > pipesort.small.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
     $GRID_MIX_HOME/streamsort/text-sort.small  2>&1 > streamsort.small.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
     $GRID_MIX_HOME/javasort/text-sort.small  2>&1 > javasort.small.$i.out &

+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
@@ -19,10 +23,13 @@
 do
     echo $i
     $GRID_MIX_HOME/pipesort/text-sort.medium  2>&1 > pipesort.medium.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
     $GRID_MIX_HOME/streamsort/text-sort.medium  2>&1 > streamsort.medium.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
     $GRID_MIX_HOME/javasort/text-sort.medium  2>&1 > javasort.medium.$i.out &

+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
 
@@ -30,10 +37,17 @@
 do
     echo $i
     $GRID_MIX_HOME/pipesort/text-sort.large  2>&1 > pipesort.large.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
     $GRID_MIX_HOME/streamsort/text-sort.large  2>&1 > pipesort.large.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
     $GRID_MIX_HOME/javasort/text-sort.large  2>&1 > pipesort.large.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
+done
+
+for APROC in ${PROCESSES}; do
+  wait ${APROC}
 done
     

Modified: hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataScanToSameCluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataScanToSameCluster?rev=636592&r1=636591&r2=636592&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataScanToSameCluster (original)
+++ hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataScanToSameCluster Wed Mar 12 17:57:32 2008
@@ -3,11 +3,13 @@
 GRID_DIR=`dirname "$0"`
 GRID_DIR=`cd "$GRID_DIR"; pwd`
 source $GRID_DIR/../gridmix-env
+PROCESSES=""
     
 for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
 do
     echo $i
     $GRID_MIX_HOME/webdatascan/webdata_scan.medium  2>&1 > webdata_scan.medium.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
@@ -15,6 +17,7 @@
 do
     echo $i
     $GRID_MIX_HOME/webdatascan/webdata_scan.small  2>&1 > webdata_scan.small.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
 
@@ -22,7 +25,10 @@
 do
     echo $i
     $GRID_MIX_HOME/webdatascan/webdata_scan.large  2>&1 > webdata_scan.large.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
-
+for APROC in ${PROCESSES}; do
+  wait ${APROC}
+done

Modified: hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataSortToSameCluster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataSortToSameCluster?rev=636592&r1=636591&r2=636592&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataSortToSameCluster (original)
+++ hadoop/core/trunk/src/test/gridmix/submissionScripts/webdataSortToSameCluster Wed Mar 12 17:57:32 2008
@@ -3,11 +3,16 @@
 GRID_DIR=`dirname "$0"`
 GRID_DIR=`cd "$GRID_DIR"; pwd`
 source $GRID_DIR/../gridmix-env
+PROCESSES=""
 
 for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
 do
     echo $i
     $GRID_MIX_HOME/webdatasort/webdata_sort.large  2>&1 > webdata_sort.large.$i.out &
+    PROCESSES="${PROCESSES} $!"
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
+for APROC in ${PROCESSES}; do
+  wait ${APROC}
+done



Mime
View raw message