Return-Path:
X-Original-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org
Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org
Received: from mail.apache.org (hermes.apache.org [140.211.11.3])
by minotaur.apache.org (Postfix) with SMTP id EF9084AE3
for ;
Thu, 2 Jun 2011 13:51:14 +0000 (UTC)
Received: (qmail 55518 invoked by uid 500); 2 Jun 2011 13:51:14 -0000
Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org
Received: (qmail 55453 invoked by uid 500); 2 Jun 2011 13:51:14 -0000
Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm
Precedence: bulk
List-Help:
List-Unsubscribe:
List-Post:
List-Id:
Reply-To: mapreduce-dev@hadoop.apache.org
Delivered-To: mailing list mapreduce-commits@hadoop.apache.org
Received: (qmail 55445 invoked by uid 99); 2 Jun 2011 13:51:14 -0000
Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230)
by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 02 Jun 2011 13:51:14 +0000
X-ASF-Spam-Status: No, hits=-2000.0 required=5.0
tests=ALL_TRUSTED
X-Spam-Check-By: apache.org
Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4)
by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 02 Jun 2011 13:51:11 +0000
Received: by eris.apache.org (Postfix, from userid 65534)
id ED8D3238897D; Thu, 2 Jun 2011 13:50:49 +0000 (UTC)
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: svn commit: r1130550 - in /hadoop/mapreduce/trunk: CHANGES.txt
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
src/docs/src/documentation/content/xdocs/gridmix.xml
Date: Thu, 02 Jun 2011 13:50:49 -0000
To: mapreduce-commits@hadoop.apache.org
From: amarrk@apache.org
X-Mailer: svnmailer-1.0.8
Message-Id: <20110602135049.ED8D3238897D@eris.apache.org>
X-Virus-Checked: Checked by ClamAV on apache.org
Author: amarrk
Date: Thu Jun 2 13:50:49 2011
New Revision: 1130550
URL: http://svn.apache.org/viewvc?rev=1130550&view=rev
Log:
MAPREDUCE-2543. [Gridmix] High-Ram feature emulation in Gridmix. (amarrk)
Modified:
hadoop/mapreduce/trunk/CHANGES.txt
hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml
Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=1130550&r1=1130549&r2=1130550&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Thu Jun 2 13:50:49 2011
@@ -9,6 +9,8 @@ Trunk (unreleased changes)
NEW FEATURES
+ MAPREDUCE-2543. [Gridmix] High-Ram feature emulation in Gridmix. (amarrk)
+
MAPREDUCE-2408. [Gridmix] Compression emulation in Gridmix. (amarrk)
MAPREDUCE-2473. Add "mapred groups" command to query the server-side groups
Modified: hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java?rev=1130550&r1=1130549&r2=1130550&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java Thu Jun 2 13:50:49 2011
@@ -37,10 +37,12 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.rumen.JobStory;
@@ -81,6 +83,9 @@ abstract class GridmixJob implements Cal
"gridmix.job-submission.use-queue-in-trace";
protected static final String GRIDMIX_DEFAULT_QUEUE =
"gridmix.job-submission.default-queue";
+ // configuration key to enable/disable High-Ram feature emulation
+ static final String GRIDMIX_HIGHRAM_EMULATION_ENABLE =
+ "gridmix.highram-emulation.enable";
private static void setJobQueue(Job job, String queue) {
if (queue != null) {
@@ -126,6 +131,12 @@ abstract class GridmixJob implements Cal
}
}
+ // configure high ram properties if enabled
+ if (conf.getBoolean(GRIDMIX_HIGHRAM_EMULATION_ENABLE, true)) {
+ configureHighRamProperties(jobdesc.getJobConf(),
+ ret.getConfiguration());
+ }
+
return ret;
}
});
@@ -138,6 +149,108 @@ abstract class GridmixJob implements Cal
outdir = new Path(outRoot, "" + seq);
}
+ // Scales a job-level configuration value from the original cluster to the
+ // simulated cluster: the job value (jobValueKey in sourceConf) is divided by
+ // the original cluster value (clusterValueKey in sourceConf), multiplied by
+ // the simulated cluster value (clusterValueKey in destConf), and stored in
+ // destConf under jobValueKey. Unset keys fall back to defaultValue.
+ // This kind of scaling is useful for memory parameters.
+ private static void scaleConfigParameter(Configuration sourceConf,
+ Configuration destConf, String clusterValueKey,
+ String jobValueKey, long defaultValue) {
+ long simulatedClusterDefaultValue =
+ destConf.getLong(clusterValueKey, defaultValue);
+
+ long originalClusterDefaultValue =
+ sourceConf.getLong(clusterValueKey, defaultValue);
+
+ long originalJobValue =
+ sourceConf.getLong(jobValueKey, defaultValue);
+
+ double scaleFactor = (double)originalJobValue/originalClusterDefaultValue; // floating-point division; NOTE(review): no guard for a zero/negative divisor
+
+ long simulatedJobValue = (long)(scaleFactor * simulatedClusterDefaultValue); // cast truncates toward zero
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("For the job configuration parameter '" + jobValueKey
+ + "' and the cluster configuration parameter '"
+ + clusterValueKey + "', the original job's configuration value"
+ + " is scaled from '" + originalJobValue + "' to '"
+ + simulatedJobValue + "' using the default (unit) value of "
+ + "'" + originalClusterDefaultValue + "' for the original "
+ + " cluster and '" + simulatedClusterDefaultValue + "' for the"
+ + " simulated cluster.");
+ }
+
+ destConf.setLong(jobValueKey, simulatedJobValue);
+ }
+
+ // Throws if conf's jobKey value exceeds the limit stored under limitKey;
+ // returns true only when limitKey is set to a non-negative value.
+ @SuppressWarnings("deprecation")
+ private static boolean checkMemoryUpperLimits(String jobKey, String limitKey,
+ Configuration conf,
+ boolean convertLimitToMB) {
+ if (conf.get(limitKey) != null) {
+ long limit = conf.getLong(limitKey, JobConf.DISABLED_MEMORY_LIMIT);
+ // a negative limit is treated as "not set" and skips the check.
+ if (limit >= 0) {
+ if (convertLimitToMB) {
+ limit /= (1024 * 1024); // divide by 2^20 to express the limit in MB
+ }
+
+ long scaledConfigValue =
+ conf.getLong(jobKey, JobConf.DISABLED_MEMORY_LIMIT);
+
+ // fail fast when the scaled job value is above the limit
+ if (scaledConfigValue > limit) {
+ throw new RuntimeException("Simulated job's configuration"
+ + " parameter '" + jobKey + "' got scaled to a value '"
+ + scaledConfigValue + "' which exceeds the upper limit of '"
+ + limit + "' defined for the simulated cluster by the key '"
+ + limitKey + "'. To disable High-Ram feature emulation, set '"
+ + GRIDMIX_HIGHRAM_EMULATION_ENABLE + "' to 'false'.");
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Checks jobKey against the task vmem upper limit; if unset, uses clusterMaxKey.
+ @SuppressWarnings("deprecation")
+ private static void validateTaskMemoryLimits(Configuration conf,
+ String jobKey, String clusterMaxKey) {
+ if (!checkMemoryUpperLimits(jobKey,
+ JobConf.UPPER_LIMIT_ON_TASK_VMEM_PROPERTY, conf, true)) {
+ checkMemoryUpperLimits(jobKey, clusterMaxKey, conf, false);
+ }
+ }
+
+ /**
+ * Sets the high ram job properties in the simulated job's configuration.
+ * The per-task map and reduce memory values are scaled from
+ * {@code sourceConf} into {@code destConf}, and each scaled value is
+ * validated against the configured upper limits right after it is set.
+ *
+ * @param sourceConf configuration the original job values are read from
+ * @param destConf configuration of the simulated job; modified in place
+ * @throws RuntimeException if a scaled value exceeds a configured upper limit
+ */
+ @SuppressWarnings("deprecation")
+ static void configureHighRamProperties(Configuration sourceConf,
+ Configuration destConf) {
+ // scale the per-map-task memory into destConf
+ scaleConfigParameter(sourceConf, destConf,
+ MRConfig.MAPMEMORY_MB, MRJobConfig.MAP_MEMORY_MB,
+ JobConf.DISABLED_MEMORY_LIMIT);
+
+ // throws if the scaled map memory exceeds a configured limit
+ validateTaskMemoryLimits(destConf, MRJobConfig.MAP_MEMORY_MB,
+ JTConfig.JT_MAX_MAPMEMORY_MB);
+
+ // scale the per-reduce-task memory into destConf
+ scaleConfigParameter(sourceConf, destConf,
+ MRConfig.REDUCEMEMORY_MB, MRJobConfig.REDUCE_MEMORY_MB,
+ JobConf.DISABLED_MEMORY_LIMIT);
+ // throws if the scaled reduce memory exceeds a configured limit
+ validateTaskMemoryLimits(destConf, MRJobConfig.REDUCE_MEMORY_MB,
+ JTConfig.JT_MAX_REDUCEMEMORY_MB);
+ }
+
/**
* Indicates whether this {@link GridmixJob} supports compression emulation.
*/
Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml?rev=1130550&r1=1130549&r2=1130550&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml Thu Jun 2 13:50:49 2011
@@ -639,6 +639,35 @@ hadoop jar <gridmix-jar> org.apach
+
+ Emulating High-Ram jobs
+ MapReduce allows users to define a job as a High-Ram job. Tasks from a
+ High-Ram job can occupy multiple slots on the task-trackers.
+ Each task-tracker assigns a fixed amount of virtual memory to each slot.
+ Tasks from High-Ram jobs can occupy multiple slots and can thus use more
+ virtual memory than a default task.
+
+ Emulating this behavior is important for the following reasons:
+
+
+ - Impact on scheduler: Scheduling of tasks from High-Ram jobs
+ impacts the scheduling behavior as it might result in slot
+ reservation and affect slot/resource utilization.
+
+ - Impact on the node: Since High-Ram tasks occupy multiple slots,
+ trackers do some bookkeeping for allocating extra resources for
+ these tasks. Thus this becomes a precursor for memory emulation
+ where tasks with high memory requirements need to be considered
+ as High-Ram tasks.
+
+
+ High-Ram feature emulation can be disabled by setting
+ gridmix.highram-emulation.enable
to
+ false
. By default High-Ram feature emulation is enabled.
+ Note that this feature works only for jobs of type LOADJOB.
+
+
+
Simplifying Assumptions
GridMix will be developed in stages, incorporating feedback and