hadoop-common-commits mailing list archives

From omal...@apache.org
Subject svn commit: r724531 [2/2] - in /hadoop/core/trunk: ./ src/contrib/vaidya/ src/contrib/vaidya/src/ src/contrib/vaidya/src/java/ src/contrib/vaidya/src/java/org/ src/contrib/vaidya/src/java/org/apache/ src/contrib/vaidya/src/java/org/apache/hadoop/ src/c...
Date Mon, 08 Dec 2008 22:45:39 GMT
Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/JobStatisticsInterface.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/JobStatisticsInterface.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/JobStatisticsInterface.java
(added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/JobStatisticsInterface.java
Mon Dec  8 14:45:38 2008
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.statistics.job;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.mapred.JobConf;
+
+public interface JobStatisticsInterface {
+  
+  /**
+   * Get job configuration (job.xml) values
+   */
+  public JobConf getJobConf();
+  
+  /**
+   * Get job counters of type long
+   */
+  public long getLongValue(Enum key);
+  
+  /**
+   * Get job counters of type double
+   */
+  public double getDoubleValue(Enum key);
+  
+  /**
+   * Get job counters of type String
+   */
+  public String getStringValue(Enum key);
+  
+  /**
+   * Set key value of type long
+   */
+  public void setValue(Enum key, long value);
+  
+  /**
+   * Set key value of type double
+   */
+  public void setValue(Enum key, double value);
+  
+  /**
+   * Set key value of type String
+   */
+  public void setValue(Enum key, String value);
+  
+  /**
+   * @param mapTaskSortKey : specific counter key used for sorting the task list
+   * @param dataType : data type of the counter key used for sorting.
+   * If the sort key is null, map tasks are sorted by map task id.
+   * @return mapTaskList : ArrayList of MapTaskStatistics
+   */
+  public ArrayList<MapTaskStatistics> getMapTaskList(Enum mapTaskSortKey, KeyDataType dataType);
+  
+  /**
+   * @param reduceTaskSortKey : specific counter key used for sorting the task list
+   * @param dataType : data type of the counter key used for sorting.
+   * If the sort key is null, reduce tasks are sorted by task id.
+   * @return reduceTaskList : ArrayList of ReduceTaskStatistics
+   */
+  public ArrayList<ReduceTaskStatistics> getReduceTaskList(Enum reduceTaskSortKey, KeyDataType dataType);
+  
+  
+  /*
+   * Print the Job Execution Statistics
+   */
+  public void printJobExecutionStatistics();
+  
+  
+  /*
+   * Job and Task statistics Key data types
+   */
+  public static enum KeyDataType {
+    STRING, LONG, DOUBLE
+  }
+  
+  /**
+   * Job Keys
+   */
+  public static enum JobKeys {
+    JOBTRACKERID, JOBID, JOBNAME, USER, SUBMIT_TIME, CONF_PATH, LAUNCH_TIME, TOTAL_MAPS, TOTAL_REDUCES,
+    STATUS, FINISH_TIME, FINISHED_MAPS, FINISHED_REDUCES, FAILED_MAPS, FAILED_REDUCES, 
+    LAUNCHED_MAPS, LAUNCHED_REDUCES, RACKLOCAL_MAPS, DATALOCAL_MAPS, HDFS_BYTES_READ,
+    HDFS_BYTES_WRITTEN, LOCAL_BYTES_READ, LOCAL_BYTES_WRITTEN, COMBINE_OUTPUT_RECORDS,
+    COMBINE_INPUT_RECORDS, REDUCE_INPUT_GROUPS, REDUCE_INPUT_RECORDS, REDUCE_OUTPUT_RECORDS,
+    MAP_INPUT_RECORDS, MAP_OUTPUT_RECORDS, MAP_INPUT_BYTES, MAP_OUTPUT_BYTES, MAP_HDFS_BYTES_WRITTEN,
+    JOBCONF
+   }
+  
+  /**
+   * Map Task Keys
+   */
+  public static enum MapTaskKeys {
+    TASK_ID, TASK_TYPE, START_TIME, STATUS, FINISH_TIME, HDFS_BYTES_READ, HDFS_BYTES_WRITTEN,
+    LOCAL_BYTES_READ, LOCAL_BYTES_WRITTEN, COMBINE_OUTPUT_RECORDS, COMBINE_INPUT_RECORDS,
+    OUTPUT_RECORDS, INPUT_RECORDS, INPUT_BYTES, OUTPUT_BYTES, NUM_ATTEMPTS, ATTEMPT_ID,
+    HOSTNAME, SPLITS
+  }
+  
+  /**
+   * Reduce Task Keys
+   */
+  public static enum ReduceTaskKeys {
+    
+    TASK_ID, TASK_TYPE, START_TIME, STATUS, FINISH_TIME, HDFS_BYTES_READ, HDFS_BYTES_WRITTEN,
+    LOCAL_BYTES_READ, LOCAL_BYTES_WRITTEN, COMBINE_OUTPUT_RECORDS, COMBINE_INPUT_RECORDS,
+    OUTPUT_RECORDS, INPUT_RECORDS, NUM_ATTEMPTS, ATTEMPT_ID, HOSTNAME, SHUFFLE_FINISH_TIME,
+    SORT_FINISH_TIME, INPUT_GROUPS
+  }
+}
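
For illustration, a minimal consumer of this interface might look like the sketch below. JobStatsExample and summarize are invented names, and the concrete implementation behind jobStats is assumed to come from the rest of this patch.

    import java.util.ArrayList;

    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface;
    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.JobKeys;
    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.KeyDataType;
    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.MapTaskKeys;
    import org.apache.hadoop.vaidya.statistics.job.MapTaskStatistics;

    public class JobStatsExample {
      public static void summarize(JobStatisticsInterface jobStats) {
        // Job-level counters are fetched by enum key through type-specific getters.
        long launchedMaps = jobStats.getLongValue(JobKeys.LAUNCHED_MAPS);
        String user = jobStats.getStringValue(JobKeys.USER);
        System.out.println(user + " launched " + launchedMaps + " map tasks");

        // Task lists can be sorted by any counter key; a null sort key sorts by task id.
        ArrayList<MapTaskStatistics> maps =
            jobStats.getMapTaskList(MapTaskKeys.INPUT_BYTES, KeyDataType.LONG);
        for (MapTaskStatistics m : maps) {
          System.out.println(m.getStringValue(MapTaskKeys.TASK_ID) + ": "
              + m.getLongValue(MapTaskKeys.INPUT_BYTES) + " input bytes");
        }
      }
    }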

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/MapTaskStatistics.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/MapTaskStatistics.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/MapTaskStatistics.java
(added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/MapTaskStatistics.java
Mon Dec  8 14:45:38 2008
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.statistics.job;
+
+/*
+ * Map task statistics extends TaskStatistics
+ */
+public class MapTaskStatistics extends TaskStatistics {
+  
+}

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/ReduceTaskStatistics.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/ReduceTaskStatistics.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/ReduceTaskStatistics.java
(added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/ReduceTaskStatistics.java
Mon Dec  8 14:45:38 2008
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.statistics.job;
+
+/*
+ * Reduce task statistics extends TaskStatistics
+ */
+public class ReduceTaskStatistics extends TaskStatistics {
+  
+}

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/TaskStatistics.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/TaskStatistics.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/TaskStatistics.java
(added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/statistics/job/TaskStatistics.java
Mon Dec  8 14:45:38 2008
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.statistics.job;
+
+import java.util.Hashtable;
+import java.util.Map;
+
+/**
+ * Base class that holds per-task statistics as Enum-keyed string values,
+ * with typed accessors for long, double and String counters.
+ */
+public class TaskStatistics {
+  
+  /*
+   * Stores task statistics as Enum/String key,value pairs.
+   */
+  private Hashtable<Enum, String>  _task = new Hashtable<Enum, String>();
+  
+  /*
+   * Get Long key value
+   */
+  public long getLongValue(Enum key) {
+    return Long.parseLong(this._task.get(key));
+  }
+  
+  /*
+   * Get double key value
+   */
+  public double getDoubleValue(Enum key) {
+    return Double.parseDouble(this._task.get(key));
+  }
+  
+  /*
+   * Get String key value
+   */
+  public String getStringValue(Enum key) {
+    return this._task.get(key);
+  }
+  
+  /*
+   * Set long key value 
+   */
+  public void setValue(Enum key, long value) {
+    this._task.put(key, Long.toString(value));
+  }
+  
+  /*
+   * Set double key value
+   */
+  public void setValue(Enum key, double value) {
+    this._task.put(key, Double.toString(value));
+  }
+  
+  /*
+   * Set String key value
+   */
+  public void setValue(Enum key, String value) {
+    this._task.put(key, value);
+  }
+  
+  /*
+   * Print the key/value pairs for a task
+   */
+  public void printKeys() {
+    for (Map.Entry<Enum, String> entry : this._task.entrySet()) {
+      System.out.println("Key:<" + entry.getKey().name() +
+                         ">, value:<" + entry.getValue() + ">");
+    }
+  }
+}
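
Since TaskStatistics stores every counter as a String keyed by a raw Enum, the typed setters and getters do the conversion at the boundary. A minimal round-trip sketch (TaskStatsExample is an invented name):

    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.MapTaskKeys;
    import org.apache.hadoop.vaidya.statistics.job.MapTaskStatistics;

    public class TaskStatsExample {
      public static void main(String[] args) {
        MapTaskStatistics task = new MapTaskStatistics();

        // setValue converts the long to its string form before storing it...
        task.setValue(MapTaskKeys.INPUT_BYTES, 1048576L);

        // ...and getLongValue parses it back on the way out.
        long bytes = task.getLongValue(MapTaskKeys.INPUT_BYTES);
        System.out.println("input bytes = " + bytes);

        // Note: Hashtable.get returns null for a key that was never set,
        // so the typed getters fail on missing keys.
        task.printKeys();
      }
    }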

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/util/XMLUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/util/XMLUtils.java?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/util/XMLUtils.java
(added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/util/XMLUtils.java
Mon Dec  8 14:45:38 2008
@@ -0,0 +1,237 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.vaidya.util;
+
+import java.io.IOException;
+import java.io.File;
+import java.io.InputStream;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.Source;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.Result;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+
+import org.xml.sax.SAXParseException;
+import org.xml.sax.SAXException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+
+/**
+ * Utility class for working with DOM documents
+ */
+public class XMLUtils {
+
+  /** Prints the specified node, then prints all of its children. */
+
+  public static void printDOM(Node node) {
+
+    int type = node.getNodeType();
+
+    switch (type) {
+
+      // print the document element
+      case Node.DOCUMENT_NODE: {
+        System.out.print("<?xml version=\"1.0\" ?>");
+        printDOM(((Document)node).getDocumentElement());
+        break;
+      }
+
+      // print element with attributes
+      case Node.ELEMENT_NODE: {
+        System.out.println();
+        System.out.print("<");
+        System.out.print(node.getNodeName());
+        NamedNodeMap attrs = node.getAttributes();
+        for (int i = 0; i < attrs.getLength(); i++) {
+          Node attr = attrs.item(i);
+          System.out.print(" " + attr.getNodeName().trim() +
+                           "=\"" + attr.getNodeValue().trim() +
+                           "\"");
+        }
+        System.out.print(">");
+        NodeList children = node.getChildNodes();
+
+        if (children != null) {
+          int len = children.getLength();
+          for (int i = 0; i < len; i++)
+            printDOM(children.item(i));
+        }
+        break;
+      }
+
+      // handle entity reference nodes
+
+      case Node.ENTITY_REFERENCE_NODE: {
+        System.out.print("&");
+        System.out.print(node.getNodeName().trim());
+        System.out.print(";");
+        break;
+      }
+
+      // print cdata sections
+      case Node.CDATA_SECTION_NODE: {
+        System.out.print("<![CDATA[");
+        System.out.print(node.getNodeValue().trim());
+        System.out.print("]]>");
+        break;
+      }
+
+      // print text
+      case Node.TEXT_NODE: {
+        System.out.println();
+        System.out.print(node.getNodeValue().trim());
+        break;
+      }
+
+      // print processing instruction
+
+      case Node.PROCESSING_INSTRUCTION_NODE: {
+        System.out.print("<?");
+        System.out.print(node.getNodeName().trim());
+        String data = node.getNodeValue().trim();
+        System.out.print(" " + data);
+        System.out.print("?>");
+        break;
+      }
+    }
+
+    if (type == Node.ELEMENT_NODE) {
+      System.out.println();
+      System.out.print("</");
+      System.out.print(node.getNodeName().trim());
+      System.out.print('>');
+    }
+  }
+
+  /*
+   * Get the value of the first (or only) element with the given node name
+   */
+  public static String getElementValue(String elementName, Element element) throws Exception {
+    NodeList childNodes = element.getElementsByTagName(elementName);
+    Element cn = (Element) childNodes.item(0);
+    if (cn == null) {
+      throw new Exception("No element found with given name:" + elementName);
+    }
+    return cn.getFirstChild().getNodeValue().trim();
+  }
+
+  /**
+   * Parse an XML input stream and create a Document
+   * @param fs the XML input stream
+   * @return Document, or null if parsing fails
+   */
+  public static Document parse(InputStream fs) {
+    Document document = null;
+    // Initiate DocumentBuilderFactory
+    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+
+    // Use a non-validating parser
+    factory.setValidating(false);
+
+    // To get one that understands namespaces
+    factory.setNamespaceAware(true);
+    try {
+      // Get DocumentBuilder
+      DocumentBuilder builder = factory.newDocumentBuilder();
+
+      // Parse the stream and load the Document into memory
+      document = builder.parse(fs);
+      return document;
+    } catch (SAXParseException spe) {
+      // Error generated by the parser
+      System.out.println("\n** Parsing error , line " + spe.getLineNumber()
+                         + ", uri " + spe.getSystemId());
+      System.out.println(" " + spe.getMessage() );
+      // Use the contained exception, if any
+      Exception x = spe;
+      if (spe.getException() != null)
+        x = spe.getException();
+      x.printStackTrace();
+    } catch (SAXException sxe) {
+      // Error generated during parsing
+      Exception x = sxe;
+      if (sxe.getException() != null)
+        x = sxe.getException();
+      x.printStackTrace();
+    } catch (ParserConfigurationException pce) {
+      // Parser with specified options can't be built
+      pce.printStackTrace();
+    } catch (IOException ioe) {
+      // I/O error
+      ioe.printStackTrace();
+    }
+    
+    return null;
+  }
+
+  /**
+   * This method writes a DOM document to a file
+   * @param filename
+   * @param document
+   */
+  public static void writeXmlToFile(String filename, Document document) {
+    try {
+      // Prepare the DOM document for writing
+      Source source = new DOMSource(document);
+      
+      // Prepare the output file
+      File file = new File(filename);
+      Result result = new StreamResult(file);
+
+      // Write the DOM document to the file
+      // Get Transformer
+      Transformer xformer = TransformerFactory.newInstance().newTransformer();
+
+      // Write to a file
+      xformer.transform(source, result);
+
+    } catch (TransformerConfigurationException e) {
+      System.out.println("TransformerConfigurationException: " + e);
+    } catch (TransformerException e) {
+      System.out.println("TransformerException: " + e);
+    }
+  }
+
+  /**
+   * Count elements in a document by tag name
+   * @param tag
+   * @param document
+   * @return number of elements with the given tag name
+   */
+  public static int countByTagName(String tag, Document document){
+    NodeList list = document.getElementsByTagName(tag);
+    return list.getLength();
+  }
+}
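
A minimal sketch of how these helpers chain together. XMLUtilsExample, the file names, and the DiagnosticTest/Title element names are placeholders borrowed from the vaidya documentation added below, not files shipped with this patch:

    import java.io.FileInputStream;

    import org.w3c.dom.Document;
    import org.apache.hadoop.vaidya.util.XMLUtils;

    public class XMLUtilsExample {
      public static void main(String[] args) throws Exception {
        // Placeholder path; parse() reports parser errors itself and returns null.
        Document doc = XMLUtils.parse(new FileInputStream("tests.xml"));
        if (doc == null) {
          return;
        }

        // Count elements by tag name anywhere in the document.
        System.out.println("DiagnosticTest elements: "
            + XMLUtils.countByTagName("DiagnosticTest", doc));

        // Read the text value of the first matching descendant element.
        String title = XMLUtils.getElementValue("Title", doc.getDocumentElement());
        System.out.println("first Title: " + title);

        // Serialize the document back out to a new file.
        XMLUtils.writeXmlToFile("tests-copy.xml", doc);
      }
    }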

Added: hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh (added)
+++ hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh Mon Dec  8 14:45:38 2008
@@ -0,0 +1,47 @@
+#!/bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+this="$0"
+while [ -h "$this" ]; do
+  ls=`ls -ld "$this"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    this="$link"
+  else
+    this=`dirname "$this"`/"$link"
+  fi
+done
+
+# convert relative path to absolute path
+bin=`dirname "$this"`
+script=`basename "$this"`
+bin=`cd "$bin"; pwd`
+this="$bin/$script"
+
+# Check that HADOOP_HOME and JAVA_HOME are set.
+if [ -z "$HADOOP_HOME" ] ; then
+  echo "HADOOP_HOME environment variable not defined"
+  exit 1
+fi
+
+if [ -z "$JAVA_HOME" ] ; then
+  echo "JAVA_HOME environment variable not defined"
+  exit 1
+fi
+
+hadoopVersion=`$HADOOP_HOME/bin/hadoop version | awk 'BEGIN { RS = "" ; FS = "\n" } ; { print $1 }' | awk '{print $2}'`
+
+$JAVA_HOME/bin/java -classpath $HADOOP_HOME/hadoop-${hadoopVersion}-core.jar:$HADOOP_HOME/contrib/vaidya/hadoop-${hadoopVersion}-vaidya.jar:$HADOOP_HOME/lib/commons-logging-1.0.4.jar:${CLASSPATH} org.apache.hadoop.vaidya.postexdiagnosis.PostExPerformanceDiagnoser "$@"

Modified: hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml?rev=724531&r1=724530&r2=724531&view=diff
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml Mon Dec  8 14:45:38 2008
@@ -52,6 +52,7 @@
     <hod-admin-guide label="HOD Admin Guide" href="hod_admin_guide.html"/>
     <hod-config-guide label="HOD Config Guide" href="hod_config_guide.html"/>
     <capacity_scheduler label="Capacity Scheduler" href="capacity_scheduler.html"/>
+    <vaidya    label="Hadoop Vaidya" href="vaidya.html"/>
     <api       label="API Docs"           href="ext:api/index" />
     <jdiff     label="API Changes"        href="ext:jdiff/changes" />
     <wiki      label="Wiki"               href="ext:wiki" />

Added: hadoop/core/trunk/src/docs/src/documentation/content/xdocs/vaidya.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/vaidya.xml?rev=724531&view=auto
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/vaidya.xml (added)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/vaidya.xml Mon Dec  8 14:45:38 2008
@@ -0,0 +1,171 @@
+<?xml version="1.0"?>
+<!--
+  Copyright 2002-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  
+  <header>
+    <title>Hadoop Vaidya: A Performance Diagnostic Tool for Map-Reduce Jobs</title>
+  </header>
+  
+  <body>
+  
+    <section>
+      <title>Purpose</title>
+      
+      <p>This document describes the user-facing facets of the Hadoop Vaidya tool. It
+         explains how to execute the default set of rules against your Map-Reduce job counters,
+         and how to write and execute new rules to detect specific performance problems.
+      </p>
+      <p>At present a few sample test rules are provided with the tool, with the objective
+         of growing the rules database over time. You are welcome to contribute new rules for
+         everyone's benefit; to do so, follow the
+         <a href="http://wiki.apache.org/hadoop/HowToContribute">procedure</a>
+         specified on the Apache Hadoop website.
+      </p>
+    </section>
+    
+    <section>
+      <title>Pre-requisites</title>
+      
+      <p>Ensure that Hadoop is installed and configured. More details:</p> 
+      <ul>
+        <li>
+          Make sure the HADOOP_HOME environment variable is set.
+        </li>
+        <li>
+          Make sure Java is installed and configured as part of the Hadoop installation.
+        </li>
+      </ul>
+    </section>
+    
+    <section>
+      <title>Overview</title>
+      
+      <p>Hadoop Vaidya (Vaidya in Sanskrit means "one who knows", or "a physician")
+	    is a rule-based performance diagnostic tool for
+        Map/Reduce jobs. It performs a post-execution analysis of a map/reduce
+        job by parsing and collecting execution statistics through job history
+        and job configuration files. It runs a set of predefined tests/rules
+        against the job execution statistics to diagnose various performance problems.
+        Each test rule detects a specific performance problem with the Map/Reduce job and provides
+        targeted advice to the user. The tool generates an XML report based on
+        the evaluation results of the individual test rules.
+      </p>
+      
+    </section>
+  
+    <section>
+	 <title>Terminology</title>
+	 
+	 <p> This section describes the main concepts and terminology involved with Hadoop Vaidya:</p>
+		<ul>
+			<li> <em>PostExPerformanceDiagnoser</em>: This class extends the base Diagnoser class and acts as a driver for post-execution performance analysis of Map/Reduce jobs.
+                       It detects performance inefficiencies by executing a set of performance diagnosis rules against the job execution statistics.</li>
+			<li> <em>Job Statistics</em>: This includes the job configuration information (job.xml) and the various counters logged by a Map/Reduce job as part of the job history log
+		           file. The counters are parsed and collected into the Job Statistics data structures, which contain global job-level aggregate counters and
+			     a set of counters for each Map and Reduce task.</li>
+			<li> <em>Diagnostic Test/Rule</em>: This is the program logic that detects an inefficiency of an M/R job based on the job statistics. The
+				 description of the test is specified as an XML element (DiagnosticTest) in a test description file, e.g. the
+				 default tests description file, <em>$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</em>. The actual logic is coded as
+				 a Java class and referenced in the DiagnosticTest XML element. </li>
+		</ul>
+	<p></p>
+	<p>The following section describes the <em>DiagnosticTest</em> XML element in a diagnostic test description file: </p>
+		<ul>
+			<li> <em>DiagnosticTest{Title}</em>: Specifies a short name/description of the test.</li>
+			<li> <em>DiagnosticTest{ClassName}</em>: Specifies the fully qualified class name that implements the test logic.</li>
+			<li> <em>DiagnosticTest{Description}</em>: Specifies a full description of the test rule.</li>
+			<li> <em>DiagnosticTest{Importance}</em>: Specifies a declarative value for the overall importance of the test rule. (Values: High, Medium, Low)</li>
+			<li> <em>DiagnosticTest{SuccessThreshold}</em>: This is a threshold value specified by the test case writer such that if the impact level of the test case
+				 is lower, the test is declared PASSED (or NEGATIVE). The impact level is calculated and returned
+				 by each test's evaluate function, specifying the degree of the problem the job has with respect to the condition being evaluated.</li>
+			<li> <em>DiagnosticTest{Prescription}</em>: This is targeted advice written by the test case adviser for the user to follow when the test is not PASSED. </li>
+			<li> <em>DiagnosticTest{InputElement}</em>: This is test-specific input that the test writer can optionally provide. It is supplied to the individual test case
+                       class so that the test writer can use it within the test case. This is typically test configuration information, so that the test writer need not change the
+                       Java code for the test case but rather can configure it using these input values. </li>
+		</ul>
+	<p></p>
+	<p>The following section describes the performance analysis report generated by the tool in XML format:</p>
+		<ul>
+			<li> <em>PostExPerformanceDiagnosticReport</em>: This is the document (root) element of the XML report generated by the tool. </li>
+			<li> <em>TestReportElement</em>: This is an XML report element in the test report document, one for each individual test specified in the test description
+				 file. </li>
+			<li> <em>TestReportElement{TestTitle}</em>: Included from DiagnosticTest{Title}. </li>
+			<li> <em>TestReportElement{TestDescription}</em>: Included from DiagnosticTest{Description}. </li>
+			<li> <em>TestReportElement{TestImportance}</em>: Included from DiagnosticTest{Importance}. </li>
+			<li> <em>TestReportElement{TestSeverity}</em>: This is the product of the test impact level and the test importance. It indicates the overall severity of the test.</li>
+			<li> <em>TestReportElement{ReferenceDetails}</em>: This is test-specific runtime information provided by the test case to support the test result and severity. Typically
+				 the test writer should print the test impact level in this section. </li>
+			<li> <em>TestReportElement{TestResults}</em>: This is the boolean outcome of the test based on the SuccessThreshold specified by the test writer in the DiagnosticTest description.
+				 PASSED (NEGATIVE) indicates no problem, while FAILED (POSITIVE) indicates a potential problem with the job for the given test case. </li>
+			<li> <em>TestReportElement{TestPrescription}</em>: This is included from DiagnosticTest{Prescription}, unless the test case writer overrides it in the test case class through the getPrescription()
+				 method. </li>
+		</ul>	 
+	</section>
+	
+	<section>
+		<title>How to Execute the Hadoop Vaidya Tool</title>
+		  
+      	<p>The script to execute Hadoop Vaidya is in the <code>$HADOOP_HOME/contrib/vaidya/bin/</code> directory.
+		   It comes with a default set of rules defined in the file
+           <code>$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</code>. </p>
+		  <ul>
+			<li>Make sure the HADOOP_HOME environment variable is set and Java is installed and configured.</li>
+			<li>Execute the Hadoop Vaidya script with -help (or without any arguments) to get the command line help, e.g.
+                       <code>=>sh $HADOOP_HOME/contrib/vaidya/bin/vaidya.sh -help</code></li>
+			<li>The user needs to supply the job's configuration file (<code>-jobconf job_conf.xml</code>), the job history log file (<code>-joblog job_history_log_file</code>), and optionally the test description
+				 file (<code>-testconf postex_diagnosis_tests.xml</code>). If the test description file is not specified, the default one is picked up from the Hadoop Vaidya jar (<code>$HADOOP_HOME/contrib/vaidya/hadoop-{version}-vaidya.jar</code>).
+				 This default test description file is also available at the following location for users to make a local copy, modify and add new test rules:
+			     <code>$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</code></li>
+			<li> Use the <code>-report report_file</code> option to store the XML report into the specified report_file. </li>  
+		 </ul>
+	</section>
+	
+    <section>
+		<title>How to Write and Execute your own Tests</title>
+		<p>Writing and executing your own test rules is not very hard. You can take a look at the Hadoop Vaidya source code for the existing set of tests.
+		   The source code is at this <a href="http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/">hadoop svn repository location</a>.
+		   The default set of tests is under the <code>"postexdiagnosis/tests/"</code> folder.</p>
+		<ul>
+		  <li>The test class for your new test case should extend the <code>org.apache.hadoop.vaidya.DiagnosticTest</code> class and
+		       override the following three methods from the base class:
+              <ul> 
+				<li> evaluate() </li>
+				<li> getPrescription() </li> 
+ 				<li> getReferenceDetails() </li> 
+              </ul>
+          </li>
+		  <li>Make a local copy of the <code>$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</code> file or create a new test description XML file.</li>
+		  <li>Add the test description element for your new test case to this test description file.</li>
+		  <li>Compile your new test class (or classes), archive them into a jar file and add it to the CLASSPATH, e.g. (<code>export CLASSPATH=$CLASSPATH:newtests.jar</code>)</li>
+		  <li>Execute the Hadoop Vaidya script with the job configuration, the job history log and a reference to the newly created test description file using the <em>-testconf</em> option:
+		  <code>=>sh $HADOOP_HOME/contrib/vaidya/bin/vaidya.sh -joblog job_history_log_file -jobconf job.xml -testconf new_test_description_file -report report.xml</code></li>
+		</ul>
+	</section>
+	
+    <p> </p>
+    <p> </p>
+    <p>
+      <em>Java and JNI are trademarks or registered trademarks of 
+      Sun Microsystems, Inc. in the United States and other countries.</em>
+    </p>
+    
+  </body>
+  
+</document>
\ No newline at end of file
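
To make the custom-test workflow described in vaidya.xml above concrete, here is a hedged sketch of a test class. The DiagnosticTest base class is not part of this half of the patch, so the method signatures below (evaluate taking the job statistics and returning the impact level as a double) are assumptions drawn from the documentation; FailedMapsTest is an invented example.

    // Sketch only: the real signatures live in org.apache.hadoop.vaidya.DiagnosticTest.
    import org.apache.hadoop.vaidya.DiagnosticTest;
    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface;
    import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.JobKeys;

    public class FailedMapsTest extends DiagnosticTest {

      private double impact;

      // Assumed contract: returns the impact level in [0,1], here the
      // fraction of map tasks that failed.
      public double evaluate(JobStatisticsInterface job) {
        double failed = job.getLongValue(JobKeys.FAILED_MAPS);
        double total = job.getLongValue(JobKeys.TOTAL_MAPS);
        impact = (total == 0) ? 0 : failed / total;
        return impact;
      }

      // Advice reported when the test does not PASS.
      public String getPrescription() {
        return "Inspect the logs of the failing map tasks and check for bad records or failing nodes.";
      }

      // Runtime details supporting the result; the docs suggest printing the impact level.
      public String getReferenceDetails() {
        return "failed maps impact level: " + impact;
      }
    }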


