hadoop-common-commits mailing list archives

From: sur...@apache.org
Subject: svn commit: r1362639 [2/3] - in /hadoop/common/branches/branch-1-win: ./ bin/ ivy/ lib/jdiff/ src/c++/libhdfs/ src/c++/libhdfs/m4/ src/c++/pipes/ src/c++/pipes/impl/ src/c++/task-controller/impl/ src/c++/utils/ src/c++/utils/impl/ src/contrib/streaming...
Date: Tue, 17 Jul 2012 20:36:12 GMT
Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/UserGroupInformation.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/UserGroupInformation.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/UserGroupInformation.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/UserGroupInformation.java Tue Jul 17 20:36:07 2012
@@ -51,14 +51,11 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.authentication.util.KerberosUtil;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.util.Shell;
 
-import com.sun.security.auth.NTUserPrincipal;
-import com.sun.security.auth.UnixPrincipal;
-import com.sun.security.auth.module.Krb5LoginModule;
-
 /**
  * User and group information for Hadoop.
  * This class wraps around a JAAS Subject and provides methods to determine the
@@ -253,22 +250,53 @@ public class UserGroupInformation {
   private final boolean isKeytab;
   private final boolean isKrbTkt;
   
-  private static final String OS_LOGIN_MODULE_NAME;
-  private static final Class<? extends Principal> OS_PRINCIPAL_CLASS;
+  private static String OS_LOGIN_MODULE_NAME;
+  private static Class<? extends Principal> OS_PRINCIPAL_CLASS;
   private static final boolean windows = 
                            System.getProperty("os.name").startsWith("Windows");
   private static Thread renewerThread = null;
   private static volatile boolean shouldRunRenewerThread = true;
   
-  static {
-    if (windows) {
-      OS_LOGIN_MODULE_NAME = "com.sun.security.auth.module.NTLoginModule";
-      OS_PRINCIPAL_CLASS = NTUserPrincipal.class;
+  /* Return the OS login module class name */
+  private static String getOSLoginModuleName() {
+    if (System.getProperty("java.vendor").contains("IBM")) {
+      return windows ? "com.ibm.security.auth.module.NTLoginModule"
+       : "com.ibm.security.auth.module.LinuxLoginModule";    
     } else {
-      OS_LOGIN_MODULE_NAME = "com.sun.security.auth.module.UnixLoginModule";
-      OS_PRINCIPAL_CLASS = UnixPrincipal.class;
+      return windows ? "com.sun.security.auth.module.NTLoginModule"
+        : "com.sun.security.auth.module.UnixLoginModule";
     }
   }
+
+  /* Return the OS principal class */
+  @SuppressWarnings("unchecked")
+  private static Class<? extends Principal> getOsPrincipalClass() {
+    ClassLoader cl = ClassLoader.getSystemClassLoader();
+    try {
+      if (System.getProperty("java.vendor").contains("IBM")) {
+        if (windows) {
+          return (Class<? extends Principal>)
+            cl.loadClass("com.ibm.security.auth.UsernamePrincipal");
+        } else {
+          return (Class<? extends Principal>)
+            (System.getProperty("os.arch").contains("64")
+             ? cl.loadClass("com.ibm.security.auth.UsernamePrincipal")
+             : cl.loadClass("com.ibm.security.auth.LinuxPrincipal"));
+        }
+      } else {
+        return (Class<? extends Principal>) (windows
+           ? cl.loadClass("com.sun.security.auth.NTUserPrincipal")
+           : cl.loadClass("com.sun.security.auth.UnixPrincipal"));
+      }
+    } catch (ClassNotFoundException e) {
+      LOG.error("Unable to find JAAS classes:" + e.getMessage());
+    }
+    return null;
+  }
+  static {
+    OS_LOGIN_MODULE_NAME = getOSLoginModuleName();
+    OS_PRINCIPAL_CLASS = getOsPrincipalClass();
+  }
   
   private static class RealUser implements Principal {
     private final UserGroupInformation realUser;
@@ -339,7 +367,7 @@ public class UserGroupInformation {
       }
     }
     private static final AppConfigurationEntry USER_KERBEROS_LOGIN =
-      new AppConfigurationEntry(Krb5LoginModule.class.getName(),
+      new AppConfigurationEntry(KerberosUtil.getKrb5LoginModuleName(),
                                 LoginModuleControlFlag.OPTIONAL,
                                 USER_KERBEROS_OPTIONS);
     private static final Map<String,String> KEYTAB_KERBEROS_OPTIONS = 
@@ -350,7 +378,7 @@ public class UserGroupInformation {
       KEYTAB_KERBEROS_OPTIONS.put("storeKey", "true");
     }
     private static final AppConfigurationEntry KEYTAB_KERBEROS_LOGIN =
-      new AppConfigurationEntry(Krb5LoginModule.class.getName(),
+      new AppConfigurationEntry(KerberosUtil.getKrb5LoginModuleName(),
                                 LoginModuleControlFlag.REQUIRED,
                                 KEYTAB_KERBEROS_OPTIONS);
     

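The UserGroupInformation change above drops the compile-time imports of com.sun.security.auth classes in favor of runtime lookups, so class initialization no longer fails with NoClassDefFoundError on IBM JDKs (HADOOP-6941 in the release notes below). A minimal standalone sketch of that vendor-detection technique; the class names match the hunk, but the scaffolding is illustrative and the hunk's extra special case for 32-bit IBM Linux JDKs is omitted:

    import java.security.Principal;

    public class OsLoginModules {
      private static final boolean WINDOWS =
          System.getProperty("os.name").startsWith("Windows");

      // Select the JAAS login module by JVM vendor instead of importing
      // com.sun.* types, so this class links on any JDK.
      static String loginModuleName() {
        if (System.getProperty("java.vendor").contains("IBM")) {
          return WINDOWS ? "com.ibm.security.auth.module.NTLoginModule"
                         : "com.ibm.security.auth.module.LinuxLoginModule";
        }
        return WINDOWS ? "com.sun.security.auth.module.NTLoginModule"
                       : "com.sun.security.auth.module.UnixLoginModule";
      }

      // Load the principal class reflectively for the same reason; return
      // null instead of failing static initialization when it is absent.
      @SuppressWarnings("unchecked")
      static Class<? extends Principal> principalClass() {
        String name = System.getProperty("java.vendor").contains("IBM")
            ? "com.ibm.security.auth.UsernamePrincipal"
            : (WINDOWS ? "com.sun.security.auth.NTUserPrincipal"
                       : "com.sun.security.auth.UnixPrincipal");
        try {
          return (Class<? extends Principal>)
              ClassLoader.getSystemClassLoader().loadClass(name);
        } catch (ClassNotFoundException e) {
          return null;
        }
      }
    }
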
Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java Tue Jul 17 20:36:07 2012
@@ -13,12 +13,12 @@
  */
 package org.apache.hadoop.security.authentication.client;
 
-import com.sun.security.auth.module.Krb5LoginModule;
 import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.security.authentication.util.KerberosUtil;
 import org.ietf.jgss.GSSContext;
 import org.ietf.jgss.GSSManager;
 import org.ietf.jgss.GSSName;
-import sun.security.jgss.GSSUtil;
+import org.ietf.jgss.Oid;
 
 import javax.security.auth.Subject;
 import javax.security.auth.login.AppConfigurationEntry;
@@ -97,7 +97,7 @@ public class KerberosAuthenticator imple
     }
 
     private static final AppConfigurationEntry USER_KERBEROS_LOGIN =
-      new AppConfigurationEntry(Krb5LoginModule.class.getName(),
+      new AppConfigurationEntry(KerberosUtil.getKrb5LoginModuleName(),
                                 AppConfigurationEntry.LoginModuleControlFlag.OPTIONAL,
                                 USER_KERBEROS_OPTIONS);
 
@@ -109,7 +109,7 @@ public class KerberosAuthenticator imple
       return USER_KERBEROS_CONF;
     }
   }
-
+  
   private URL url;
   private HttpURLConnection conn;
   private Base64 base64;
@@ -195,9 +195,11 @@ public class KerberosAuthenticator imple
           try {
             GSSManager gssManager = GSSManager.getInstance();
             String servicePrincipal = "HTTP/" + KerberosAuthenticator.this.url.getHost();
+            Oid oid = KerberosUtil.getOidInstance("NT_GSS_KRB5_PRINCIPAL");
             GSSName serviceName = gssManager.createName(servicePrincipal,
-                                                        GSSUtil.NT_GSS_KRB5_PRINCIPAL);
-            gssContext = gssManager.createContext(serviceName, GSSUtil.GSS_KRB5_MECH_OID, null,
+                                                        oid);
+            oid = KerberosUtil.getOidInstance("GSS_KRB5_MECH_OID");
+            gssContext = gssManager.createContext(serviceName, oid, null,
                                                   GSSContext.DEFAULT_LIFETIME);
             gssContext.requestCredDeleg(true);
             gssContext.requestMutualAuth(true);

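The KerberosAuthenticator hunk removes the last dependency on the JDK-internal sun.security.jgss.GSSUtil by resolving the two OIDs through KerberosUtil. A sketch of the equivalent sequence using the standard Kerberos v5 OID strings directly (these are the well-known RFC 1964 values; KerberosUtil.getOidInstance in the patch presumably resolves the same constants in a vendor-neutral way):

    import org.ietf.jgss.GSSContext;
    import org.ietf.jgss.GSSException;
    import org.ietf.jgss.GSSManager;
    import org.ietf.jgss.GSSName;
    import org.ietf.jgss.Oid;

    public class SpnegoContextSketch {
      static GSSContext create(String host) throws GSSException {
        // 1.2.840.113554.1.2.2   = krb5 mechanism OID
        // 1.2.840.113554.1.2.2.1 = krb5 principal name-type OID
        Oid krb5PrincipalNameType = new Oid("1.2.840.113554.1.2.2.1");
        Oid krb5MechOid = new Oid("1.2.840.113554.1.2.2");
        GSSManager gssManager = GSSManager.getInstance();
        GSSName serviceName =
            gssManager.createName("HTTP/" + host, krb5PrincipalNameType);
        GSSContext ctx = gssManager.createContext(
            serviceName, krb5MechOid, null, GSSContext.DEFAULT_LIFETIME);
        ctx.requestCredDeleg(true);
        ctx.requestMutualAuth(true);
        return ctx;
      }
    }
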
Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/authentication/server/KerberosAuthenticationHandler.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/authentication/server/KerberosAuthenticationHandler.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/authentication/server/KerberosAuthenticationHandler.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/authentication/server/KerberosAuthenticationHandler.java Tue Jul 17 20:36:07 2012
@@ -15,9 +15,9 @@ package org.apache.hadoop.security.authe
 
 import org.apache.hadoop.security.authentication.client.AuthenticationException;
 import org.apache.hadoop.security.authentication.client.KerberosAuthenticator;
-import com.sun.security.auth.module.Krb5LoginModule;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.security.KerberosName;
+import org.apache.hadoop.security.authentication.util.KerberosUtil;
 import org.ietf.jgss.GSSContext;
 import org.ietf.jgss.GSSCredential;
 import org.ietf.jgss.GSSManager;
@@ -93,7 +93,7 @@ public class KerberosAuthenticationHandl
       }
 
       return new AppConfigurationEntry[]{
-        new AppConfigurationEntry(Krb5LoginModule.class.getName(),
+          new AppConfigurationEntry(KerberosUtil.getKrb5LoginModuleName(),
                                   AppConfigurationEntry.LoginModuleControlFlag.REQUIRED,
                                   options),};
     }

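Here, as in the previous two files, the hard reference to Krb5LoginModule becomes a KerberosUtil lookup. The helper itself lives in a file not shown in this part of the commit; a plausible minimal implementation, in the same vendor-switch style the patch uses elsewhere, would be:

    public class KerberosUtil {
      /* Return the Kerberos login module name for the running JVM. */
      public static String getKrb5LoginModuleName() {
        return System.getProperty("java.vendor").contains("IBM")
            ? "com.ibm.security.auth.module.Krb5LoginModule"
            : "com.sun.security.auth.module.Krb5LoginModule";
      }
    }
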
Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java Tue Jul 17 20:36:07 2012
@@ -381,24 +381,30 @@ extends AbstractDelegationTokenIdentifie
         while (running) {
           long now = System.currentTimeMillis();
           if (lastMasterKeyUpdate + keyUpdateInterval < now) {
-            try {
-              rollMasterKey();
-              lastMasterKeyUpdate = now;
-            } catch (IOException e) {
-              LOG.error("Master key updating failed. "
-                  + StringUtils.stringifyException(e));
+            synchronized (AbstractDelegationTokenSecretManager.this) {
+              if (running) {
+                try {
+                  rollMasterKey();
+                  lastMasterKeyUpdate = now;
+                } catch (IOException e) {
+                  LOG.error("Master key updating failed. "
+                            + StringUtils.stringifyException(e));
+                }
+              }
             }
           }
           if (lastTokenCacheCleanup + tokenRemoverScanInterval < now) {
-            removeExpiredToken();
-            lastTokenCacheCleanup = now;
+            synchronized (AbstractDelegationTokenSecretManager.this) {
+              if (running) {
+                removeExpiredToken();
+                lastTokenCacheCleanup = now;
+              }
+            }
           }
           try {
             Thread.sleep(5000); // 5 seconds
           } catch (InterruptedException ie) {
-            LOG
-            .error("InterruptedExcpetion recieved for ExpiredTokenRemover thread "
-                + ie);
+            return;
           }
         }
       } catch (Throwable t) {

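The token-remover rework shows two patterns for shutdown-safe background threads: re-check the running flag while holding the manager's monitor before doing work, so a concurrent shutdown cannot race with a key roll or token sweep in flight, and treat InterruptedException as the stop signal rather than an error to log. A condensed sketch of that loop structure, with illustrative names:

    public class ExpiryWorker implements Runnable {
      private volatile boolean running = true;

      public void run() {
        while (running) {
          synchronized (this) {
            if (running) {        // re-check under the lock: a stop request
              doPeriodicWork();   // may have landed since the outer test
            }
          }
          try {
            Thread.sleep(5000);   // 5 seconds between sweeps
          } catch (InterruptedException ie) {
            return;               // interrupt means "shut down", not an error
          }
        }
      }

      public synchronized void stop() {
        running = false;
      }

      private void doPeriodicWork() { /* roll keys, expire tokens, ... */ }
    }
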
Modified: hadoop/common/branches/branch-1-win/src/docs/releasenotes.html
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/docs/releasenotes.html?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/docs/releasenotes.html (original)
+++ hadoop/common/branches/branch-1-win/src/docs/releasenotes.html Tue Jul 17 20:36:07 2012
@@ -2,7 +2,7 @@
 <html>
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<title>Hadoop 1.0.0 Release Notes</title>
+<title>Hadoop 1.0.3 Release Notes</title>
 <STYLE type="text/css">
 		H1 {font-family: sans-serif}
 		H2 {font-family: sans-serif; margin-left: 7mm}
@@ -10,10 +10,402 @@
 	</STYLE>
 </head>
 <body>
-<h1>Hadoop 1.0.0 Release Notes</h1>
+<h1>Hadoop 1.0.3 Release Notes</h1>
 		These release notes include new developer and user-facing incompatibilities, features, and major improvements. 
 
 <a name="changes"/>
+
+<h2>Changes since Hadoop 1.0.2</h2>
+
+<h3>Jiras with Release Notes (describe major or incompatible changes)</h3>
+<ul>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-5528">HADOOP-5528</a>.
+     Major new feature reported by klbostee and fixed by klbostee <br>
+     <b>Binary partitioner</b><br>
+     <blockquote>                                              New BinaryPartitioner that partitions BinaryComparable keys by hashing a configurable part of the bytes array corresponding to the key.
+
+      
+</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8352">HADOOP-8352</a>.
+     Major improvement reported by owen.omalley and fixed by owen.omalley <br>
+     <b>We should always generate a new configure script for the c++ code</b><br>
+     <blockquote>If you are compiling c++, the configure script will now be automatically regenerated as it should be.<br>This requires autoconf version 2.61 or greater.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-4017">MAPREDUCE-4017</a>.
+     Trivial improvement reported by knoguchi and fixed by tgraves (jobhistoryserver, jobtracker)<br>
+     <b>Add jobname to jobsummary log</b><br>
+     <blockquote>                                              The Job Summary log may contain commas in values that are escaped by a &#39;\&#39; character.  This was true before, but is more likely to be exposed now. 
+
+      
+</blockquote></li>
+
+</ul>
+
+
+<h3>Other Jiras (describe bug fixes and minor changes)</h3>
+<ul>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-6924">HADOOP-6924</a>.
+     Major bug reported by wattsteve and fixed by devaraj <br>
+     <b>Build fails with non-Sun JREs due to different pathing to the operating system architecture shared libraries</b><br>
+     <blockquote>The src/native/configure script used to build the native libraries has an environment variable called JNI_LDFLAGS which is set as follows:<br><br>JNI_LDFLAGS=&quot;-L$JAVA_HOME/jre/lib/$OS_ARCH/server&quot;<br><br>This pathing convention to the shared libraries for the operating system architecture is unique to Oracle/Sun Java and thus on other flavors of Java the path will not exist and will result in a build failure with the following exception:<br><br>     [exec] gcc -shared  ../src/org/apache/hadoop/io/compress/zlib...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-6941">HADOOP-6941</a>.
+     Major bug reported by wattsteve and fixed by devaraj <br>
+     <b>Support non-SUN JREs in UserGroupInformation</b><br>
+     <blockquote>Attempting to format the namenode or attempting to start Hadoop using Apache Harmony or the IBM Java JREs results in the following exception:<br><br>10/09/07 16:35:05 ERROR namenode.NameNode: java.lang.NoClassDefFoundError: com.sun.security.auth.UnixPrincipal<br>	at org.apache.hadoop.security.UserGroupInformation.&lt;clinit&gt;(UserGroupInformation.java:223)<br>	at java.lang.J9VMInternals.initializeImpl(Native Method)<br>	at java.lang.J9VMInternals.initialize(J9VMInternals.java:200)<br>	at org.apache.hadoop.hdfs.ser...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-6963">HADOOP-6963</a>.
+     Critical bug reported by owen.omalley and fixed by raviprak (fs)<br>
+     <b>Fix FileUtil.getDU. It should not include the size of the directory or follow symbolic links</b><br>
+     <blockquote>The getDU method should not include the size of the directory. The Java interface says that the value is undefined and in Linux/Sun it gets the 4096 for the inode. Clearly this isn&apos;t useful.<br>It also recursively calls itself. In case the directory has a symbolic link forming a cycle, getDU keeps spinning in the cycle. In our case, we saw this in the org.apache.hadoop.mapred.JobLocalizer.downloadPrivateCacheObjects call. This prevented other tasks on the same node from committing, causing the T...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-7381">HADOOP-7381</a>.
+     Major bug reported by jrottinghuis and fixed by jrottinghuis (build)<br>
+     <b>FindBugs OutOfMemoryError</b><br>
+     <blockquote>When running the findbugs target from Jenkins, I get an OutOfMemory error.<br>The &quot;effort&quot; in FindBugs is set to Max which ends up using a lot of memory to go through all the classes. The jvmargs passed to FindBugs is hardcoded to 512 MB max.<br><br>We can leave the default to 512M, as long as we pass this as an ant parameter which can be overwritten in individual cases through -D, or in the build.properties file (either basedir, or user&apos;s home directory).<br></blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8027">HADOOP-8027</a>.
+     Minor improvement reported by qwertymaniac and fixed by atm (metrics)<br>
+     <b>Visiting /jmx on the daemon web interfaces may print unnecessary error in logs</b><br>
+     <blockquote>Logs that follow a {{/jmx}} servlet visit:<br><br>{code}<br>11/11/22 12:09:52 ERROR jmx.JMXJsonServlet: getting attribute UsageThreshold of java.lang:type=MemoryPool,name=Par Eden Space threw an exception<br>javax.management.RuntimeMBeanException: java.lang.UnsupportedOperationException: Usage threshold is not supported<br>	at com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.rethrow(DefaultMBeanServerInterceptor.java:856)<br>...<br>{code}</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8151">HADOOP-8151</a>.
+     Major bug reported by tlipcon and fixed by mattf (io, native)<br>
+     <b>Error handling in snappy decompressor throws invalid exceptions</b><br>
+     <blockquote>SnappyDecompressor.c has the following code in a few places:<br>{code}<br>    THROW(env, &quot;Ljava/lang/InternalError&quot;, &quot;Could not decompress data. Buffer length is too small.&quot;);<br>{code}<br>this is incorrect, though, since the THROW macro doesn&apos;t need the &quot;L&quot; before the class name. This results in a ClassNotFoundException for Ljava.lang.InternalError being thrown, instead of the intended exception.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8188">HADOOP-8188</a>.
+     Major improvement reported by devaraj and fixed by devaraj <br>
+     <b>Fix the build process to do with jsvc, with IBM&apos;s JDK as the underlying jdk</b><br>
+     <blockquote>When IBM JDK is used as the underlying JDK for the build process, the build of jsvc fails. I just needed to add an extra &quot;os arch&quot; expression in the condition that sets os-arch.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8251">HADOOP-8251</a>.
+     Blocker bug reported by tlipcon and fixed by tlipcon (security)<br>
+     <b>SecurityUtil.fetchServiceTicket broken after HADOOP-6941</b><br>
+     <blockquote>HADOOP-6941 replaced direct references to some classes with reflective access so as to support other JDKs. Unfortunately there was a mistake in the name of the Krb5Util class, which broke fetchServiceTicket. This manifests itself as the inability to run checkpoints or other krb5-SSL HTTP-based transfers:<br><br>java.lang.ClassNotFoundException: sun.security.jgss.krb5</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8293">HADOOP-8293</a>.
+     Major bug reported by owen.omalley and fixed by owen.omalley (build)<br>
+     <b>The native library&apos;s Makefile.am doesn&apos;t include JNI path</b><br>
+     <blockquote>When compiling on centos 6, I get the following error when compiling the native library:<br><br>{code}<br> [exec] /usr/bin/ld: cannot find -ljvm<br>{code}<br><br>The problem is simply that the Makefile.am libhadoop_la_LDFLAGS doesn&apos;t include AM_LDFLAGS.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8294">HADOOP-8294</a>.
+     Critical bug reported by kihwal and fixed by kihwal (ipc)<br>
+     <b>IPC Connection becomes unusable even if server address was temporarilly unresolvable</b><br>
+     <blockquote>This is same as HADOOP-7428, but was observed on 1.x data nodes. This can happen more frequently after HADOOP-7472, which allows IPC Connection to re-resolve the name. HADOOP-7428 needs to be back-ported.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8338">HADOOP-8338</a>.
+     Major bug reported by owen.omalley and fixed by owen.omalley (security)<br>
+     <b>Can&apos;t renew or cancel HDFS delegation tokens over secure RPC</b><br>
+     <blockquote>The fetchdt tool is failing for secure deployments when given --renew or --cancel on tokens fetched using RPC. (The tokens fetched over HTTP can be renewed and canceled fine.)</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8346">HADOOP-8346</a>.
+     Blocker bug reported by tucu00 and fixed by devaraj (security)<br>
+     <b>Changes to support Kerberos with non Sun JVM (HADOOP-6941) broke SPNEGO</b><br>
+     <blockquote>before HADOOP-6941 hadoop-auth testcases with Kerberos ON pass, *mvn test -PtestKerberos*<br><br>after HADOOP-6941 the tests fail with the error below.<br><br>Doing some IDE debugging I&apos;ve found out that the changes in HADOOP-6941 are making the JVM Kerberos libraries to append an extra element to the kerberos principal of the server (on the client side when creating the token) so *HTTP/localhost* ends up being *HTTP/localhost/localhost*. Then, when contacting the KDC to get the granting ticket, the serv...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-119">HDFS-119</a>.
+     Major bug reported by shv and fixed by sureshms (name-node)<br>
+     <b>logSync() may block NameNode forever.</b><br>
+     <blockquote># {{FSEditLog.logSync()}} first waits until {{isSyncRunning}} is false and then performs syncing to file streams by calling {{EditLogOutputStream.flush()}}.<br>If an exception is thrown after {{isSyncRunning}} is set to {{true}} all threads will always wait on this condition.<br>An {{IOException}} may be thrown by {{EditLogOutputStream.setReadyToFlush()}} or a {{RuntimeException}} may be thrown by {{EditLogOutputStream.flush()}} or by {{processIOError()}}.<br># The loop that calls {{eStream.flush()}} ...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-1041">HDFS-1041</a>.
+     Major bug reported by szetszwo and fixed by szetszwo (hdfs client)<br>
+     <b>DFSClient does not retry in getFileChecksum(..)</b><br>
+     <blockquote>If connection to the first datanode fails, DFSClient does not retry in getFileChecksum(..).</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-3061">HDFS-3061</a>.
+     Blocker bug reported by alex.holmes and fixed by kihwal (name-node)<br>
+     <b>Cached directory size in INodeDirectory can get permantently out of sync with computed size, causing quota issues</b><br>
+     <blockquote>It appears that there&apos;s a condition under which a HDFS directory with a space quota set can get to a point where the cached size for the directory can permanently differ from the computed value.  When this happens the following command:<br><br>{code}<br>hadoop fs -count -q /tmp/quota-test<br>{code}<br><br>results in the following output in the NameNode logs:<br><br>{code}<br>WARN org.apache.hadoop.hdfs.server.namenode.NameNode: Inconsistent diskspace for directory quota-test. Cached: 6000 Computed: 6072<br>{code}<br><br>I&apos;ve ob...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-3127">HDFS-3127</a>.
+     Major bug reported by brandonli and fixed by brandonli (name-node)<br>
+     <b>failure in recovering removed storage directories should not stop checkpoint process</b><br>
+     <blockquote>When a restore fails, rollEditLog() also fails even if there are healthy directories. Any exceptions from recovering the removed directories should not fail checkpoint process.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-3265">HDFS-3265</a>.
+     Major bug reported by kumarr and fixed by kumarr (build)<br>
+     <b>PowerPc Build error.</b><br>
+     <blockquote>When attempting to build branch-1, the following error is seen and ant exits.<br>[exec] configure: error: Unsupported CPU architecture &quot;powerpc64&quot;<br><br>The following command was used to build hadoop-common<br><br>ant -Dlibhdfs=true -Dcompile.native=true -Dfusedfs=true -Dcompile.c++=true -Dforrest.home=$FORREST_HOME compile-core-native compile-c++ compile-c++-examples task-controller tar record-parser compile-hdfs-classes package -Djava5.home=/opt/ibm/ibm-java2-ppc64-50/ </blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-3310">HDFS-3310</a>.
+     Major bug reported by cmccabe and fixed by cmccabe <br>
+     <b>Make sure that we abort when no edit log directories are left</b><br>
+     <blockquote>We should make sure to abort when there are no edit log directories left to write to.  It seems that there is at least one case that is slipping through the cracks right now in branch-1.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-3374">HDFS-3374</a>.
+     Major bug reported by owen.omalley and fixed by owen.omalley (name-node)<br>
+     <b>hdfs&apos; TestDelegationToken fails intermittently with a race condition</b><br>
+     <blockquote>The testcase is failing because the MiniDFSCluster is shutdown before the secret manager can change the key, which calls system.exit with no edit streams available.<br><br>{code}<br><br>    [junit] 2012-05-04 15:03:51,521 WARN  common.Storage (FSImage.java:updateRemovedDirs(224)) - Removing storage dir /home/horton/src/hadoop/build/test/data/dfs/name1<br>    [junit] 2012-05-04 15:03:51,522 FATAL namenode.FSNamesystem (FSEditLog.java:fatalExit(388)) - No edit streams are accessible<br>    [junit] java.lang.Exce...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-1238">MAPREDUCE-1238</a>.
+     Major bug reported by rramya and fixed by tgraves (jobtracker)<br>
+     <b>mapred metrics shows negative count of waiting maps and reduces </b><br>
+     <blockquote>Negative waiting_maps and waiting_reduces count is observed in the mapred metrics</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-3377">MAPREDUCE-3377</a>.
+     Major bug reported by jxchen and fixed by jxchen <br>
+     <b>Compatibility issue with 0.20.203.</b><br>
+     <blockquote>I have an OutputFormat which implements Configurable.  I set new config entries to a job configuration during checkOutputSpec() so that the tasks will get the config entries through the job configuration.  This works fine in 0.20.2, but stopped working starting from 0.20.203.  With 0.20.203, my OutputFormat still has the configuration set, but the copy a task gets does not have the new entries that are set as part of checkOutputSpec().  <br><br>I believe that the problem is with JobClient.  The job...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-3857">MAPREDUCE-3857</a>.
+     Major bug reported by jeagles and fixed by jeagles (examples)<br>
+     <b>Grep example ignores mapred.job.queue.name</b><br>
+     <blockquote>Grep example creates two jobs as part of its implementation. The first job correctly uses the configuration settings. The second job ignores configuration settings.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-4003">MAPREDUCE-4003</a>.
+     Major bug reported by zaozaowang and fixed by knoguchi (task-controller, tasktracker)<br>
+     <b>log.index (No such file or directory) AND Task process exit with nonzero status of 126</b><br>
+     <blockquote>hello?I have dwelled on this hadoop(cdhu3) problem for 2 days,I have tried every google method.This is the issue: when ran hadoop example &quot;wordcount&quot; ,the tasktracker&apos;s log in one slave node presented such errors<br><br> 1.WARN org.apache.hadoop.mapred.DefaultTaskController: Task wrapper stderr: bash: /var/tmp/mapred/local/ttprivate/taskTracker/hdfs/jobcache/job_201203131751_0003/attempt_201203131751_0003_m_000006_0/taskjvm.sh: Permission denied<br><br>2.WARN org.apache.hadoop.mapred.TaskRunner: attempt_...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-4012">MAPREDUCE-4012</a>.
+     Minor bug reported by knoguchi and fixed by tgraves <br>
+     <b>Hadoop Job setup error leaves no useful info to users (when LinuxTaskController is used)</b><br>
+     <blockquote>When distributed cache pull fail on the TaskTracker, job webUI only shows <br>{noformat}<br>Job initialization failed (255)<br>{noformat}<br>leaving users confused.  <br><br>On the TaskTracker log, there is a log with useful info <br>{noformat}<br>2012-03-14 21:44:17,083 INFO org.apache.hadoop.mapred.TaskController: org.apache.hadoop.security.AccessControlException: org.apache.hadoop.security.AccessControlException: <br>Permission denied: user=user1, access=READ, inode=&quot;testfile&quot;:user3:users:rw-------<br>...<br>2012-03-14 21...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-4154">MAPREDUCE-4154</a>.
+     Major bug reported by thejas and fixed by devaraj <br>
+     <b>streaming MR job succeeds even if the streaming command fails</b><br>
+     <blockquote>Hadoop 1.0.1 behaves as expected - The task fails for streaming MR job if the streaming command fails. But it succeeds in hadoop 1.0.2 .<br></blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-4207">MAPREDUCE-4207</a>.
+     Major bug reported by kihwal and fixed by kihwal (mrv1)<br>
+     <b>Remove System.out.println() in FileInputFormat</b><br>
+     <blockquote>MAPREDUCE-3607 accidentally left the println statement. </blockquote></li>
+
+
+</ul>
+
+
+<h2>Changes since Hadoop 1.0.1</h2>
+
+<h3>Jiras with Release Notes (describe major or incompatible changes)</h3>
+<ul>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-1722">HADOOP-1722</a>.
+     Major improvement reported by runping and fixed by klbostee <br>
+     <b>Make streaming to handle non-utf8 byte array</b><br>
+     <blockquote>                                              Streaming allows binary (or other non-UTF8) streams.
+
+      
+</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-3851">MAPREDUCE-3851</a>.
+     Major bug reported by kihwal and fixed by tgraves (tasktracker)<br>
+     <b>Allow more aggressive action on detection of the jetty issue</b><br>
+     <blockquote>                    added new configuration variables to control when TT aborts if it sees a certain number of exceptions:
<br/>
+
+
<br/>
+
+&nbsp;&nbsp;&nbsp;&nbsp;// Percent of shuffle exceptions (out of sample size) seen before it&#39;s
<br/>
+
+&nbsp;&nbsp;&nbsp;&nbsp;// fatal - acceptable values are from 0 to 1.0, 0 disables the check.
<br/>
+
+&nbsp;&nbsp;&nbsp;&nbsp;// ie. 0.3 = 30% of the last X number of requests matched the exception,
<br/>
+
+&nbsp;&nbsp;&nbsp;&nbsp;// so abort.
<br/>
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;conf.getFloat(
<br/>
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&quot;mapreduce.reduce.shuffle.catch.exception.percent.limit.fatal&quot;, 0);
<br/>
+
+
<br/>
+
+&nbsp;&nbsp;&nbsp;&nbsp;// The number of trailing requests we track, used for the fatal
<br/>
+
+&nbsp;&nbsp;&nbsp;&nbsp;// limit calculation
<br/>
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;conf.getInt(&quot;mapreduce.reduce.shuffle.catch.exception.sample.size&quot;, 1000);
+</blockquote></li>
+
+</ul>
+
+<h3>Other Jiras (describe bug fixes and minor changes)</h3>
+<ul>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-5450">HADOOP-5450</a>.
+     Blocker improvement reported by klbostee and fixed by klbostee <br>
+     <b>Add support for application-specific typecodes to typed bytes</b><br>
+     <blockquote>For serializing objects of types that are not supported by typed bytes serialization, applications might want to use a custom serialization format. Right now, typecode 0 has to be used for the bytes resulting from this custom serialization, which could lead to problems when deserializing the objects because the application cannot know if a byte sequence following typecode 0 is a customly serialized object or just a raw sequence of bytes. Therefore, a range of typecodes that are treated as ali...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-7206">HADOOP-7206</a>.
+     Major new feature reported by eli and fixed by tucu00 <br>
+     <b>Integrate Snappy compression</b><br>
+     <blockquote>Google release Zippy as an open source (APLv2) project called Snappy (http://code.google.com/p/snappy). This tracks integrating it into Hadoop.<br><br>{quote}<br>Snappy is a compression/decompression library. It does not aim for maximum compression, or compatibility with any other compression library; instead, it aims for very high speeds and reasonable compression. For instance, compared to the fastest mode of zlib, Snappy is an order of magnitude faster for most inputs, but the resulting compressed ...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8050">HADOOP-8050</a>.
+     Major bug reported by kihwal and fixed by kihwal (metrics)<br>
+     <b>Deadlock in metrics</b><br>
+     <blockquote>The metrics serving thread and the periodic snapshot thread can deadlock.<br>It happened a few times on one of namenodes we have. When it happens RPC works but the web ui and hftp stop working. I haven&apos;t look at the trunk too closely, but it might happen there too.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8088">HADOOP-8088</a>.
+     Major bug reported by kihwal and fixed by  (security)<br>
+     <b>User-group mapping cache incorrectly does negative caching on transient failures</b><br>
+     <blockquote>We&apos;ve seen a case where some getGroups() calls fail when the ldap server or the network is having transient failures. Looking at the code, the shell-based and the JNI-based implementations swallow exceptions and return an empty or partial list. The caller, Groups#getGroups() adds this likely empty list into the mapping cache for the user. This will function as negative caching until the cache expires. I don&apos;t think we want negative caching here, but even if we do, it should be intelligent eno...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8090">HADOOP-8090</a>.
+     Major improvement reported by gkesavan and fixed by gkesavan <br>
+     <b>rename hadoop 64 bit rpm/deb package name</b><br>
+     <blockquote>change hadoop rpm/deb name from hadoop-&lt;version&gt;.amd64.rpm/deb hadoop-&lt;version&gt;.x86_64.rpm/deb   </blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8132">HADOOP-8132</a>.
+     Major bug reported by arpitgupta and fixed by arpitgupta <br>
+     <b>64bit secure datanodes do not start as the jsvc path is wrong</b><br>
+     <blockquote>64bit secure datanodes were looking for /usr/libexec/../libexec/jsvc. instead of /usr/libexec/../libexec/jsvc.amd64</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8201">HADOOP-8201</a>.
+     Blocker bug reported by gkesavan and fixed by gkesavan <br>
+     <b>create the configure script for native compilation as part of the build</b><br>
+     <blockquote>configure script is checked into svn and its not regenerated during build. Ideally configure scritp should not be checked into svn and instead should be generated during build using autoreconf.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-2701">HDFS-2701</a>.
+     Major improvement reported by eli and fixed by eli (name-node)<br>
+     <b>Cleanup FS* processIOError methods</b><br>
+     <blockquote>Let&apos;s rename the various &quot;processIOError&quot; methods to be more descriptive. The current code makes it difficult to identify and reason about bug fixes. While we&apos;re at it let&apos;s remove &quot;Fatal&quot; from the &quot;Unable to sync the edit log&quot; log since it&apos;s not actually a fatal error (this is confusing to users). And 2NN &quot;Checkpoint done&quot; should be info, not a warning (also confusing to users).<br><br>Thanks to HDFS-1073 these issues don&apos;t exist on trunk or 23.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-2702">HDFS-2702</a>.
+     Critical bug reported by eli and fixed by eli (name-node)<br>
+     <b>A single failed name dir can cause the NN to exit </b><br>
+     <blockquote>There&apos;s a bug in FSEditLog#rollEditLog which results in the NN process exiting if a single name dir has failed. Here&apos;s the relevant code:<br><br>{code}<br>close()  // So editStreams.size() is 0 <br>foreach edits dir {<br>  ..<br>  eStream = new ...  // Might get an IOE here<br>  editStreams.add(eStream);<br>} catch (IOException ioe) {<br>  removeEditsForStorageDir(sd);  // exits if editStreams.size() &lt;= 1  <br>}<br>{code}<br><br>If we get an IOException before we&apos;ve added two edits streams to the list we&apos;ll exit, eg if there&apos;s an ...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-2703">HDFS-2703</a>.
+     Major bug reported by eli and fixed by eli (name-node)<br>
+     <b>removedStorageDirs is not updated everywhere we remove a storage dir</b><br>
+     <blockquote>There are a number of places (FSEditLog#open, purgeEditLog, and rollEditLog) where we remove a storage directory but don&apos;t add it to the removedStorageDirs list. This means a storage dir may have been removed but we don&apos;t see it in the log or Web UI. This doesn&apos;t affect trunk/23 since the code there is totally different.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-2978">HDFS-2978</a>.
+     Major new feature reported by atm and fixed by atm (name-node)<br>
+     <b>The NameNode should expose name dir statuses via JMX</b><br>
+     <blockquote>We currently display this info on the NN web UI, so users who wish to monitor this must either do it manually or parse HTML. We should publish this information via JMX.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-3006">HDFS-3006</a>.
+     Major bug reported by bcwalrus and fixed by szetszwo (name-node)<br>
+     <b>Webhdfs &quot;SETOWNER&quot; call returns incorrect content-type</b><br>
+     <blockquote>The SETOWNER call returns an empty body. But the header has &quot;Content-Type: application/json&quot;, which is a contradiction (empty string is not valid json). This appears to happen for SETTIMES and SETPERMISSION as well.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-3075">HDFS-3075</a>.
+     Major improvement reported by brandonli and fixed by brandonli (name-node)<br>
+     <b>Backport HADOOP-4885 to branch-1</b><br>
+     <blockquote>When a storage directory is inaccessible, namenode removes it from the valid storage dir list to a removedStorageDirs list. Those storage directories will not be restored when they become healthy again. <br><br>The proposed solution is to restore the previous failed directories at the beginning of checkpointing, say, rollEdits, by copying necessary metadata files from healthy directory to unhealthy ones. In this way, whenever a failed storage directory is recovered by the administrator, he/she can ...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-3101">HDFS-3101</a>.
+     Major bug reported by wangzw and fixed by szetszwo (hdfs client)<br>
+     <b>cannot read empty file using webhdfs</b><br>
+     <blockquote>STEP:<br>1, create a new EMPTY file<br>2, read it using webhdfs.<br><br>RESULT:<br>expected: get a empty file<br>I got: {&quot;RemoteException&quot;:{&quot;exception&quot;:&quot;IOException&quot;,&quot;javaClassName&quot;:&quot;java.io.IOException&quot;,&quot;message&quot;:&quot;Offset=0 out of the range [0, 0); OPEN, path=/testFile&quot;}}<br><br>First of all, [0, 0) is not a valid range, and I think read a empty file should be OK.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-764">MAPREDUCE-764</a>.
+     Blocker bug reported by klbostee and fixed by klbostee (contrib/streaming)<br>
+     <b>TypedBytesInput&apos;s readRaw() does not preserve custom type codes</b><br>
+     <blockquote>The typed bytes format supports byte sequences of the form {{&lt;custom type code&gt; &lt;length&gt; &lt;bytes&gt;}}. When reading such a sequence via {{TypedBytesInput}}&apos;s {{readRaw()}} method, however, the returned sequence currently is {{0 &lt;length&gt; &lt;bytes&gt;}} (0 is the type code for a bytes array), which leads to bugs such as the one described [here|http://dumbo.assembla.com/spaces/dumbo/tickets/54].</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-3583">MAPREDUCE-3583</a>.
+     Critical bug reported by zhihyu@ebaysf.com and fixed by zhihyu@ebaysf.com <br>
+     <b>ProcfsBasedProcessTree#constructProcessInfo() may throw NumberFormatException</b><br>
+     <blockquote>HBase PreCommit builds frequently gave us NumberFormatException.<br><br>From https://builds.apache.org/job/PreCommit-HBASE-Build/553//testReport/org.apache.hadoop.hbase.mapreduce/TestHFileOutputFormat/testMRIncrementalLoad/:<br>{code}<br>2011-12-20 01:44:01,180 WARN  [main] mapred.JobClient(784): No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).<br>java.lang.NumberFormatException: For input string: &quot;18446743988060683582&quot;<br>	at java.lang.NumberFormatException.fo...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-3773">MAPREDUCE-3773</a>.
+     Major new feature reported by owen.omalley and fixed by owen.omalley (jobtracker)<br>
+     <b>Add queue metrics with buckets for job run times</b><br>
+     <blockquote>It would be nice to have queue metrics that reflect the number of jobs in each queue that have been running for different ranges of time.<br><br>Reasonable time ranges are probably 0-1 hr, 1-5 hr, 5-24 hr, 24+ hrs; but they should be configurable.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-3824">MAPREDUCE-3824</a>.
+     Critical bug reported by aw and fixed by tgraves (distributed-cache)<br>
+     <b>Distributed caches are not removed properly</b><br>
+     <blockquote>Distributed caches are not being properly removed by the TaskTracker when they are expected to be expired. </blockquote></li>
+
+</ul>
+
+<h2>Changes since Hadoop 1.0.0</h2>
+
+<h3>Jiras with Release Notes (describe major or incompatible changes)</h3>
+<ul>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8009">HADOOP-8009</a>.
+     Critical improvement reported by tucu00 and fixed by tucu00 (build)<br>
+     <b>Create hadoop-client and hadoop-minicluster artifacts for downstream projects </b><br>
+     <blockquote>                    Generate integration artifacts &quot;org.apache.hadoop:hadoop-client&quot; and &quot;org.apache.hadoop:hadoop-minicluster&quot; containing all the jars needed to use Hadoop client APIs, and to run Hadoop MiniClusters, respectively.  Push these artifacts to the maven repository when mvn-deploy, along with existing artifacts. 
+</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8037">HADOOP-8037</a>.
+     Blocker bug reported by mattf and fixed by gkesavan (build)<br>
+     <b>Binary tarball does not preserve platform info for native builds, and RPMs fail to provide needed symlinks for libhadoop.so</b><br>
+     <blockquote>                    This fix is marked &quot;incompatible&quot; only because it changes the bin-tarball directory structure to be consistent with the source tarball directory structure. The source tarball is unchanged. RPMs and DEBs now use an intermediate bin-tarball with an &quot;${os.arch}&quot; tag (like the packages themselves). The un-tagged bin-tarball is now multi-platform and retains the structure of the source tarball; it is in fact generated by target &quot;tar&quot;, not by target &quot;binary&quot;. Finally, in the 64-bit RPMs and DEBs, the native libs go in the &quot;lib64&quot; directory instead of &quot;lib&quot;.
+</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-3184">MAPREDUCE-3184</a>.
+     Major improvement reported by tlipcon and fixed by tlipcon (jobtracker)<br>
+     <b>Improve handling of fetch failures when a tasktracker is not responding on HTTP</b><br>
+     <blockquote>                    The TaskTracker now has a thread which monitors for a known Jetty bug in which the selector thread starts spinning and map output can no longer be served. If the bug is detected, the TaskTracker will shut itself down. This feature can be disabled by setting mapred.tasktracker.jetty.cpu.check.enabled to false.
+</blockquote></li>
+
+</ul>
+
+<h3>Other Jiras (describe bug fixes and minor changes)</h3>
+<ul>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-7470">HADOOP-7470</a>.
+     Minor improvement reported by stevel@apache.org and fixed by enis (util)<br>
+     <b>move up to Jackson 1.8.8</b><br>
+     <blockquote>I see that hadoop-core still depends on Jackson 1.0.1 -but that project is now up to 1.8.2 in releases. Upgrading will make it easier for other Jackson-using apps that are more up to date to keep their classpath consistent.<br><br>The patch would be updating the ivy file to pull in the later version; no test</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-7960">HADOOP-7960</a>.
+     Major bug reported by gkesavan and fixed by mattf <br>
+     <b>Port HADOOP-5203 to branch-1, build version comparison is too restrictive</b><br>
+     <blockquote>hadoop services should not be using the build timestamp to verify version difference in the cluster installation. Instead it should use the source checksum as in HADOOP-5203.<br>  </blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-7964">HADOOP-7964</a>.
+     Blocker bug reported by kihwal and fixed by daryn (security, util)<br>
+     <b>Deadlock in class init.</b><br>
+     <blockquote>After HADOOP-7808, client-side commands hang occasionally. There are cyclic dependencies in NetUtils and SecurityUtil class initialization. Upon initial look at the stack trace, two threads deadlock when they hit the either of class init the same time.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-7987">HADOOP-7987</a>.
+     Major improvement reported by devaraj and fixed by jnp (security)<br>
+     <b>Support setting the run-as user in unsecure mode</b><br>
+     <blockquote>Some applications need to be able to perform actions (such as launch MR jobs) from map or reduce tasks. In earlier unsecure versions of hadoop (20.x), it was possible to do this by setting user.name in the configuration. But in 20.205 and 1.0, when running in unsecure mode, this does not work. (In secure mode, you can do this using the kerberos credentials).</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-7988">HADOOP-7988</a>.
+     Major bug reported by jnp and fixed by jnp <br>
+     <b>Upper case in hostname part of the principals doesn&apos;t work with kerberos.</b><br>
+     <blockquote>Kerberos doesn&apos;t like upper case in the hostname part of the principals.<br>This issue has been seen in 23 as well as 1.0.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8010">HADOOP-8010</a>.
+     Minor bug reported by rvs and fixed by rvs (scripts)<br>
+     <b>hadoop-config.sh spews error message when HADOOP_HOME_WARN_SUPPRESS is set to true and HADOOP_HOME is present</b><br>
+     <blockquote>Running hadoop daemon commands when HADOOP_HOME_WARN_SUPPRESS is set to true and HADOOP_HOME is present produces:<br>{noformat}<br>  [: 76: true: unexpected operator<br>{noformat}</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HADOOP-8052">HADOOP-8052</a>.
+     Major bug reported by reznor and fixed by reznor (metrics)<br>
+     <b>Hadoop Metrics2 should emit Float.MAX_VALUE (instead of Double.MAX_VALUE) to avoid making Ganglia&apos;s gmetad core</b><br>
+     <blockquote>Ganglia&apos;s gmetad converts the doubles emitted by Hadoop&apos;s Metrics2 system to strings, and the buffer it uses is 256 bytes wide.<br><br>When the SampleStat.MinMax class (in org.apache.hadoop.metrics2.util) emits its default min value (currently initialized to Double.MAX_VALUE), it ends up causing a buffer overflow in gmetad, which causes it to core, effectively rendering Ganglia useless (for some, the core is continuous; for others who are more fortunate, it&apos;s only a one-time Hadoop-startup-time thi...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-2379">HDFS-2379</a>.
+     Critical bug reported by tlipcon and fixed by tlipcon (data-node)<br>
+     <b>0.20: Allow block reports to proceed without holding FSDataset lock</b><br>
+     <blockquote>As disks are getting larger and more plentiful, we&apos;re seeing DNs with multiple millions of blocks on a single machine. When page cache space is tight, block reports can take multiple minutes to generate. Currently, during the scanning of the data directories to generate a report, the FSVolumeSet lock is held. This causes writes and reads to block, timeout, etc, causing big problems especially for clients like HBase.<br><br>This JIRA is to explore some of the ideas originally discussed in HADOOP-458...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/HDFS-2814">HDFS-2814</a>.
+     Minor improvement reported by hitesh and fixed by hitesh <br>
+     <b>NamenodeMXBean does not account for svn revision in the version information</b><br>
+     <blockquote>Unlike the jobtracker where both the UI and jmx information report the version as &quot;x.y.z, r&lt;svn revision&quot;, in case of the namenode, the UI displays x.y.z and svn revision info but the jmx output only contains the x.y.z version.</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-3343">MAPREDUCE-3343</a>.
+     Major bug reported by ahmed.radwan and fixed by zhaoyunjiong (mrv1)<br>
+     <b>TaskTracker Out of Memory because of distributed cache</b><br>
+     <blockquote>This Out of Memory happens when you run large number of jobs (using the distributed cache) on a TaskTracker. <br><br>Seems the basic issue is with the distributedCacheManager (instance of TrackerDistributedCacheManager in TaskTracker.java), this gets created during TaskTracker.initialize(), and it keeps references to TaskDistributedCacheManager for every submitted job via the jobArchives Map, also references to CacheStatus via cachedArchives map. I am not seeing these cleaned up between jobs, so th...</blockquote></li>
+
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-3607">MAPREDUCE-3607</a>.
+     Major improvement reported by tomwhite and fixed by tomwhite (client)<br>
+     <b>Port missing new API mapreduce lib classes to 1.x</b><br>
+     <blockquote>There are a number of classes under mapreduce.lib that are not present in the 1.x series. Including these would help users and downstream projects using the new MapReduce API migrate to later versions of Hadoop in the future.<br><br>A few examples of where this would help:<br>* Sqoop uses mapreduce.lib.db.DBWritable and mapreduce.lib.input.CombineFileInputFormat (SQOOP-384).<br>* Mahout uses mapreduce.lib.output.MultipleOutputs (MAHOUT-822).<br>* HBase has a backport of mapreduce.lib.partition.InputSampler ...</blockquote></li>
+
+
+</ul>
+
+
 <h2>Changes since Hadoop 0.20.205.0</h2>
 
 <h3>Jiras with Release Notes (describe major or incompatible changes)</h3>

Modified: hadoop/common/branches/branch-1-win/src/examples/org/apache/hadoop/examples/Grep.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/examples/org/apache/hadoop/examples/Grep.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/examples/org/apache/hadoop/examples/Grep.java (original)
+++ hadoop/common/branches/branch-1-win/src/examples/org/apache/hadoop/examples/Grep.java Tue Jul 17 20:36:07 2012
@@ -68,7 +68,7 @@ public class Grep extends Configured imp
 
       JobClient.runJob(grepJob);
 
-      JobConf sortJob = new JobConf(Grep.class);
+      JobConf sortJob = new JobConf(getConf(), Grep.class);
       sortJob.setJobName("grep-sort");
 
       FileInputFormat.setInputPaths(sortJob, tempDir);

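The one-line Grep fix (MAPREDUCE-3857 in the release notes above) matters because JobConf(Class) starts from a fresh Configuration and so discards anything the Tool runner parsed from the command line, such as -Dmapred.job.queue.name=...; JobConf(Configuration, Class) seeds the second job from the first job's settings. A small sketch of the difference, with an assumed queue setting:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapred.JobConf;

    public class JobConfPropagation {
      static void demo(Configuration toolConf) {
        // toolConf carries -D overrides parsed by ToolRunner, e.g.
        // mapred.job.queue.name=analytics (hypothetical value).

        JobConf fresh = new JobConf(JobConfPropagation.class);
        // fresh starts from the default resources only; the queue
        // override is lost, which was the Grep bug.

        JobConf inherited = new JobConf(toolConf, JobConfPropagation.class);
        // inherited copies toolConf first, so the same overrides apply
        // to the second (sort) job as well.
        System.out.println(inherited.get("mapred.job.queue.name"));
      }
    }
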
Modified: hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java (original)
+++ hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java Tue Jul 17 20:36:07 2012
@@ -534,7 +534,7 @@ public class DFSClient implements FSCons
     return hints;
   }
 
-  private static LocatedBlocks callGetBlockLocations(ClientProtocol namenode,
+  static LocatedBlocks callGetBlockLocations(ClientProtocol namenode,
       String src, long start, long length) throws IOException {
     try {
       return namenode.getBlockLocations(src, start, length);
@@ -931,25 +931,28 @@ public class DFSClient implements FSCons
      
       boolean done = false;
       for(int j = 0; !done && j < datanodes.length; j++) {
-        //connect to a datanode
-        final Socket sock = socketFactory.createSocket();
-        NetUtils.connect(sock, 
-                         NetUtils.createSocketAddr(datanodes[j].getName()),
-                         timeout);
-        sock.setSoTimeout(timeout);
-
-        DataOutputStream out = new DataOutputStream(
-            new BufferedOutputStream(NetUtils.getOutputStream(sock), 
-                                     DataNode.SMALL_BUFFER_SIZE));
-        DataInputStream in = new DataInputStream(NetUtils.getInputStream(sock));
-
-        // get block MD5
+        Socket sock = null;
+        DataOutputStream out = null;
+        DataInputStream in = null;
+        
         try {
+          //connect to a datanode
+          sock = socketFactory.createSocket();
+          NetUtils.connect(sock,
+              NetUtils.createSocketAddr(datanodes[j].getName()), timeout);
+          sock.setSoTimeout(timeout);
+
+          out = new DataOutputStream(
+              new BufferedOutputStream(NetUtils.getOutputStream(sock), 
+                                       DataNode.SMALL_BUFFER_SIZE));
+          in = new DataInputStream(NetUtils.getInputStream(sock));
+
           if (LOG.isDebugEnabled()) {
             LOG.debug("write to " + datanodes[j].getName() + ": "
-                + DataTransferProtocol.OP_BLOCK_CHECKSUM +
-                ", block=" + block);
+                + DataTransferProtocol.OP_BLOCK_CHECKSUM + ", block=" + block);
           }
+
+          // get block MD5
           out.writeShort(DataTransferProtocol.DATA_TRANSFER_VERSION);
           out.write(DataTransferProtocol.OP_BLOCK_CHECKSUM);
           out.writeLong(block.getBlockId());

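The DFSClient hunk above moves socket creation and stream setup inside the try block, so a connect failure against one datanode is handled like any other per-node IO error. The rest of the hunk (its catch/finally) is not shown in this part of the diff, so the cleanup below is an assumption about the intended shape rather than the committed code. A self-contained sketch of the connect-inside-try pattern:

    import java.io.Closeable;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.net.InetSocketAddress;
    import java.net.Socket;

    // Illustration only: create the socket inside the try so any failure is
    // caught per datanode, then close everything in finally.
    public class PerNodeAttemptSketch {
      static void closeQuietly(Closeable c) {
        if (c != null) { try { c.close(); } catch (IOException ignored) {} }
      }

      static boolean tryOneNode(InetSocketAddress addr, int timeout) {
        Socket sock = null;
        DataOutputStream out = null;
        DataInputStream in = null;
        try {
          sock = new Socket();
          sock.connect(addr, timeout);   // stands in for NetUtils.connect
          sock.setSoTimeout(timeout);
          out = new DataOutputStream(sock.getOutputStream());
          in = new DataInputStream(sock.getInputStream());
          // ... a real client would run the OP_BLOCK_CHECKSUM exchange here ...
          return true;
        } catch (IOException ioe) {
          return false;                  // caller moves on to the next datanode
        } finally {
          closeQuietly(in);
          closeQuietly(out);
          closeQuietly(sock);
        }
      }
    }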
Modified: hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java (original)
+++ hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java Tue Jul 17 20:36:07 2012
@@ -69,6 +69,9 @@ class FSDirectory implements FSConstants
         ns.createFsOwnerPermissions(new FsPermission((short)0755)),
         Integer.MAX_VALUE, -1);
     this.fsImage = fsImage;
+    fsImage.setRestoreRemovedDirs(conf.getBoolean(
+        DFSConfigKeys.DFS_NAMENODE_NAME_DIR_RESTORE_KEY,
+        DFSConfigKeys.DFS_NAMENODE_NAME_DIR_RESTORE_DEFAULT));
     namesystem = ns;
     int configuredLimit = conf.getInt(
         DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
@@ -340,7 +343,12 @@ class FSDirectory implements FSConstants
       NameNode.stateChangeLog.debug("DIR* FSDirectory.addFile: "
                                     +path+" with "+block
                                     +" block is added to the file system");
+      // update space consumed
+      INode[] pathINodes = getExistingPathINodes(path);
+      updateCount(pathINodes, pathINodes.length-1, 0,
+          -fileNode.getPreferredBlockSize()*fileNode.getReplication(), true);
     }
+
     return true;
   }
 

Modified: hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Tue Jul 17 20:36:07 2012
@@ -29,6 +29,7 @@ import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.LinkedList;
 import java.lang.Math;
 import java.nio.channels.FileChannel;
 import java.nio.ByteBuffer;
@@ -428,6 +429,7 @@ public class FSEditLog {
     
     File dir = getStorageDirForStream(idx);
     editStreams.remove(idx);
+    exitIfNoStreams();
     fsimage.removeStorageDir(dir);
   }
 
@@ -446,6 +448,7 @@ public class FSEditLog {
         editStreams.remove(idx);
       }
     }
+    exitIfNoStreams();
   }
   
   /**
@@ -957,69 +960,86 @@ public class FSEditLog {
     // Fetch the transactionId of this thread. 
     long mytxid = myTransactionId.get().txid;
 
-    final int numEditStreams;
-    synchronized (this) {
-      numEditStreams = editStreams.size();
-      assert numEditStreams > 0 : "no editlog streams";
-      printStatistics(false);
+    ArrayList<EditLogOutputStream> streams = new ArrayList<EditLogOutputStream>();
+    boolean sync = false;
+    try {
+      synchronized (this) {
+        printStatistics(false);
 
-      // if somebody is already syncing, then wait
-      while (mytxid > synctxid && isSyncRunning) {
-        try {
-          wait(1000);
-        } catch (InterruptedException ie) { 
+        // if somebody is already syncing, then wait
+        while (mytxid > synctxid && isSyncRunning) {
+          try {
+            wait(1000);
+          } catch (InterruptedException ie) { 
+          }
         }
-      }
-
-      //
-      // If this transaction was already flushed, then nothing to do
-      //
-      if (mytxid <= synctxid) {
-        numTransactionsBatchedInSync++;
-        if (metrics != null) // Metrics is non-null only when used inside name node
-          metrics.incrTransactionsBatchedInSync();
-        return;
-      }
-   
-      // now, this thread will do the sync
-      syncStart = txid;
-      isSyncRunning = true;   
-
-      // swap buffers
-      for (int idx = 0; idx < numEditStreams; idx++) {
-        editStreams.get(idx).setReadyToFlush();
-      }
-    }
 
-    // do the sync
-    long start = FSNamesystem.now();
-    for (int idx = 0; idx < numEditStreams; idx++) {
-      EditLogOutputStream eStream = editStreams.get(idx);
-      try {
-        eStream.flush();
-      } catch (IOException ioe) {
         //
-        // remember the streams that encountered an error.
+        // If this transaction was already flushed, then nothing to do
         //
-        if (errorStreams == null) {
-          errorStreams = new ArrayList<EditLogOutputStream>(1);
+        if (mytxid <= synctxid) {
+          numTransactionsBatchedInSync++;
+          if (metrics != null) // Metrics is non-null only when used inside name node
+            metrics.incrTransactionsBatchedInSync();
+          return;
+        }
+
+        // now, this thread will do the sync
+        syncStart = txid;
+        isSyncRunning = true;
+        sync = true;
+
+        // swap buffers
+        exitIfNoStreams();
+        for(EditLogOutputStream eStream : editStreams) {
+          try {
+            eStream.setReadyToFlush();
+            streams.add(eStream);
+          } catch (IOException ie) {
+            FSNamesystem.LOG.error("Unable to get ready to flush.", ie);
+            //
+            // remember the streams that encountered an error.
+            //
+            if (errorStreams == null) {
+              errorStreams = new ArrayList<EditLogOutputStream>(1);
+            }
+            errorStreams.add(eStream);
+          }
         }
-        errorStreams.add(eStream);
-        FSNamesystem.LOG.error("Unable to sync "+eStream.getName());
       }
-    }
-    long elapsed = FSNamesystem.now() - start;
 
-    synchronized (this) {
-       removeEditsStreamsAndStorageDirs(errorStreams);
-       exitIfNoStreams();
-       synctxid = syncStart;
-       isSyncRunning = false;
-       this.notifyAll();
-    }
+      // do the sync
+      long start = FSNamesystem.now();
+      for (EditLogOutputStream eStream : streams) {
+        try {
+          eStream.flush();
+        } catch (IOException ie) {
+          FSNamesystem.LOG.error("Unable to sync edit log.", ie);
+          //
+          // remember the streams that encountered an error.
+          //
+          if (errorStreams == null) {
+            errorStreams = new ArrayList<EditLogOutputStream>(1);
+          }
+          errorStreams.add(eStream);
+        }
+      }
+      long elapsed = FSNamesystem.now() - start;
+      removeEditsStreamsAndStorageDirs(errorStreams);
+      exitIfNoStreams();
 
-    if (metrics != null) // Metrics is non-null only when used inside name node
-      metrics.addSync(elapsed);
+      if (metrics != null) // Metrics is non-null only when used inside name node
+        metrics.addSync(elapsed);
+
+    } finally {
+      synchronized (this) {
+        if(sync) {
+          synctxid = syncStart;
+          isSyncRunning = false;
+        }
+        this.notifyAll();
+      }
+    }
   }
 
   //
@@ -1233,22 +1253,30 @@ public class FSEditLog {
     if (existsNew()) {
       Iterator<StorageDirectory> it =
         fsimage.dirIterator(NameNodeDirType.EDITS);
+      StringBuilder b = new StringBuilder();
       while (it.hasNext()) {
         File editsNew = getEditNewFile(it.next());
+        b.append("\n  ").append(editsNew);
         if (!editsNew.exists()) {
           throw new IOException(
               "Inconsistent existence of edits.new " + editsNew);
         }
       }
-      return; // nothing to do, edits.new exists!
+      FSNamesystem.LOG.warn("Cannot roll edit log," +
+          " edits.new files already exists in all healthy directories:" + b);
+      return;
     }
-
     close(); // close existing edit log
 
+    // After the edit streams are closed, the edits files in all healthy
+    // directories should be identical, and the same holds for the fsimage files
+    fsimage.restoreStorageDirs();
+    
     //
     // Open edits.new
     //
     Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
+    LinkedList<StorageDirectory> toRemove = new LinkedList<StorageDirectory>();
     while (it.hasNext()) {
       StorageDirectory sd = it.next();
       try {
@@ -1257,11 +1285,19 @@ public class FSEditLog {
         eStream.create();
         editStreams.add(eStream);
       } catch (IOException ioe) {
-        removeEditsForStorageDir(sd);
-        fsimage.updateRemovedDirs(sd, ioe);
+        FSImage.LOG.error("error retrying to reopen storage directory '" +
+            sd.getRoot().getAbsolutePath() + "'", ioe);
+        toRemove.add(sd);
         it.remove();
       }
     }
+
+    // updateRemovedDirs will abort the NameNode if it removes the last
+    // valid edit log directory.
+    for (StorageDirectory sd : toRemove) {
+      removeEditsForStorageDir(sd);
+      fsimage.updateRemovedDirs(sd);
+    }
     exitIfNoStreams();
   }
 
@@ -1294,7 +1330,7 @@ public class FSEditLog {
         if (!getEditNewFile(sd).renameTo(getEditFile(sd))) {
           sd.unlock();
           removeEditsForStorageDir(sd);
-          fsimage.updateRemovedDirs(sd, null);
+          fsimage.updateRemovedDirs(sd);
           it.remove();
         }
       }

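The rewritten logSync() above follows a group-commit shape: under the monitor, decide whether an earlier sync already covered this transaction, otherwise claim the sync and swap buffers; do the slow flush outside the lock; and always release state in a finally block so a failed flush cannot leave isSyncRunning stuck. A stripped-down, self-contained sketch of that shape (illustrative only; it omits the streams, metrics, and error-stream handling shown in the hunk):

    // Hedged sketch of the group-commit pattern used by the new logSync():
    // claim-or-wait under the monitor, flush outside it, release in finally.
    public class GroupCommitSketch {
      private long txid = 0;       // last transaction handed out
      private long synctxid = 0;   // last transaction known durable
      private boolean isSyncRunning = false;

      synchronized long beginTransaction() { return ++txid; }

      void logSync(long mytxid) throws InterruptedException {
        boolean sync = false;
        long syncStart = 0;
        try {
          synchronized (this) {
            while (mytxid > synctxid && isSyncRunning) {
              wait(1000);          // another thread is already syncing
            }
            if (mytxid <= synctxid) {
              return;              // our edits were batched into an earlier sync
            }
            syncStart = txid;      // this thread now syncs everything up to txid
            isSyncRunning = true;
            sync = true;
            // swap double buffers here (setReadyToFlush on each stream)
          }
          flushBuffers();          // the slow part, done outside the lock
        } finally {
          synchronized (this) {
            if (sync) {
              synctxid = syncStart;
              isSyncRunning = false;
            }
            notifyAll();
          }
        }
      }

      private void flushBuffers() { /* write and fsync the ready buffer */ }
    }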
Modified: hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java Tue Jul 17 20:36:07 2012
@@ -28,39 +28,41 @@ import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
-import java.util.Map;
-import java.util.HashMap;
-import java.lang.Math;
-import java.nio.ByteBuffer;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
-import org.apache.hadoop.io.UTF8;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.hdfs.server.namenode.NameNode;
-import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
-import org.apache.hadoop.hdfs.server.namenode.FSEditLog.EditLogFileInputStream;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.common.UpgradeManager;
+import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLog.EditLogFileInputStream;
 import org.apache.hadoop.hdfs.util.AtomicFileOutputStream;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.MultipleIOException;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Writable;
 
 /**
  * FSImage handles checkpointing and logging of the namespace edits.
@@ -138,6 +140,9 @@ public class FSImage extends Storage {
   static private final FsPermission FILE_PERM = new FsPermission((short)0);
   static private final byte[] PATH_SEPARATOR = DFSUtil.string2Bytes(Path.SEPARATOR);
 
+  /** Flag to restore removed storage directories at checkpointing */
+  private boolean restoreRemovedDirs = DFSConfigKeys.DFS_NAMENODE_NAME_DIR_RESTORE_DEFAULT;
+
   /**
    */
   FSImage() {
@@ -215,6 +220,11 @@ public class FSImage extends Storage {
     removedStorageDirs.add(sd);
   }
 
+  void updateRemovedDirs(StorageDirectory sd) {
+    LOG.warn("Removing storage dir " + sd.getRoot().getPath());
+    removedStorageDirs.add(sd);
+  }
+
   File getEditFile(StorageDirectory sd) {
     return getImageFile(sd, NameNodeFile.EDITS);
   }
@@ -639,8 +649,9 @@ public class FSImage extends Storage {
     while (it.hasNext()) {
       StorageDirectory sd = it.next();
       if (sd.getRoot().getPath().equals(dir.getPath())) {
-        updateRemovedDirs(sd, null);
+        updateRemovedDirs(sd);
         it.remove();
+        editLog.removeEditsForStorageDir(sd);
       }
     }
   }
@@ -1215,7 +1226,96 @@ public class FSImage extends Storage {
       newID = r.nextInt(0x7FFFFFFF);  // use 31 bits only
     return newID;
   }
+  
+  void setRestoreRemovedDirs(boolean allow) {
+    this.restoreRemovedDirs = allow;
+  }  
+  
+  /** Restore a metadata file by copying src to dstdir/dstfile. */
+  private static void restoreFile(File src, File dstdir, String dstfile)
+      throws IOException {
+    File dst = new File(dstdir, dstfile);
+    IOUtils.copyBytes(new FileInputStream(src), new FileOutputStream(dst),
+        DFSConfigKeys.DFS_STREAM_BUFFER_SIZE_DEFAULT, true);
+  }
+
+  /** 
+   * Try to restore removed storage dirs by copying files from a healthy storage dir
+   */
+  void restoreStorageDirs() {
+    if (!restoreRemovedDirs || getRemovedStorageDirs().isEmpty()) {
+      return;
+    }
+    
+    Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.EDITS);
+    if (!it.hasNext()) {
+      FSNamesystem.LOG.warn("No healthy edits directory");
+      return;
+    }
+    StorageDirectory goodSd = it.next();
+    File goodEdits = getEditFile(goodSd);
+
+    it = dirIterator(NameNodeDirType.IMAGE);
+    if (!it.hasNext()) {
+      FSNamesystem.LOG.warn("No healthy fsimage directory");
+      return;
+    }
+    goodSd = it.next();
+    File goodImage = getImageFile(goodSd, NameNodeFile.IMAGE);
+    File goodFstime = getImageFile(goodSd, NameNodeFile.TIME);
+    File goodVersion = goodSd.getVersionFile();
+    //for Hadoop version < 0.13 to fail to start
+    File goodImage013 = new File(goodSd.getRoot(), "image/fsimage");
+
+    for (Iterator<StorageDirectory> i = removedStorageDirs.iterator();
+        i.hasNext();) {
+      StorageDirectory sd = i.next();
+      FSNamesystem.LOG.info("Try to recover removed directory " + sd.getRoot()
+          + " by reformatting");
+      try {
+        // don't create the dir if it doesn't exist, since it may need to be mounted
+        if (!sd.getRoot().exists()) {
+          throw new IOException("Directory " + sd.getRoot() + "doesn't exist"); 
+        }
+        if (!FileUtil.fullyDeleteContents(sd.getRoot())) {
+          throw new IOException("Can't fully delete content of " + sd.getRoot());
+        }
+        sd.clearDirectory(); // create empty "current" dir
+        restoreFile(goodVersion, sd.getCurrentDir(), Storage.STORAGE_FILE_VERSION);
+        restoreFile(goodFstime, sd.getCurrentDir(), NameNodeFile.TIME.getName());
+
+        // Create image directory
+        File imageDir = new File(sd.getRoot(), "image");
+        if (!imageDir.mkdir()) {
+          throw new IOException("Can't make directory 'image'.");
+        }
+        restoreFile(goodImage013, imageDir, NameNodeFile.IMAGE.getName());
 
+        if (sd.getStorageDirType().equals(NameNodeDirType.EDITS)) {
+          restoreFile(goodEdits, sd.getCurrentDir(), NameNodeFile.EDITS.getName());
+        } else if (sd.getStorageDirType().equals(NameNodeDirType.IMAGE)) {
+          restoreFile(goodImage, sd.getCurrentDir(), NameNodeFile.IMAGE.getName());
+        } else if (sd.getStorageDirType().equals(
+            NameNodeDirType.IMAGE_AND_EDITS)) {
+          restoreFile(goodEdits, sd.getCurrentDir(), NameNodeFile.EDITS.getName());
+          restoreFile(goodImage, sd.getCurrentDir(), NameNodeFile.IMAGE.getName());
+        } else {
+          throw new IOException("Invalid NameNodeDirType: "
+              + sd.getStorageDirType());
+        }
+        
+        // remove from removedStorageDirs and add back to the healthy set
+        i.remove();
+        addStorageDir(new StorageDirectory(sd.getRoot(), sd.getStorageDirType()));
+      } catch (IOException e) {
+        FSNamesystem.LOG.warn("Failed to recover removed directory "
+            + sd.getRoot() + " with " + e);
+        //ignore restore exception
+      }
+    }
+  }
+  
+  
   /** Create new dfs name directory.  Caution: this destroys all files
    * in this filesystem. */
   void format(StorageDirectory sd) throws IOException {
@@ -1462,7 +1562,7 @@ public class FSImage extends Storage {
         curFile.delete();
         if (!ckpt.renameTo(curFile)) {
           editLog.removeEditsForStorageDir(sd);
-          updateRemovedDirs(sd, null);
+          updateRemovedDirs(sd);
           it.remove();
         }
       }

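Usage note for the restoreStorageDirs() support added above: the flag is read in the FSDirectory constructor and applied the next time the edit log rolls. The key literal "dfs.name.dir.restore" below is an assumption about what DFSConfigKeys.DFS_NAMENODE_NAME_DIR_RESTORE_KEY resolves to in branch-1; in practice the setting would live in hdfs-site.xml:

    import org.apache.hadoop.conf.Configuration;

    // Hedged sketch: enable restoring removed name/edits dirs at log roll.
    public class RestoreDirsConfigSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Assumed key literal; per the FSDirectory hunk, this flag is passed to
        // fsImage.setRestoreRemovedDirs(...), and restoreStorageDirs() then
        // re-creates failed dirs from a healthy copy when the edit log rolls.
        conf.setBoolean("dfs.name.dir.restore", true);
        System.out.println(conf.getBoolean("dfs.name.dir.restore", false));
      }
    }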
Modified: hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Tue Jul 17 20:36:07 2012
@@ -83,6 +83,8 @@ import org.apache.hadoop.hdfs.security.t
 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport;
 import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
@@ -5799,6 +5801,30 @@ public class FSNamesystem implements FSC
     return JSON.toString(info);
   }
 
+  @Override  // NameNodeMXBean
+  public String getNameDirStatuses() {
+    Map<String, Map<File, StorageDirType>> statusMap =
+      new HashMap<String, Map<File, StorageDirType>>();
+    
+    Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
+    for (Iterator<StorageDirectory> it
+        = getFSImage().dirIterator(); it.hasNext();) {
+      StorageDirectory st = it.next();
+      activeDirs.put(st.getRoot(), st.getStorageDirType());
+    }
+    statusMap.put("active", activeDirs);
+    
+    List<Storage.StorageDirectory> removedStorageDirs
+        = getFSImage().getRemovedStorageDirs();
+    Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
+    for (StorageDirectory st : removedStorageDirs) {
+      failedDirs.put(st.getRoot(), st.getStorageDirType());
+    }
+    statusMap.put("failed", failedDirs);
+    
+    return JSON.toString(statusMap);
+  }
+
   private long getLastContact(DatanodeDescriptor alivenode) {
     return (System.currentTimeMillis() - alivenode.getLastUpdate())/1000;
   }

Modified: hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java (original)
+++ hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java Tue Jul 17 20:36:07 2012
@@ -135,4 +135,12 @@ public interface NameNodeMXBean {
    * @return the decommissioning node information
    */
   public String getDecomNodes();
+
+  /**
+   * Get status information about the directories storing image and edits logs
+   * of the NN.
+   * 
+   * @return the name dir status information, as a JSON string.
+   */
+  public String getNameDirStatuses();
 }

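A hedged sketch of consuming the new MXBean attribute from inside the NameNode JVM; the ObjectName "Hadoop:service=NameNode,name=NameNodeInfo" and the sample JSON in the comment are assumptions about registration and serialization, not part of this commit:

    import java.lang.management.ManagementFactory;
    import javax.management.MBeanServer;
    import javax.management.ObjectName;

    // Illustrative JMX client for the new getNameDirStatuses() attribute.
    public class NameDirStatusSketch {
      public static void main(String[] args) throws Exception {
        MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
        // Assumed registration point for the NameNode MXBean.
        ObjectName nn = new ObjectName("Hadoop:service=NameNode,name=NameNodeInfo");
        String json = (String) mbs.getAttribute(nn, "NameDirStatuses");
        // Expected shape, per the FSNamesystem hunk above (values illustrative):
        // {"active":{"/dfs/name1":"IMAGE_AND_EDITS"},"failed":{"/dfs/name2":"IMAGE_AND_EDITS"}}
        System.out.println(json);
      }
    }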
Modified: hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java (original)
+++ hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java Tue Jul 17 20:36:07 2012
@@ -137,9 +137,11 @@ public class NamenodeWebHdfsMethods {
         throw new FileNotFoundException("File " + path + " not found.");
       }
       final long len = status.getLen();
-      if (op == GetOpParam.Op.OPEN && (openOffset < 0L || openOffset >= len)) {
-        throw new IOException("Offset=" + openOffset + " out of the range [0, "
-          + len + "); " + op + ", path=" + path);
+      if (op == GetOpParam.Op.OPEN) {
+        if (openOffset < 0L || (openOffset >= len && len > 0)) {
+          throw new IOException("Offset=" + openOffset
+              + " out of the range [0, " + len + "); " + op + ", path=" + path);
+        }
       }
 
       if (len > 0) {

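The WebHDFS change above permits an OPEN at offset 0 on a zero-length file; previously the check openOffset >= len rejected offset 0 whenever len == 0. A hedged sketch of the now-working client path; the webhdfs URI and path are placeholders:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Illustration: opening an empty file over WebHDFS now returns EOF instead
    // of failing with "Offset=0 out of the range [0, 0)".
    public class EmptyFileOpenSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(
            URI.create("webhdfs://namenode:50070"), new Configuration());
        Path empty = new Path("/tmp/empty-file");
        fs.create(empty).close();               // zero-length file
        FSDataInputStream in = fs.open(empty);  // previously threw IOException
        System.out.println(in.read());          // -1: immediate EOF
        in.close();
      }
    }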
Modified: hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java (original)
+++ hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java Tue Jul 17 20:36:07 2012
@@ -56,6 +56,11 @@ import org.apache.hadoop.util.GenericOpt
  */
 public class DelegationTokenFetcher {
   
+  static {
+    Configuration.addDefaultResource("hdfs-default.xml");
+    Configuration.addDefaultResource("hdfs-site.xml");
+  }
+
   private static final Log LOG = 
     LogFactory.getLog(DelegationTokenFetcher.class);
   private static final String WEBSERVICE = "webservice";

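The static block added above registers the HDFS resource files as Configuration defaults, so every Configuration the tool builds afterwards picks up hdfs-default.xml and hdfs-site.xml without extra wiring. A small illustration (the class name is illustrative; dfs.http.address is one example of a key defined in hdfs-default.xml in branch-1):

    import org.apache.hadoop.conf.Configuration;

    // Illustration of Configuration.addDefaultResource: once registered, the
    // resource is merged into every subsequently created Configuration.
    public class DefaultResourceSketch {
      static {
        Configuration.addDefaultResource("hdfs-default.xml");
        Configuration.addDefaultResource("hdfs-site.xml");
      }
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // HDFS keys from the registered resources are now visible here.
        System.out.println(conf.get("dfs.http.address"));
      }
    }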
Propchange: hadoop/common/branches/branch-1-win/src/mapred/
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/branch-1/src/mapred:r1301804,1302058,1302720,1303017,1303027,1325636,1331064,1333564

Modified: hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/JobClient.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/JobClient.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/JobClient.java (original)
+++ hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/JobClient.java Tue Jul 17 20:36:07 2012
@@ -878,8 +878,6 @@ public class JobClient extends Configure
           }
           JobContext context = new JobContext(jobCopy, jobId);
 
-          jobCopy = (JobConf)context.getConfiguration();
-
           // Check the output specification
           if (reduces == 0 ? jobCopy.getUseNewMapper() : 
             jobCopy.getUseNewReducer()) {
@@ -890,6 +888,8 @@ public class JobClient extends Configure
           } else {
             jobCopy.getOutputFormat().checkOutputSpecs(fs, jobCopy);
           }
+          
+          jobCopy = (JobConf)context.getConfiguration();
 
           // Create the splits for the job
           FileSystem fs = submitJobDir.getFileSystem(jobCopy);

Modified: hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/JobInProgress.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/JobInProgress.java (original)
+++ hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/JobInProgress.java Tue Jul 17 20:36:07 2012
@@ -733,7 +733,7 @@ public class JobInProgress {
     if (numMapTasks > 0) { 
       nonRunningMapCache = createCache(splits, maxLevel);
     }
-        
+
     // set the launch time
     this.launchTime = jobtracker.getClock().getTime();
 
@@ -790,12 +790,15 @@ public class JobInProgress {
     
     synchronized(jobInitKillStatus){
       jobInitKillStatus.initDone = true;
+
+      // set this before the throw to make sure cleanup works properly
+      tasksInited = true;
+
       if(jobInitKillStatus.killed) {
         throw new KillInterruptedException("Job " + jobId + " killed in init");
       }
     }
     
-    tasksInited = true;
     JobHistory.JobInfo.logInited(profile.getJobID(), this.launchTime, 
                                  numMapTasks, numReduceTasks);
     
@@ -3205,11 +3208,16 @@ public class JobInProgress {
       // Cancel task tracker reservation
       cancelReservedSlots();
 
+      //  Waiting metrics are incremented in JobInProgress.initTasks()
+      //  If a job gets an exception before that, we do not want to
+      //  incorrectly decrement.
+      if (tasksInited) {
+        jobtracker.getInstrumentation().decWaitingMaps(getJobID(), pendingMaps());
+        jobtracker.getInstrumentation().decWaitingReduces(getJobID(), pendingReduces());
+        this.queueMetrics.decWaitingMaps(getJobID(), pendingMaps());
+        this.queueMetrics.decWaitingReduces(getJobID(), pendingReduces());
+      }
       // Let the JobTracker know that a job is complete
-      jobtracker.getInstrumentation().decWaitingMaps(getJobID(), pendingMaps());
-      jobtracker.getInstrumentation().decWaitingReduces(getJobID(), pendingReduces());
-      this.queueMetrics.decWaitingMaps(getJobID(), pendingMaps());
-      this.queueMetrics.decWaitingReduces(getJobID(), pendingReduces());
       jobtracker.storeCompletedJob(this);
       jobtracker.finalizeJob(this);
 
@@ -3500,7 +3508,8 @@ public class JobInProgress {
           .add("mapSlotSeconds", mapSlotSeconds)
           .add("reduceSlotsSeconds", reduceSlotSeconds)
           .add("clusterMapCapacity", cluster.getMaxMapTasks())
-          .add("clusterReduceCapacity", cluster.getMaxReduceTasks());
+          .add("clusterReduceCapacity", cluster.getMaxReduceTasks())
+          .add("jobName", profile.getJobName());
 
       LOG.info(summary);
     }

Modified: hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/LinuxTaskController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/LinuxTaskController.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/LinuxTaskController.java (original)
+++ hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/LinuxTaskController.java Tue Jul 17 20:36:07 2012
@@ -190,7 +190,8 @@ class LinuxTaskController extends TaskCo
     } catch (ExitCodeException e) {
       int exitCode = shExec.getExitCode();
       logOutput(shExec.getOutput());
-      throw new IOException("Job initialization failed (" + exitCode + ")", e);
+      throw new IOException("Job initialization failed (" + exitCode + 
+          ") with output: " + shExec.getOutput(), e);
     }
   }
 

Modified: hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/QueueMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/QueueMetrics.java?rev=1362639&r1=1362638&r2=1362639&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/QueueMetrics.java (original)
+++ hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/QueueMetrics.java Tue Jul 17 20:36:07 2012
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.mapred;
 
+import java.util.ArrayList;
+
 import org.apache.hadoop.metrics2.MetricsBuilder;
 import org.apache.hadoop.metrics2.MetricsSource;
 import org.apache.hadoop.metrics2.lib.MetricMutableCounterInt;
@@ -36,9 +38,14 @@ import org.apache.hadoop.metrics2.lib.De
  */
 @SuppressWarnings("deprecation")
 class QueueMetrics implements MetricsSource {
+
   private static final Log LOG =
     LogFactory.getLog(QueueMetrics.class);
 
+  public static final String BUCKET_PROPERTY = 
+    "mapred.queue.metrics.runtime.buckets";
+  private static final String DEFAULT_BUCKETS = "60,300,1440";
+
   final MetricsRegistry registry = new MetricsRegistry("Queue");
   final MetricMutableCounterInt mapsLaunched =
       registry.newCounter("maps_launched", "", 0);
@@ -76,6 +83,8 @@ class QueueMetrics implements MetricsSou
       registry.newCounter("maps_killed", "", 0);
   final MetricMutableCounterInt redsKilled =
       registry.newCounter("reduces_killed", "", 0);
+  final MetricMutableGaugeInt[] runningTime;
+  TimeBucketMetrics<JobID> runBuckets;
 
   final String sessionId;
   private String queueName;
@@ -85,13 +94,45 @@ class QueueMetrics implements MetricsSou
     sessionId = conf.get("session.id", "");
     registry.setContext("mapred").tag("sessionId", "", sessionId);
     registry.tag("Queue", "Metrics by queue", queueName);
+    runningTime = buildBuckets(conf);
   }
 
   public String getQueueName() {
     return this.queueName;
   }
 
+  private static ArrayList<Integer> parseInts(String value) {
+    ArrayList<Integer> result = new ArrayList<Integer>();
+    for(String word: value.split(",")) {
+      result.add(Integer.parseInt(word.trim()));
+    }
+    return result;
+  }
+
+  private MetricMutableGaugeInt[] buildBuckets(Configuration conf) {
+    ArrayList<Integer> buckets = 
+      parseInts(conf.get(BUCKET_PROPERTY, DEFAULT_BUCKETS));
+    MetricMutableGaugeInt[] result = 
+      new MetricMutableGaugeInt[buckets.size() + 1];
+    result[0] = registry.newGauge("running_0", "", 0);
+    long[] cuts = new long[buckets.size()];
+    for(int i=0; i < buckets.size(); ++i) {
+      result[i+1] = registry.newGauge("running_" + buckets.get(i), "", 0);
+      cuts[i] = buckets.get(i) * 1000L * 60; // convert from minutes to ms
+    }
+    this.runBuckets = new TimeBucketMetrics<JobID>(cuts);
+    return result;
+  }
+
+  private void updateRunningTime() {
+    int[] counts = runBuckets.getBucketCounts(System.currentTimeMillis());
+    for(int i=0; i < counts.length; ++i) {
+      runningTime[i].set(counts[i]); 
+    }
+  }
+
   public void getMetrics(MetricsBuilder builder, boolean all) {
+    updateRunningTime();
     registry.snapshot(builder.addRecord(registry.name()), all);
   }
 
@@ -181,10 +222,12 @@ class QueueMetrics implements MetricsSou
 
   public void addRunningJob(JobConf conf, JobID id) {
     jobsRunning.incr();
+    runBuckets.add(id, System.currentTimeMillis());
   }
 
   public void decRunningJob(JobConf conf, JobID id) {
     jobsRunning.decr();
+    runBuckets.remove(id);
   }
 
   public void killedMap(TaskAttemptID taskAttemptID) {

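Usage note for the QueueMetrics bucket support above: mapred.queue.metrics.runtime.buckets lists cut points in minutes, and buildBuckets() creates one gauge per interval (running_0 plus running_<cut> for each cut). A hedged configuration sketch:

    import org.apache.hadoop.conf.Configuration;

    // Illustration: with the setting below, the registry exposes gauges
    // running_0, running_30, running_120 and running_600, counting running
    // jobs whose age falls in [0,30), [30,120), [120,600) and [600,inf)
    // minutes respectively.
    public class BucketConfigSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("mapred.queue.metrics.runtime.buckets", "30,120,600"); // minutes
        // The default (per DEFAULT_BUCKETS above) is "60,300,1440", i.e.
        // buckets for jobs running <1h, 1-5h, 5-24h and >24h.
        System.out.println(conf.get("mapred.queue.metrics.runtime.buckets"));
      }
    }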

