hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r945395 - in /hadoop/hbase/trunk: ./ core/src/main/java/org/apache/hadoop/hbase/client/ core/src/main/java/org/apache/hadoop/hbase/regionserver/ core/src/test/java/org/apache/hadoop/hbase/
Date Mon, 17 May 2010 21:36:03 GMT
Author: stack
Date: Mon May 17 21:36:02 2010
New Revision: 945395

URL: http://svn.apache.org/viewvc?rev=945395&view=rev
Log:
HBASE-2421 Put hangs for 10 retries on failed region servers; forward-port from branch

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnection.java
    hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
    hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HTable.java
    hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hadoop/hbase/trunk/core/src/test/java/org/apache/hadoop/hbase/TestMultiParallelPut.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=945395&r1=945394&r2=945395&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Mon May 17 21:36:02 2010
@@ -325,6 +325,7 @@ Release 0.21.0 - Unreleased
    HBASE-2382  Don't rely on fs.getDefaultReplication() to roll HLogs
                (Nicolas Spiegelberg via Stack)  
    HBASE-2415  Disable META splitting in 0.20 (Todd Lipcon via Stack)
+   HBASE-2421  Put hangs for 10 retries on failed region servers
 
   IMPROVEMENTS
    HBASE-1760  Cleanup TODOs in HTable

Modified: hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnection.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnection.java?rev=945395&r1=945394&r2=945395&view=diff
==============================================================================
--- hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnection.java (original)
+++ hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnection.java Mon May 17 21:36:02 2010
@@ -192,7 +192,7 @@ public interface HConnection {
    * @throws IOException if a remote or network exception occurs
    * @throws RuntimeException other unspecified error
    */
-  public <T> T getRegionServerForWithoutRetries(ServerCallable<T> callable)
+  public <T> T getRegionServerWithoutRetries(ServerCallable<T> callable) 
   throws IOException, RuntimeException;
 
 

Modified: hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=945395&r1=945394&r2=945395&view=diff
==============================================================================
--- hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java Mon May 17 21:36:02 2010
@@ -680,7 +680,7 @@ public class HConnectionManager implemen
           // This block guards against two threads trying to load the meta
           // region at the same time. The first will load the meta region and
           // the second will use the value that the first one found.
-          synchronized(regionLockObject) {
+          synchronized (regionLockObject) {
             // Check the cache again for a hit in case some other thread made the
             // same query while we were waiting on the lock. If not supposed to
             // be using the cache, delete any existing cached location so it won't
@@ -1077,15 +1077,19 @@ public class HConnectionManager implemen
       return null;
     }
 
-    public <T> T getRegionServerForWithoutRetries(ServerCallable<T> callable)
+    public <T> T getRegionServerWithoutRetries(ServerCallable<T> callable)
         throws IOException, RuntimeException {
       try {
         callable.instantiateServer(false);
         return callable.call();
       } catch (Throwable t) {
-        t = translateException(t);
+        Throwable t2 = translateException(t);
+        if (t2 instanceof IOException) {
+          throw (IOException)t2;
+        } else {
+          throw new RuntimeException(t2);
+        }
       }
-      return null;
     }
 
     @SuppressWarnings({"ConstantConditions"})
@@ -1299,9 +1303,25 @@ public class HConnectionManager implemen
       }
     }
 
-    @SuppressWarnings({"ConstantConditions"})
+    /**
+     * Process a batch of Puts on the given executor service.
+     *
+     * @param list the puts to make - successful puts will be removed.
+     * @param pool thread pool to execute requests on
+     *
+     * In the case of an exception, we take different actions depending on the
+     * situation:
+     *  - If the exception is a DoNotRetryException, we rethrow it and leave the
+     *    'list' parameter in an indeterminate state.
+     *  - If the 'list' parameter is a singleton, we directly throw the specific
+     *    exception for that put.
+     *  - Otherwise, we throw a generic exception indicating that an error occurred.
+     *    The 'list' parameter is mutated to contain those puts that did not succeed.
+     */
     public void processBatchOfPuts(List<Put> list,
                                    final byte[] tableName, ExecutorService pool) throws IOException {
+      boolean singletonList = list.size() == 1;
+      Throwable singleRowCause = null;
       for ( int tries = 0 ; tries < numRetries && !list.isEmpty(); ++tries) {
         Collections.sort(list);
         Map<HServerAddress, MultiPut> regionPuts =
@@ -1367,10 +1387,19 @@ public class HConnectionManager implemen
             LOG.debug("Failed all from " + request.address, e);
             failed.addAll(request.allPuts());
           } catch (ExecutionException e) {
-            System.out.println(e);
             // all go into the failed list.
             LOG.debug("Failed all from " + request.address, e);
             failed.addAll(request.allPuts());
+
+            // Just give up, leaving the batch put list in an untouched/semi-committed state
+            if (e.getCause() instanceof DoNotRetryIOException) {
+              throw (DoNotRetryIOException) e.getCause();
+            }
+
+            if (singletonList) {
+              // be richer for reporting in a 1 row case.
+              singleRowCause = e.getCause();
+            }
           }
         }
         list.clear();
@@ -1391,9 +1420,13 @@ public class HConnectionManager implemen
         }
       }
       if (!list.isEmpty()) {
+        if (singletonList && singleRowCause != null) {
+          throw new IOException(singleRowCause);
+        }
+
         // ran out of retries and didnt succeed everything!
         throw new RetriesExhaustedException("Still had " + list.size() + " puts left after retrying " +
-            numRetries + " times. Should have detail on which Regions failed the most");
+            numRetries + " times.");
       }
     }
 
@@ -1404,7 +1437,7 @@ public class HConnectionManager implemen
       final HConnection connection = this;
       return new Callable<MultiPutResponse>() {
         public MultiPutResponse call() throws IOException {
-          return getRegionServerWithRetries(
+          return getRegionServerWithoutRetries(
               new ServerCallable<MultiPutResponse>(connection, tableName, null) {
                 public MultiPutResponse call() throws IOException {
                   MultiPutResponse resp = server.multiPut(puts);

Modified: hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HTable.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HTable.java?rev=945395&r1=945394&r2=945395&view=diff
==============================================================================
--- hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HTable.java (original)
+++ hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/client/HTable.java Mon May 17 21:36:02 2010
@@ -542,8 +542,8 @@ public class HTable implements HTableInt
     } finally {
       // the write buffer was adjusted by processBatchOfPuts
       currentWriteBufferSize = 0;
-      for (Put aWriteBuffer : writeBuffer) {
-        currentWriteBufferSize += aWriteBuffer.heapSize();
+      for (Put aPut : writeBuffer) {
+        currentWriteBufferSize += aPut.heapSize();
       }
     }
   }

Modified: hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=945395&r1=945394&r2=945395&view=diff
==============================================================================
--- hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Mon May 17 21:36:02 2010
@@ -233,7 +233,7 @@ public class HRegionServer implements HC
 
   // Run HDFS shutdown on exit if this is set. We clear this out when
   // doing a restart() to prevent closing of HDFS.
-  private final AtomicBoolean shutdownHDFS = new AtomicBoolean(true);
+  public final AtomicBoolean shutdownHDFS = new AtomicBoolean(true);
 
   private final String machineName;
 

Modified: hadoop/hbase/trunk/core/src/test/java/org/apache/hadoop/hbase/TestMultiParallelPut.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/core/src/test/java/org/apache/hadoop/hbase/TestMultiParallelPut.java?rev=945395&r1=945394&r2=945395&view=diff
==============================================================================
--- hadoop/hbase/trunk/core/src/test/java/org/apache/hadoop/hbase/TestMultiParallelPut.java (original)
+++ hadoop/hbase/trunk/core/src/test/java/org/apache/hadoop/hbase/TestMultiParallelPut.java Mon May 17 21:36:02 2010
@@ -20,15 +20,15 @@
 
 package org.apache.hadoop.hbase;
 
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.util.Bytes;
 
-import java.util.List;
 import java.util.ArrayList;
+import java.util.List;
 
 public class TestMultiParallelPut extends MultiRegionTable {
   private static final byte[] VALUE = Bytes.toBytes("value");
@@ -58,7 +58,14 @@ public class TestMultiParallelPut extend
 
   List<byte[]> keys = new ArrayList<byte[]>();
 
-  public void testMultiPut() throws Exception {
+  public void testParallelPut() throws Exception {
+    doATest(false);
+  }
+  public void testParallelPutWithRSAbort() throws Exception {
+    doATest(true);
+  }
+
+  public void doATest(boolean doAbort) throws Exception {
 
     HTable table = new HTable(TEST_TABLE);
     table.setAutoFlush(false);
@@ -73,6 +80,19 @@ public class TestMultiParallelPut extend
 
     table.flushCommits();
 
+    if (doAbort) {
+      cluster.abortRegionServer(0);
+
+      // try putting more keys after the abort.
+      for ( byte [] k : keys ) {
+        Put put = new Put(k);
+        put.add(BYTES_FAMILY, QUALIFIER, VALUE);
+
+        table.put(put);
+      }
+      table.flushCommits();
+    }
+
     for (byte [] k : keys ) {
       Get get = new Get(k);
       get.addColumn(BYTES_FAMILY, QUALIFIER);
@@ -88,10 +108,15 @@ public class TestMultiParallelPut extend
     HBaseAdmin admin = new HBaseAdmin(conf);
     ClusterStatus cs = admin.getClusterStatus();
 
-    assertEquals(2, cs.getServers());
+    int expectedServerCount = 2;
+    if (doAbort)
+      expectedServerCount = 1;
+
+    assertEquals(expectedServerCount, cs.getServers());
     for ( HServerInfo info : cs.getServerInfo()) {
       System.out.println(info);
       assertTrue( info.getLoad().getNumberOfRegions() > 10);
     }
   }
+
 }



Mime
View raw message