hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r958153 - in /hbase/trunk: ./ src/assembly/ src/main/java/org/apache/hadoop/hbase/master/ src/test/java/org/apache/hadoop/hbase/ src/test/java/org/apache/hadoop/hbase/master/
Date Sat, 26 Jun 2010 00:05:17 GMT
Author: stack
Date: Sat Jun 26 00:05:17 2010
New Revision: 958153

URL: http://svn.apache.org/viewvc?rev=958153&view=rev
Log:
HBASE-2707 Can't recover from a dead ROOT server if any exceptions happens during log splitting

Added:
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java
Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/assembly/bin.xml
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=958153&r1=958152&r2=958153&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Sat Jun 26 00:05:17 2010
@@ -423,6 +423,8 @@ Release 0.21.0 - Unreleased
                no progress running YCSB on clean cluster startup                     
    HBASE-2785  TestScannerTimeout.test2772 is flaky
    HBASE-2787  PE is confused about flushCommits
+   HBASE-2707  Can't recover from a dead ROOT server if any exceptions happens
+               during log splitting
 
   IMPROVEMENTS
    HBASE-1760  Cleanup TODOs in HTable

Modified: hbase/trunk/src/assembly/bin.xml
URL: http://svn.apache.org/viewvc/hbase/trunk/src/assembly/bin.xml?rev=958153&r1=958152&r2=958153&view=diff
==============================================================================
--- hbase/trunk/src/assembly/bin.xml (original)
+++ hbase/trunk/src/assembly/bin.xml Sat Jun 26 00:05:17 2010
@@ -28,6 +28,7 @@
       <outputDirectory>/</outputDirectory>
       <includes>
           <include>hbase-${project.version}.jar</include>
+          <include>hbase-${project.version}-tests.jar</include>
       </includes>
     </fileSet>
     <fileSet>

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=958153&r1=958152&r2=958153&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Sat Jun 26 00:05:17
2010
@@ -504,8 +504,7 @@ public class HMaster extends Thread impl
               this.serverManager.getServersToServerInfo().keySet().toString());
           }
         }
-        final HServerAddress root = this.regionManager.getRootRegionLocation();
-        switch (this.regionServerOperationQueue.process(root)) {
+        switch (this.regionServerOperationQueue.process()) {
         case FAILED:
             // If FAILED op processing, bad. Exit.
           break FINISHED;

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=958153&r1=958152&r2=958153&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Sat Jun 26
00:05:17 2010
@@ -19,6 +19,22 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -33,33 +49,14 @@ import org.apache.hadoop.hbase.HServerIn
 import org.apache.hadoop.hbase.HServerLoad;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.executor.RegionTransitionEventData;
-import org.apache.hadoop.hbase.executor.HBaseEventHandler;
 import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
-import org.apache.hadoop.io.WritableUtils;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-import java.util.concurrent.ConcurrentSkipListMap;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
 
 /**
  * Class to manage assigning regions to servers, state of root and meta, etc.

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java?rev=958153&r1=958152&r2=958153&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java
Sat Jun 26 00:05:17 2010
@@ -111,17 +111,16 @@ public class RegionServerOperationQueue 
 
   /**
    * Try to get an operation off of the queue and process it.
-   * @param rootRegionLocation Location of the root region.
    * @return {@link ProcessingResultCode#PROCESSED},
    * {@link ProcessingResultCode#REQUEUED},
    * {@link ProcessingResultCode#REQUEUED_BUT_PROBLEM}
    */
-  public synchronized ProcessingResultCode process(final HServerAddress rootRegionLocation)
{
+  public synchronized ProcessingResultCode process() {
     RegionServerOperation op = null;
     // Only process the delayed queue if root region is online.  If offline,
     // the operation to put it online is probably in the toDoQueue.  Process
     // it first.
-    if (rootRegionLocation != null) {
+    if (toDoQueue.isEmpty()) {
       op = delayedToDoQueue.poll();
     }
     if (op == null) {

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=958153&r1=958152&r2=958153&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Sat Jun 26
00:05:17 2010
@@ -50,6 +50,7 @@ import org.apache.hadoop.hbase.master.HM
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
@@ -920,4 +921,35 @@ public class HBaseTestingUtility {
       LOG.info("Could not set max recovery field", e);
     }
   }
-}
+
+
+  /**
+   * Wait until <code>countOfRegions</code> rows in .META. have a non-empty
+   * info:server.  This means all regions have been deployed, and the master
+   * has been informed and has updated .META. with each region's server.
+   * @param countOfRegions How many regions in .META. are expected to have
+   * a server assigned.
+   * @throws IOException
+   */
+  public void waitUntilAllRegionsAssigned(final int countOfRegions)
+  throws IOException {
+    HTable meta = new HTable(getConfiguration(), HConstants.META_TABLE_NAME);
+    while (true) {
+      int rows = 0;
+      Scan scan = new Scan();
+      scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+      ResultScanner s = meta.getScanner(scan);
+      for (Result r = null; (r = s.next()) != null;) {
+        byte [] b =
+          r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+        if (b == null || b.length <= 0) break;
+        rows++;
+      }
+      s.close();
+      // If I get to here and all rows have a Server, then all have been assigned.
+      if (rows == countOfRegions) break;
+      LOG.info("Found=" + rows);
+      Threads.sleep(1000); 
+    }
+  }
+}
\ No newline at end of file

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java?rev=958153&r1=958152&r2=958153&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java Sat
Jun 26 00:05:17 2010
@@ -84,7 +84,7 @@ public class TestMasterTransitions {
     TEST_UTIL.createTable(Bytes.toBytes(TABLENAME), FAMILIES);
     HTable t = new HTable(TEST_UTIL.getConfiguration(), TABLENAME);
     int countOfRegions = TEST_UTIL.createMultiRegions(t, getTestFamily());
-    waitUntilAllRegionsAssigned(countOfRegions);
+    TEST_UTIL.waitUntilAllRegionsAssigned(countOfRegions);
     addToEachStartKey(countOfRegions);
   }
 
@@ -457,36 +457,6 @@ public class TestMasterTransitions {
   }
 
   /*
-   * Wait until all rows in .META. have a non-empty info:server.  This means
-   * all regions have been deployed, master has been informed and updated
-   * .META. with the regions deployed server.
-   * @param countOfRegions How many regions in .META.
-   * @throws IOException
-   */
-  private static void waitUntilAllRegionsAssigned(final int countOfRegions)
-  throws IOException {
-    HTable meta = new HTable(TEST_UTIL.getConfiguration(),
-      HConstants.META_TABLE_NAME);
-    while (true) {
-      int rows = 0;
-      Scan scan = new Scan();
-      scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
-      ResultScanner s = meta.getScanner(scan);
-      for (Result r = null; (r = s.next()) != null;) {
-        byte [] b =
-          r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
-        if (b == null || b.length <= 0) break;
-        rows++;
-      }
-      s.close();
-      // If I get to here and all rows have a Server, then all have been assigned.
-      if (rows == countOfRegions) break;
-      LOG.info("Found=" + rows);
-      Threads.sleep(1000); 
-    }
-  }
-
-  /*
    * @return Count of regions in meta table.
    * @throws IOException
    */

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java?rev=958153&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java Sat Jun
26 00:05:17 2010
@@ -0,0 +1,169 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArraySet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HMsg;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.HServerInfo;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Threads;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Test issues assigning ROOT.
+ */
+public class TestROOTAssignment {
+  private static final Log LOG = LogFactory.getLog(TestROOTAssignment.class);
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final byte [] TABLENAME = Bytes.toBytes("root_assignments");
+  private static final byte [][] FAMILIES =
+    new byte [][] {Bytes.toBytes("family")};
+
+  /**
+   * Start up a mini cluster and put a small table of many empty regions into it.
+   * @throws Exception
+   */
+  @BeforeClass public static void beforeAllTests() throws Exception {
+    TEST_UTIL.getConfiguration().setInt("hbase.regions.percheckin", 2);
+    // Start a cluster of two regionservers.
+    TEST_UTIL.startMiniCluster(2);
+    // Create a table with a single family.  This will assign a region.
+    TEST_UTIL.createTable(TABLENAME, FAMILIES);
+    HTable t = new HTable(TEST_UTIL.getConfiguration(), TABLENAME);
+    int countOfRegions = TEST_UTIL.createMultiRegions(t, FAMILIES[0]);
+    TEST_UTIL.waitUntilAllRegionsAssigned(countOfRegions);
+    HTable table = new HTable(TEST_UTIL.getConfiguration(), TABLENAME);
+    TEST_UTIL.loadTable(table, FAMILIES[0]);
+    table.close();
+  }
+
+  @AfterClass public static void afterAllTests() throws IOException {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before public void setup() throws IOException {
+    TEST_UTIL.ensureSomeRegionServersAvailable(2);
+  }
+
+  /**
+   * Interrupt processing of server shutdown so it gets put on delay queue.
+   */
+  static class PostponeShutdownProcessing implements RegionServerOperationListener {
+    // Set of what we've delayed so we don't do repeated delays.
+    private final Set<RegionServerOperation> postponed =
+      new CopyOnWriteArraySet<RegionServerOperation>();
+    private boolean done = false;
+    private final HServerAddress rootServerAddress;
+    private final HMaster master;
+ 
+    PostponeShutdownProcessing(final HMaster master,
+        final HServerAddress rootServerAddress) {
+      this.master = master;
+      this.rootServerAddress = rootServerAddress;
+    }
+
+    @Override
+    public boolean process(final RegionServerOperation op) throws IOException {
+      // If this is a regionserver shutdown and it is for the root server,
+      // then we want to delay the processing of the shutdown.
+      boolean result = true;
+      if (op instanceof ProcessServerShutdown) {
+        ProcessServerShutdown pss = (ProcessServerShutdown)op;
+        if (pss.getDeadServerAddress().equals(this.rootServerAddress)) {
+          // Don't postpone more than once.
+          if (!this.postponed.contains(pss)) {
+            this.postponed.add(pss);
+            Assert.assertNull(this.master.getRegionManager().getRootRegionLocation());
+            pss.setDelay(1 * 1000);
+            // Return false.  This will add this op to the delayed queue.
+            result = false;
+          }
+        }
+      }
+      return result;
+    }
+
+    @Override
+    public boolean process(HServerInfo serverInfo, HMsg incomingMsg) {
+      return true;
+    }
+
+    @Override
+    public void processed(RegionServerOperation op) {
+      if (op instanceof ProcessServerShutdown) {
+        ProcessServerShutdown pss = (ProcessServerShutdown)op;
+        if (pss.getDeadServerAddress().equals(this.rootServerAddress)) {
+          this.done = true;
+        }
+      }
+    }
+
+    public boolean isDone() {
+      return this.done;
+    }
+  }
+
+  /**
+   * If the split of the log for the regionserver hosting ROOT doesn't go off
+   * smoothly, if the process server shutdown gets added to the delayed queue
+   * of events to process, then ROOT was not being allocated, ever.
+   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2707">HBASE-2707</a>

+   */
+  @Test (timeout=300000) public void testROOTDeployedThoughProblemSplittingLog()
+  throws Exception {
+    LOG.info("Running testROOTDeployedThoughProblemSplittingLog");
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    final HMaster master = cluster.getMaster();
+    byte [] rootRegion = Bytes.toBytes("-ROOT-,,0");
+    int rootIndex = cluster.getServerWith(rootRegion);
+    final HRegionServer rootHRS = cluster.getRegionServer(rootIndex);
+ 
+    // Add our RegionServerOperationListener
+    PostponeShutdownProcessing listener = new PostponeShutdownProcessing(master,
+      rootHRS.getHServerInfo().getServerAddress());
+    master.getRegionServerOperationQueue().
+      registerRegionServerOperationListener(listener);
+    try {
+      // Now abort the server carrying ROOT.
+      cluster.abortRegionServer(rootIndex);
+
+      // Wait for processing of the shutdown server.
+      while(!listener.isDone()) Threads.sleep(100);
+      master.getRegionManager().waitForRootRegionLocation();
+    } finally {
+      master.getRegionServerOperationQueue().
+        unregisterRegionServerOperationListener(listener);
+    }
+  }
+}



Mime
View raw message