hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jdcry...@apache.org
Subject svn commit: r1050542 - in /hbase/trunk: ./ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/regionserver/ src/main/java/org/apache/hadoop/hbase/regionserver/wal/ src/main/java/org/apache/hadoop/hbase/replication/regio...
Date Sat, 18 Dec 2010 00:13:13 GMT
Author: jdcryans
Date: Sat Dec 18 00:13:13 2010
New Revision: 1050542

URL: http://svn.apache.org/viewvc?rev=1050542&view=rev
Log:
   HBASE-3366  WALObservers should be notified before the lock
   HBASE-3367  Failed log split not retried 
   HBASE-3370  ReplicationSource.openReader fails to locate HLogs when they
               aren't split yet
   HBASE-3371  Race in TestReplication can make it fail

   HBASE-3372  HRS shouldn't print a full stack for ServerNotRunningException

Added:
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java
Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Sat Dec 18 00:13:13 2010
@@ -790,6 +790,11 @@ Release 0.90.0 - Unreleased
                location in .META. is possible
    HBASE-3368  Split message can come in before region opened message; results
                in 'Region has been PENDING_CLOSE for too long' cycle
+   HBASE-3366  WALObservers should be notified before the lock
+   HBASE-3367  Failed log split not retried
+   HBASE-3370  ReplicationSource.openReader fails to locate HLogs when they
+               aren't split yet
+   HBASE-3371  Race in TestReplication can make it fail
 
 
   IMPROVEMENTS
@@ -1274,6 +1279,7 @@ Release 0.90.0 - Unreleased
    HBASE-3303  Lower hbase.regionserver.handler.count from 25 back to 10
    HBASE-2467  Concurrent flushers in HLog sync using HDFS-895
    HBASE-3349  Pass HBase configuration to HttpServer
+   HBASE-3372  HRS shouldn't print a full stack for ServerNotRunningException
 
 
   NEW FEATURES

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java Sat Dec
18 00:13:13 2010
@@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.regionser
 import org.apache.hadoop.hbase.regionserver.Store;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
+import org.apache.hadoop.hbase.regionserver.wal.OrphanHLogAfterSplitException;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
 
@@ -190,7 +191,12 @@ public class MasterFileSystem {
     Path logDir = new Path(this.rootdir, HLog.getHLogDirectoryName(serverName));
     try {
       HLogSplitter splitter = HLogSplitter.createLogSplitter(conf);
-      splitter.splitLog(this.rootdir, logDir, oldLogDir, this.fs, conf);
+      try {
+        splitter.splitLog(this.rootdir, logDir, oldLogDir, this.fs, conf);
+      } catch (OrphanHLogAfterSplitException e) {
+        LOG.warn("Retrying splitting because of:", e);
+        splitter.splitLog(this.rootdir, logDir, oldLogDir, this.fs, conf);
+      }
       splitTime = splitter.getTime();
       splitLogSize = splitter.getSize();
     } catch (IOException e) {

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Sat
Dec 18 00:13:13 2010
@@ -103,6 +103,7 @@ import org.apache.hadoop.hbase.ipc.HMast
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.ipc.Invocation;
 import org.apache.hadoop.hbase.ipc.RpcServer;
+import org.apache.hadoop.hbase.ipc.ServerNotRunningException;
 import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException;
 import org.apache.hadoop.hbase.regionserver.handler.CloseMetaHandler;
 import org.apache.hadoop.hbase.regionserver.handler.CloseRegionHandler;
@@ -1419,7 +1420,13 @@ public class HRegionServer implements HR
             masterAddress.getInetSocketAddress(), this.conf, -1,
             this.rpcTimeout, this.rpcTimeout);
       } catch (IOException e) {
-        LOG.warn("Unable to connect to master. Retrying. Error was:", e);
+        e = e instanceof RemoteException ?
+            ((RemoteException)e).unwrapRemoteException() : e;
+        if (e instanceof ServerNotRunningException) {
+          LOG.info("Master isn't available yet, retrying");
+        } else {
+          LOG.warn("Unable to connect to master. Retrying. Error was:", e);
+        }
         sleeper.sleep();
       }
     }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java Sat Dec 18
00:13:13 2010
@@ -493,6 +493,13 @@ public class HLog implements Syncable {
         nextHdfsOut =
           ((SequenceFileLogWriter)nextWriter).getDFSCOutputStream();
       }
+      // Tell our listeners that a new log was created
+      if (!this.listeners.isEmpty()) {
+        for (WALObserver i : this.listeners) {
+          i.logRolled(newPath);
+        }
+      }
+
       synchronized (updateLock) {
         // Clean up current writer.
         Path oldFile = cleanupCurrentWriter(currentFilenum);
@@ -509,12 +516,6 @@ public class HLog implements Syncable {
         this.numEntries.set(0);
         this.logRollRequested = false;
       }
-      // Tell our listeners that a new log was created
-      if (!this.listeners.isEmpty()) {
-        for (WALObserver i : this.listeners) {
-          i.logRolled(newPath);
-        }
-      }
       // Can we delete any of the old log files?
       if (this.outputfiles.size() > 0) {
         if (this.lastSeqWritten.isEmpty()) {

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java Sat
Dec 18 00:13:13 2010
@@ -287,7 +287,8 @@ public class HLogSplitter {
       }
       if (fs.listStatus(srcDir).length > processedLogs.size()
           + corruptedLogs.size()) {
-        throw new IOException("Discovered orphan hlog after split. Maybe "
+        throw new OrphanHLogAfterSplitException(
+            "Discovered orphan hlog after split. Maybe the "
             + "HRegionServer was not dead when we started");
       }
       archiveLogs(corruptedLogs, processedLogs, oldLogDir, fs, conf);

Added: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java?rev=1050542&view=auto
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java
(added)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java
Sat Dec 18 00:13:13 2010
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver.wal;
+
+import java.io.IOException;
+
+public class OrphanHLogAfterSplitException extends IOException {
+
+  /**
+   * Create this exception without a message
+   */
+  public OrphanHLogAfterSplitException() {
+    super();
+  }
+
+  /**
+   * Create this exception with a message
+   * @param message why it failed
+   */
+  public OrphanHLogAfterSplitException(String message) {
+    super(message);
+  }
+}

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
Sat Dec 18 00:13:13 2010
@@ -438,17 +438,20 @@ public class ReplicationSource extends T
           // We didn't find the log in the archive directory, look if it still
           // exists in the dead RS folder (there could be a chain of failures
           // to look at)
-          for (int i = this.deadRegionServers.length - 1; i > 0; i--) {
+          LOG.info("NB dead servers : " + deadRegionServers.length);
+          for (int i = this.deadRegionServers.length - 1; i >= 0; i--) {
+
             Path deadRsDirectory =
-                new Path(this.manager.getLogDir(), this.deadRegionServers[i]);
+                new Path(manager.getLogDir().getParent(), this.deadRegionServers[i]);
             Path possibleLogLocation =
                 new Path(deadRsDirectory, currentPath.getName());
+            LOG.info("Possible location " + possibleLogLocation.toUri().toString());
             if (this.manager.getFs().exists(possibleLogLocation)) {
               // We found the right new location
               LOG.info("Log " + this.currentPath + " still exists at " +
                   possibleLogLocation);
               // Breaking here will make us sleep since reader is null
-              break;
+              return true;
             }
           }
           // TODO What happens if the log was missing from every single location?

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Sat Dec 18
00:13:13 2010
@@ -599,6 +599,7 @@ public class HBaseTestingUtility {
       Delete del = new Delete(res.getRow());
       table.delete(del);
     }
+    resScan = table.getScanner(scan);
     return table;
   }
 

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
(original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
Sat Dec 18 00:13:13 2010
@@ -157,7 +157,7 @@ public class TestHLogSplit {
     assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
   }
 
-  @Test(expected = IOException.class)
+  @Test(expected = OrphanHLogAfterSplitException.class)
   public void testSplitFailsIfNewHLogGetsCreatedAfterSplitStarted()
   throws IOException {
     AtomicBoolean stop = new AtomicBoolean(false);

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java Sat
Dec 18 00:13:13 2010
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.client.Sc
 import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
 import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.hadoop.mapreduce.Job;
@@ -126,7 +127,6 @@ public class TestReplication {
     utility2.startMiniCluster(2);
 
     HTableDescriptor table = new HTableDescriptor(tableName);
-    table.setDeferredLogFlush(false);
     HColumnDescriptor fam = new HColumnDescriptor(famName);
     fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
     table.addFamily(fam);
@@ -153,8 +153,15 @@ public class TestReplication {
    */
   @Before
   public void setUp() throws Exception {
+
+    // Starting and stopping replication can make us miss new logs,
+    // rolling like this makes sure the most recent one gets added to the queue
+    for ( JVMClusterUtil.RegionServerThread r :
+        utility1.getHBaseCluster().getRegionServerThreads()) {
+      r.getRegionServer().getWAL().rollWriter();
+    }
     utility1.truncateTable(tableName);
-    // truncating the table will send on Delete per row to the slave cluster
+    // truncating the table will send one Delete per row to the slave cluster
     // in an async fashion, which is why we cannot just call truncateTable on
     // utility2 since late writes could make it to the slave in some way.
     // Instead, we truncate the first table and wait for all the Deletes to
@@ -172,6 +179,7 @@ public class TestReplication {
        if (lastCount < res.length) {
           i--; // Don't increment timeout if we make progress
         }
+        lastCount = res.length;
         LOG.info("Still got " + res.length + " rows");
         Thread.sleep(SLEEP_TIME);
       } else {



Mime
View raw message