hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jg...@apache.org
Subject svn commit: r1068239 - in /hbase/branches/0.90: ./ src/main/java/org/apache/hadoop/hbase/regionserver/ src/main/java/org/apache/hadoop/hbase/regionserver/handler/ src/main/java/org/apache/hadoop/hbase/util/
Date Tue, 08 Feb 2011 00:41:23 GMT
Author: jgray
Date: Tue Feb  8 00:41:22 2011
New Revision: 1068239

URL: http://svn.apache.org/viewvc?rev=1068239&view=rev
Log:
HBASE-3419 If re-transition to OPENING during log replay fails, server aborts. Instead, should
just cancel region open.

Added:
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/CancelableProgressable.java
Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1068239&r1=1068238&r2=1068239&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Tue Feb  8 00:41:22 2011
@@ -27,6 +27,8 @@ Release 0.90.1 - Unreleased
                AlreadyBeingCreatedException
    HBASE-3501  Remove the deletion limit in LogCleaner
    HBASE-3500  Documentation update for replicatio
+   HBASE-3419  If re-transition to OPENING during log replay fails, server
+               aborts. Instead, should just cancel region open.
 
 
   IMPROVEMENTS

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=1068239&r1=1068238&r2=1068239&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Tue Feb  8 00:41:22 2011
@@ -56,12 +56,12 @@ import org.apache.hadoop.hbase.DroppedSn
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.UnknownScannerException;
+import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Increment;
@@ -79,6 +79,7 @@ import org.apache.hadoop.hbase.regionser
 import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CancelableProgressable;
 import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.hbase.util.CompressionTest;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -326,7 +327,7 @@ public class HRegion implements HeapSize
    * @return What the next sequence (edit) id should be.
    * @throws IOException e
    */
-  public long initialize(final Progressable reporter)
+  public long initialize(final CancelableProgressable reporter)
   throws IOException {
     // A region can be reopened if failed a split; reset flags
     this.closing.set(false);
@@ -1436,7 +1437,7 @@ public class HRegion implements HeapSize
         lastIndexExclusive++;
         numReadyToWrite++;
       }
-      // Nothing to put -- an exception in the above such as NoSuchColumnFamily? 
+      // Nothing to put -- an exception in the above such as NoSuchColumnFamily?
       if (numReadyToWrite <= 0) return 0L;
 
       // We've now grabbed as many puts off the list as we can
@@ -1812,7 +1813,7 @@ public class HRegion implements HeapSize
    * @throws IOException
    */
   protected long replayRecoveredEditsIfAny(final Path regiondir,
-      final long minSeqId, final Progressable reporter)
+      final long minSeqId, final CancelableProgressable reporter)
   throws UnsupportedEncodingException, IOException {
     long seqid = minSeqId;
     NavigableSet<Path> files = HLog.getSplitEditFilesSorted(this.fs, regiondir);
@@ -1861,7 +1862,7 @@ public class HRegion implements HeapSize
    * @throws IOException
    */
   private long replayRecoveredEdits(final Path edits,
-      final long minSeqId, final Progressable reporter)
+      final long minSeqId, final CancelableProgressable reporter)
     throws IOException {
     LOG.info("Replaying edits from " + edits + "; minSequenceid=" + minSeqId);
     HLog.Reader reader = HLog.getReader(this.fs, edits, conf);
@@ -1870,15 +1871,42 @@ public class HRegion implements HeapSize
     long firstSeqIdInLog = -1;
     long skippedEdits = 0;
     long editsCount = 0;
+    long intervalEdits = 0;
     HLog.Entry entry;
     Store store = null;
 
     try {
-      // How many edits to apply before we send a progress report.
-      int interval = this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
+      // How many edits seen before we check elapsed time
+      int interval = this.conf.getInt("hbase.hstore.report.interval.edits",
+          2000);
+      // How often to send a progress report (default 1/2 master timeout)
+      int period = this.conf.getInt("hbase.hstore.report.period",
+          this.conf.getInt("hbase.master.assignment.timeoutmonitor.timeout",
+              30000) / 2);
+      long lastReport = EnvironmentEdgeManager.currentTimeMillis();
+
       while ((entry = reader.next()) != null) {
         HLogKey key = entry.getKey();
         WALEdit val = entry.getEdit();
+
+        if (reporter != null) {
+          intervalEdits += val.size();
+          if (intervalEdits >= interval) {
+            // Number of edits interval reached
+            intervalEdits = 0;
+            long cur = EnvironmentEdgeManager.currentTimeMillis();
+            if (lastReport + period <= cur) {
+              // Timeout reached
+              if(!reporter.progress()) {
+                String msg = "Progressable reporter failed, stopping replay";
+                LOG.warn(msg);
+                throw new IOException(msg);
+              }
+              lastReport = cur;
+            }
+          }
+        }
+
         if (firstSeqIdInLog == -1) {
           firstSeqIdInLog = key.getLogSeqNum();
         }
@@ -1915,12 +1943,6 @@ public class HRegion implements HeapSize
           editsCount++;
         }
         if (flush) internalFlushcache(null, currentEditSeqId);
-
-        // Every 'interval' edits, tell the reporter we're making progress.
-        // Have seen 60k edits taking 3minutes to complete.
-        if (reporter != null && (editsCount % interval) == 0) {
-          reporter.progress();
-        }
       }
     } catch (EOFException eof) {
       Path p = HLog.moveAsideBadEditsFile(fs, edits);
@@ -2495,7 +2517,7 @@ public class HRegion implements HeapSize
    */
   public static HRegion openHRegion(final HRegionInfo info, final HLog wal,
     final Configuration conf, final FlushRequester flusher,
-    final Progressable reporter)
+    final CancelableProgressable reporter)
   throws IOException {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Opening region: " + info);
@@ -2517,7 +2539,7 @@ public class HRegion implements HeapSize
    * @return Returns <code>this</code>
    * @throws IOException
    */
-  protected HRegion openHRegion(final Progressable reporter)
+  protected HRegion openHRegion(final CancelableProgressable reporter)
   throws IOException {
     checkCompressionCodecs();
 

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java?rev=1068239&r1=1068238&r2=1068239&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java Tue Feb  8 00:41:22 2011
@@ -31,7 +31,6 @@ import java.util.concurrent.ThreadFactor
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -43,12 +42,14 @@ import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.catalog.MetaEditor;
 import org.apache.hadoop.hbase.io.Reference.Range;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CancelableProgressable;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.PairOfSameType;
-import org.apache.hadoop.util.Progressable;
 import org.apache.zookeeper.KeeperException;
 
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
 /**
  * Executes region split as a "transaction".  Call {@link #prepare()} to setup
  * the transaction, {@link #execute(OnlineRegions)} to run the transaction and
@@ -222,7 +223,7 @@ class SplitTransaction {
       services.removeFromOnlineRegions(this.parent.getRegionInfo().getEncodedName());
     }
     this.journal.add(JournalEntry.OFFLINED_PARENT);
-    
+
     // TODO: If the below were multithreaded would we complete steps in less
     // elapsed time?  St.Ack 20100920
 
@@ -328,7 +329,7 @@ class SplitTransaction {
     services.postOpenDeployTasks(r, server.getCatalogTracker(), true);
   }
 
-  static class LoggingProgressable implements Progressable {
+  static class LoggingProgressable implements CancelableProgressable {
     private final HRegionInfo hri;
     private long lastLog = -1;
     private final long interval;
@@ -340,12 +341,13 @@ class SplitTransaction {
     }
 
     @Override
-    public void progress() {
+    public boolean progress() {
       long now = System.currentTimeMillis();
       if (now - lastLog > this.interval) {
         LOG.info("Opening " + this.hri.getRegionNameAsString());
         this.lastLog = now;
       }
+      return true;
     }
   }
 
@@ -590,7 +592,7 @@ class SplitTransaction {
    * Call this method on initial region deploy.  Cleans up any mess
    * left by previous deploys of passed <code>r</code> region.
    * @param r
-   * @throws IOException 
+   * @throws IOException
    */
   static void cleanupAnySplitDetritus(final HRegion r) throws IOException {
     Path splitdir = getSplitDir(r);

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java?rev=1068239&r1=1068238&r2=1068239&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java Tue Feb  8 00:41:22 2011
@@ -29,8 +29,8 @@ import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.executor.EventHandler;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
+import org.apache.hadoop.hbase.util.CancelableProgressable;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
-import org.apache.hadoop.util.Progressable;
 import org.apache.zookeeper.KeeperException;
 
 /**
@@ -261,12 +261,12 @@ public class OpenRegionHandler extends E
       // state so master doesn't timeout this region in transition.
       region = HRegion.openHRegion(this.regionInfo, this.rsServices.getWAL(),
         this.server.getConfiguration(), this.rsServices.getFlushRequester(),
-        new Progressable() {
-          public void progress() {
+        new CancelableProgressable() {
+          public boolean progress() {
             // We may lose the znode ownership during the open.  Currently its
             // too hard interrupting ongoing region open.  Just let it complete
             // and check we still have the znode after region open.
-            tickleOpening("open_region_progress");
+            return tickleOpening("open_region_progress");
           }
         });
     } catch (IOException e) {
@@ -325,6 +325,7 @@ public class OpenRegionHandler extends E
     } catch (KeeperException e) {
       server.abort("Exception refreshing OPENING; region=" + encodedName +
         ", context=" + context, e);
+      this.version = -1;
     }
     boolean b = isGoodVersion();
     if (!b) {

Added: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/CancelableProgressable.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/CancelableProgressable.java?rev=1068239&view=auto
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/CancelableProgressable.java (added)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/CancelableProgressable.java Tue Feb  8 00:41:22 2011
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+/**
+ * Similar interface as {@link org.apache.hadoop.util.Progressable} but returns
+ * a boolean to support canceling the operation.
+ * <p>
+ * Used for doing updating of OPENING znode during log replay on region open.
+ */
+public interface CancelableProgressable {
+
+  /**
+   * Report progress.  Returns true if operations should continue, false if the
+   * operation should be canceled and rolled back.
+   * @return whether to continue (true) or cancel (false) the operation
+   */
+  public boolean progress();
+
+}
\ No newline at end of file



Mime
View raw message