hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r636849 - in /hadoop/hbase: branches/0.1/ branches/0.1/conf/ branches/0.1/src/java/org/apache/hadoop/hbase/ branches/0.1/src/java/org/apache/hadoop/hbase/util/ trunk/ trunk/conf/ trunk/src/java/org/apache/hadoop/hbase/master/ trunk/src/java...
Date Thu, 13 Mar 2008 19:37:23 GMT
Author: stack
Date: Thu Mar 13 12:37:21 2008
New Revision: 636849

URL: http://svn.apache.org/viewvc?rev=636849&view=rev
Log:
HBASE-501 Empty region server address in info:server entry and a startcode of -1 in .META.
M conf/hbase-default.xml
Add hbase.hbasemaster.maxregionopen property.
M src/java/org/apache/hadoop/hbase/HStore.java
Change way we log. Do way less. Just emit sums of edits applied
and skipped rather than individual edits.
M src/java/org/apache/hadoop/hbase/HRegionServer.java
Make sleeper instance a local rather than data member.
(reportForDuty): Take a sleeper instance.
(run): Removed redundant wrap of a 'for' by a 'while'.
(constructor): If IOE, do not offline the region. Seen to be
an overreaction.
M src/java/org/apache/hadoop/hbase/HLog.java
Don't output map of all files being cleaned every time a new
entry is added; instead just log new entry. Remove emission
of every 10k edits.
M src/java/org/apache/hadoop/hbase/HMaster.java
Up default for maxregionopen. Was seeing that playing edits
could take a long time (mostly because we used to log every
edit) but no harm in this being longer. On REPORT_CLOSE,
emit region info, not just region so can see the properties
(W/o, made it hard to figure who was responsible for offlining).
Add logging of attempt # in shutdown processing.
Add logging of state flags passed to the close region. Helps
debugging. Also in close offline ONLY if we are NOT reassigning
the region (jimk find).
M src/java/org/apache/hadoop/hbase/util/Sleeper.java
Add logging of extraordinary sleeps or calculated periods
(suspicion is that we're sleeping way longer on loaded machines
and the regionserver appears hung).

Modified:
    hadoop/hbase/branches/0.1/CHANGES.txt
    hadoop/hbase/branches/0.1/conf/hbase-default.xml
    hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HLog.java
    hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java
    hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java
    hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
    hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/conf/hbase-default.xml
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java

Modified: hadoop/hbase/branches/0.1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/CHANGES.txt?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.1/CHANGES.txt Thu Mar 13 12:37:21 2008
@@ -39,6 +39,8 @@
    HBASE-433 HBASE-251 Region server should delete restore log after successful
                restore, Stuck replaying the edits of crashed machine.
    HBASE-27    hregioninfo cell empty in meta table
+   HBASE-501   Empty region server address in info:server entry and a
+               startcode of -1 in .META.
 
   IMPROVEMENTS
    HADOOP-2555 Refactor the HTable#get and HTable#getRow methods to avoid

Modified: hadoop/hbase/branches/0.1/conf/hbase-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/conf/hbase-default.xml?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/conf/hbase-default.xml (original)
+++ hadoop/hbase/branches/0.1/conf/hbase-default.xml Thu Mar 13 12:37:21 2008
@@ -107,6 +107,13 @@
     period.</description>
   </property>
   <property>
+    <name>hbase.hbasemaster.maxregionopen</name>
+    <value>60000</value>
+    <description>Period to wait for a region open.  If regionserver
+    takes longer than this interval, assign to a new regionserver.
+    </description>
+  </property>
+  <property>
     <name>hbase.regionserver.lease.period</name>
     <value>30000</value>
     <description>HRegion server lease period in milliseconds. Default is

Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HLog.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HLog.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HLog.java (original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HLog.java Thu Mar 13 12:37:21 2008
@@ -547,16 +547,15 @@
                 fs.rename(logfile, oldlogfile);
                 old = new SequenceFile.Reader(fs, oldlogfile, conf);
               }
-              
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("Creating new log file writer for path " + logfile +
-                  "; map content " + logWriters.toString());
-              }
               w = SequenceFile.createWriter(fs, conf, logfile, HLogKey.class,
                 HLogEdit.class, getCompressionType(conf));
               // Use copy of regionName; regionName object is reused inside in
               // HStoreKey.getRegionName so its content changes as we iterate.
               logWriters.put(new Text(regionName), w);
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Creating new log file writer for path " + logfile +
+                  " and region " + regionName);
+              }
               
               if (old != null) {
                 // Copy from existing log file
@@ -571,9 +570,6 @@
                 old.close();
                 fs.delete(oldlogfile);
               }
-            }
-            if (LOG.isDebugEnabled() && count > 0 && count % 10000 == 0) {
-              LOG.debug("Applied " + count + " edits");
             }
             w.append(key, val);
           }

Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java (original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java Thu Mar 13 12:37:21 2008
@@ -928,7 +928,7 @@
     this.threadWakeFrequency = conf.getInt(THREAD_WAKE_FREQUENCY, 10 * 1000);
     this.numRetries =  conf.getInt("hbase.client.retries.number", 2);
     this.maxRegionOpenTime =
-      conf.getLong("hbase.hbasemaster.maxregionopen", 30 * 1000);
+      conf.getLong("hbase.hbasemaster.maxregionopen", 60 * 1000);
 
     this.leaseTimeout = conf.getInt("hbase.master.lease.period", 30 * 1000);
     this.serverLeases = new Leases(this.leaseTimeout, 
@@ -1639,33 +1639,26 @@
 
       case HMsg.MSG_REPORT_CLOSE:
         LOG.info(info.getServerAddress().toString() + " no longer serving " +
-            region.getRegionName());
-
+          region);
         if (region.getRegionName().compareTo(
             HRegionInfo.rootRegionInfo.getRegionName()) == 0) {
-
           // Root region
-
           if (region.isOffline()) {
             // Can't proceed without root region. Shutdown.
             LOG.fatal("root region is marked offline");
             shutdown();
           }
           unassignRootRegion();
-
         } else {
           boolean reassignRegion = !region.isOffline();
           boolean deleteRegion = false;
-
           if (killedRegions.remove(region.getRegionName())) {
             reassignRegion = false;
           }
-
           if (regionsToDelete.remove(region.getRegionName())) {
             reassignRegion = false;
             deleteRegion = true;
           }
-
           if (region.isMetaTable()) {
             // Region is part of the meta table. Remove it from onlineMetaRegions
             onlineMetaRegions.remove(region.getStartKey());
@@ -1674,9 +1667,7 @@
           // NOTE: we cannot put the region into unassignedRegions as that
           //       could create a race with the pending close if it gets 
           //       reassigned before the close is processed.
-
           unassignedRegions.remove(region);
-
           try {
             toDoQueue.put(new ProcessRegionClose(region, reassignRegion,
                 deleteRegion));
@@ -2233,7 +2224,7 @@
             if (LOG.isDebugEnabled()) {
               LOG.debug("process server shutdown scanning " +
                   r.getRegionName() + " on " + r.getServer() + " " +
-                  Thread.currentThread().getName());
+                  Thread.currentThread().getName() + " attempt " + tries);
             }
             server = connection.getHRegionConnection(r.getServer());
 
@@ -2357,7 +2348,8 @@
     /** {@inheritDoc} */
     @Override
     public String toString() {
-      return "ProcessRegionClose of " + this.regionInfo.getRegionName();
+      return "ProcessRegionClose of " + this.regionInfo.getRegionName() +
+        ", " + this.reassignRegion + ", " + this.deleteRegion;
     }
 
     @Override
@@ -2382,7 +2374,7 @@
           if (deleteRegion) {
             HRegion.removeRegionFromMETA(getMetaServer(), metaRegionName,
               regionInfo.getRegionName());
-          } else {
+          } else if (!this.reassignRegion) {
             HRegion.offlineRegionInMETA(getMetaServer(), metaRegionName,
               regionInfo);
           }
@@ -2549,6 +2541,7 @@
 
   private void createTable(final HRegionInfo newRegion) throws IOException {
     Text tableName = newRegion.getTableDesc().getName();
+    // TODO: Not thread safe check.
     if (tableInCreation.contains(tableName)) {
       throw new TableExistsException("Table " + tableName + " in process "
           + "of being created");

Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java (original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java Thu Mar 13 12:37:21 2008
@@ -103,7 +103,7 @@
     new ConcurrentHashMap<Text, HRegion>();
  
   protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
-  private volatile List<HMsg> outboundMsgs =
+  private final List<HMsg> outboundMsgs =
     Collections.synchronizedList(new ArrayList<HMsg>());
 
   final int numRetries;
@@ -123,9 +123,6 @@
   
   // Request counter
   private volatile AtomicInteger requestCount = new AtomicInteger();
-  
-  // A sleeper that sleeps for msgInterval.
-  private final Sleeper sleeper;
 
   // Info server.  Default access so can be used by unit tests.  REGIONSERVER
   // is name of the webapp and the attribute name used stuffing this instance
@@ -604,7 +601,7 @@
     // Task thread to process requests from Master
     this.worker = new Worker();
     this.workerThread = new Thread(worker);
-    this.sleeper = new Sleeper(this.msgInterval, this.stopRequested);
+
     // Server to handle client requests
     this.server = HbaseRPC.getServer(this, address.getBindAddress(), 
       address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10),
@@ -629,145 +626,146 @@
    */
   public void run() {
     boolean quiesceRequested = false;
+    // A sleeper that sleeps for msgInterval.
+    Sleeper sleeper =
+      new Sleeper(this.msgInterval, this.stopRequested);
     try {
-      init(reportForDuty());
+      init(reportForDuty(sleeper));
       long lastMsg = 0;
-      while(!stopRequested.get()) {
-        // Now ask master what it wants us to do and tell it what we have done
-        for (int tries = 0; !stopRequested.get();) {
-          long now = System.currentTimeMillis();
-          if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) {
-            // It has been way too long since we last reported to the master.
-            // Commit suicide.
-            LOG.fatal("unable to report to master for " + (now - lastMsg) +
-                " milliseconds - aborting server");
-            abort();
-            break;
+      // Now ask master what it wants us to do and tell it what we have done
+      for (int tries = 0; !stopRequested.get();) {
+        long now = System.currentTimeMillis();
+        if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) {
+          // It has been way too long since we last reported to the master.
+          // Commit suicide.
+          LOG.fatal("unable to report to master for " + (now - lastMsg) +
+            " milliseconds - aborting server");
+          abort();
+          break;
+        }
+        if ((now - lastMsg) >= msgInterval) {
+          HMsg outboundArray[] = null;
+          synchronized(this.outboundMsgs) {
+            outboundArray =
+              this.outboundMsgs.toArray(new HMsg[outboundMsgs.size()]);
+            this.outboundMsgs.clear();
           }
-          if ((now - lastMsg) >= msgInterval) {
-            HMsg outboundArray[] = null;
-            synchronized(outboundMsgs) {
-              outboundArray =
-                this.outboundMsgs.toArray(new HMsg[outboundMsgs.size()]);
+
+          try {
+            this.serverInfo.setLoad(new HServerLoad(requestCount.get(),
+                onlineRegions.size()));
+            this.requestCount.set(0);
+            HMsg msgs[] =
+              this.hbaseMaster.regionServerReport(serverInfo, outboundArray);
+            lastMsg = System.currentTimeMillis();
+
+            if (this.quiesced.get() && onlineRegions.size() == 0) {
+              // We've just told the master we're exiting because we aren't
+              // serving any regions. So set the stop bit and exit.
+              LOG.info("Server quiesced and not serving any regions. " +
+              "Starting shutdown");
+              stopRequested.set(true);
+              continue;
             }
-            this.outboundMsgs.clear();
 
-            try {
-              this.serverInfo.setLoad(new HServerLoad(requestCount.get(),
-                  onlineRegions.size()));
-              this.requestCount.set(0);
-              HMsg msgs[] =
-                this.hbaseMaster.regionServerReport(serverInfo, outboundArray);
-              lastMsg = System.currentTimeMillis();
-              
-              if (this.quiesced.get() && onlineRegions.size() == 0) {
-                // We've just told the master we're exiting because we aren't
-                // serving any regions. So set the stop bit and exit.
-                LOG.info("Server quiesced and not serving any regions. " +
-                    "Starting shutdown");
-                stopRequested.set(true);
-                continue;
-              }
-              
-              // Queue up the HMaster's instruction stream for processing
-              boolean restart = false;
-              for(int i = 0; i < msgs.length && !stopRequested.get() &&
-                  !restart; i++) {
-                switch(msgs[i].getMsg()) {
-                
-                case HMsg.MSG_CALL_SERVER_STARTUP:
-                  LOG.info("Got call server startup message");
-                  // We the MSG_CALL_SERVER_STARTUP on startup but we can also
-                  // get it when the master is panicing because for instance
-                  // the HDFS has been yanked out from under it.  Be wary of
-                  // this message.
-                  if (checkFileSystem()) {
-                    closeAllRegions();
-                    synchronized (logRollerLock) {
-                      try {
-                        log.closeAndDelete();
-
-                      } catch (Exception e) {
-                        LOG.error("error closing and deleting HLog", e);
-                      }
-                      try {
-                        serverInfo.setStartCode(System.currentTimeMillis());
-                        log = setupHLog();
-                      } catch (IOException e) {
-                        this.abortRequested = true;
-                        this.stopRequested.set(true);
-                        e = RemoteExceptionHandler.checkIOException(e); 
-                        LOG.fatal("error restarting server", e);
-                        break;
-                      }
-                    }
-                    reportForDuty();
-                    restart = true;
-                  } else {
-                    LOG.fatal("file system available check failed. " +
-                        "Shutting down server.");
-                  }
-                  break;
+            // Queue up the HMaster's instruction stream for processing
+            boolean restart = false;
+            for(int i = 0; i < msgs.length && !stopRequested.get() &&
+            !restart; i++) {
+              switch(msgs[i].getMsg()) {
+
+              case HMsg.MSG_CALL_SERVER_STARTUP:
+                LOG.info("Got call server startup message");
+                // We the MSG_CALL_SERVER_STARTUP on startup but we can also
+                // get it when the master is panicing because for instance
+                // the HDFS has been yanked out from under it.  Be wary of
+                // this message.
+                if (checkFileSystem()) {
+                  closeAllRegions();
+                  synchronized (logRollerLock) {
+                    try {
+                      log.closeAndDelete();
 
-                case HMsg.MSG_REGIONSERVER_STOP:
-                  LOG.info("Got regionserver stop message");
-                  stopRequested.set(true);
-                  break;
-                  
-                case HMsg.MSG_REGIONSERVER_QUIESCE:
-                  if (!quiesceRequested) {
-                    LOG.info("Got quiesce server message");
+                    } catch (Exception e) {
+                      LOG.error("error closing and deleting HLog", e);
+                    }
                     try {
-                      toDo.put(new ToDoEntry(msgs[i]));
-                    } catch (InterruptedException e) {
-                      throw new RuntimeException("Putting into msgQueue was " +
-                        "interrupted.", e);
+                      serverInfo.setStartCode(System.currentTimeMillis());
+                      log = setupHLog();
+                    } catch (IOException e) {
+                      this.abortRequested = true;
+                      this.stopRequested.set(true);
+                      e = RemoteExceptionHandler.checkIOException(e); 
+                      LOG.fatal("error restarting server", e);
+                      break;
                     }
-                    quiesceRequested = true;
                   }
-                  break;
+                  reportForDuty(sleeper);
+                  restart = true;
+                } else {
+                  LOG.fatal("file system available check failed. " +
+                  "Shutting down server.");
+                }
+                break;
 
-                default:
-                  if (fsOk) {
-                    try {
-                      toDo.put(new ToDoEntry(msgs[i]));
-                    } catch (InterruptedException e) {
-                      throw new RuntimeException("Putting into msgQueue was " +
+              case HMsg.MSG_REGIONSERVER_STOP:
+                LOG.info("Got regionserver stop message");
+                stopRequested.set(true);
+                break;
+
+              case HMsg.MSG_REGIONSERVER_QUIESCE:
+                if (!quiesceRequested) {
+                  LOG.info("Got quiesce server message");
+                  try {
+                    toDo.put(new ToDoEntry(msgs[i]));
+                  } catch (InterruptedException e) {
+                    throw new RuntimeException("Putting into msgQueue was " +
                         "interrupted.", e);
-                    }
-                    if (msgs[i].getMsg() == HMsg.MSG_REGION_OPEN) {
-                      outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_PROCESS_OPEN,
-                          msgs[i].getRegionInfo()));
-                    }
                   }
+                  quiesceRequested = true;
                 }
-              }
-              if (restart || this.stopRequested.get()) {
-                toDo.clear();
                 break;
-              }
-              // Reset tries count if we had a successful transaction.
-              tries = 0;
-            } catch (Exception e) {
-              if (e instanceof IOException) {
-                e = RemoteExceptionHandler.checkIOException((IOException) e);
-              }
-              if(tries < this.numRetries) {
-                LOG.warn("Processing message (Retry: " + tries + ")", e);
-                tries++;
-              } else {
-                LOG.fatal("Exceeded max retries: " + this.numRetries, e);
-                if (!checkFileSystem()) {
-                  continue;
+
+              default:
+                if (fsOk) {
+                  try {
+                    toDo.put(new ToDoEntry(msgs[i]));
+                  } catch (InterruptedException e) {
+                    throw new RuntimeException("Putting into msgQueue was " +
+                        "interrupted.", e);
+                  }
+                  if (msgs[i].getMsg() == HMsg.MSG_REGION_OPEN) {
+                    this.outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_PROCESS_OPEN,
+                      msgs[i].getRegionInfo()));
+                  }
                 }
-                // Something seriously wrong. Shutdown.
-                stop();
               }
             }
+            if (restart || this.stopRequested.get()) {
+              toDo.clear();
+              break;
+            }
+            // Reset tries count if we had a successful transaction.
+            tries = 0;
+          } catch (Exception e) {
+            if (e instanceof IOException) {
+              e = RemoteExceptionHandler.checkIOException((IOException) e);
+            }
+            if (tries < this.numRetries) {
+              LOG.warn("Processing message (Retry: " + tries + ")", e);
+              tries++;
+            } else {
+              LOG.fatal("Exceeded max retries: " + this.numRetries, e);
+              if (!checkFileSystem()) {
+                continue;
+              }
+              // Something seriously wrong. Shutdown.
+              stop();
+            }
           }
-          this.sleeper.sleep(lastMsg);
-        } // for
-      } // while (!stopRequested.get())
+        }
+        sleeper.sleep(lastMsg);
+      } // for
     } catch (Throwable t) {
       LOG.fatal("Unhandled exception. Aborting...", t);
       abort();
@@ -1001,7 +999,8 @@
    * Let the master know we're here
    * Run initialization using parameters passed us by the master.
    */
-  private HbaseMapWritable reportForDuty() throws IOException {
+  private HbaseMapWritable reportForDuty(final Sleeper sleeper)
+  throws IOException {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Telling master at " +
         conf.get(MASTER_ADDRESS) + " that we are up");
@@ -1025,7 +1024,7 @@
         break;
       } catch(IOException e) {
         LOG.warn("error telling master we are up", e);
-        this.sleeper.sleep(lastMsg);
+        sleeper.sleep(lastMsg);
         continue;
       }
     }
@@ -1168,12 +1167,9 @@
       } catch (IOException e) {
         LOG.error("error opening region " + regionInfo.getRegionName(), e);
         
-        // Mark the region offline.
         // TODO: add an extra field in HRegionInfo to indicate that there is
         // an error. We can't do that now because that would be an incompatible
         // change that would require a migration
-        
-        regionInfo.setOffline(true);
         reportClose(regionInfo);
         return;
       }

Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java (original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java Thu Mar 13 12:37:21 2008
@@ -772,7 +772,8 @@
    * reflected in the MapFiles.)
    */
   private void doReconstructionLog(final Path reconstructionLog,
-      final long maxSeqID) throws UnsupportedEncodingException, IOException {
+    final long maxSeqID)
+  throws UnsupportedEncodingException, IOException {
     
     if (reconstructionLog == null || !fs.exists(reconstructionLog)) {
       // Nothing to do.
@@ -789,16 +790,13 @@
       HLogKey key = new HLogKey();
       HLogEdit val = new HLogEdit();
       long skippedEdits = 0;
+      long editsCount = 0;
       while (logReader.next(key, val)) {
         maxSeqIdInLog = Math.max(maxSeqIdInLog, key.getLogSeqNum());
         if (key.getLogSeqNum() <= maxSeqID) {
           skippedEdits++;
           continue;
         }
-        if (skippedEdits > 0 && LOG.isDebugEnabled()) {
-          LOG.debug("Skipped " + skippedEdits +
-            " edits because sequence id <= " + maxSeqID);
-        }
         // Check this edit is for me. Also, guard against writing
         // METACOLUMN info such as HBASE::CACHEFLUSH entries
         Text column = val.getColumn();
@@ -808,11 +806,12 @@
           continue;
         }
         HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp());
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Applying edit <" + k.toString() + "=" + val.toString() +
-              ">");
-        }
         reconstructedCache.put(k, val.getVal());
+        editsCount++;
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +
+          " because sequence id <= " + maxSeqID);
       }
     } finally {
       logReader.close();

Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java (original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java Thu Mar 13 12:37:21 2008
@@ -21,6 +21,9 @@
 
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
 /**
  * Sleeper for current thread.
  * Sleeps for passed period.  Also checks passed boolean and if interrupted,
@@ -28,6 +31,7 @@
  * sleep time is up).
  */
 public class Sleeper {
+  private final Log LOG = LogFactory.getLog(this.getClass().getName());
   private final int period;
   private AtomicBoolean stop;
   
@@ -56,10 +60,19 @@
     if (this.stop.get()) {
       return;
     }
-    long waitTime = this.period - (System.currentTimeMillis() - startTime);
+    long now = System.currentTimeMillis();
+    long waitTime = this.period - (now - startTime);
+    if (waitTime > this.period) {
+      LOG.warn("Calculated wait time > " + this.period +
+        "; setting to this.period: " + System.currentTimeMillis() + ", " +
+        startTime);
+    }
     if (waitTime > 0) {
       try {
         Thread.sleep(waitTime);
+        if ((System.currentTimeMillis() - now) > (10 * this.period)) {
+          LOG.warn("We slept ten times longer than scheduled: " + this.period);
+        }
       } catch(InterruptedException iex) {
         // We we interrupted because we're meant to stop?  If not, just
         // continue ignoring the interruption

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Thu Mar 13 12:37:21 2008
@@ -38,6 +38,8 @@
    HBASE-433 HBASE-251 Region server should delete restore log after successful
                restore, Stuck replaying the edits of crashed machine.
    HBASE-27    hregioninfo cell empty in meta table
+   HBASE-501   Empty region server address in info:server entry and a
+               startcode of -1 in .META.
    
   IMPROVEMENTS
    HBASE-415   Rewrite leases to use DelayedBlockingQueue instead of polling

Modified: hadoop/hbase/trunk/conf/hbase-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/conf/hbase-default.xml?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/conf/hbase-default.xml (original)
+++ hadoop/hbase/trunk/conf/hbase-default.xml Thu Mar 13 12:37:21 2008
@@ -108,6 +108,13 @@
     period.</description>
   </property>
   <property>
+    <name>hbase.hbasemaster.maxregionopen</name>
+    <value>60000</value>
+    <description>Period to wait for a region open.  If regionserver
+    takes longer than this interval, assign to a new regionserver.
+    </description>
+  </property>
+  <property>
     <name>hbase.regionserver.lease.period</name>
     <value>30000</value>
     <description>HRegion server lease period in milliseconds. Default is

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java Thu Mar 13 12:37:21 2008
@@ -229,7 +229,7 @@
     this.threadWakeFrequency = conf.getInt(THREAD_WAKE_FREQUENCY, 10 * 1000);
     this.numRetries =  conf.getInt("hbase.client.retries.number", 2);
     this.maxRegionOpenTime =
-      conf.getLong("hbase.hbasemaster.maxregionopen", 30 * 1000);
+      conf.getLong("hbase.hbasemaster.maxregionopen", 60 * 1000);
     this.leaseTimeout = conf.getInt("hbase.master.lease.period", 30 * 1000);
     
     this.server = HbaseRPC.getServer(this, address.getBindAddress(),
@@ -589,6 +589,7 @@
 
   private void createTable(final HRegionInfo newRegion) throws IOException {
     Text tableName = newRegion.getTableDesc().getName();
+    // TODO: Not thread safe check.
     if (tableInCreation.contains(tableName)) {
       throw new TableExistsException("Table " + tableName + " in process "
         + "of being created");

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java Thu Mar 13 12:37:21 2008
@@ -49,7 +49,8 @@
   /** {@inheritDoc} */
   @Override
   public String toString() {
-    return "ProcessRegionClose of " + this.regionInfo.getRegionName();
+    return "ProcessRegionClose of " + this.regionInfo.getRegionName() +
+      ", " + this.reassignRegion + ", " + this.deleteRegion;
   }
 
   @Override
@@ -74,7 +75,7 @@
         if (deleteRegion) {
           HRegion.removeRegionFromMETA(getMetaServer(), metaRegionName,
             regionInfo.getRegionName());
-        } else {
+        } else if (!this.reassignRegion) {
           HRegion.offlineRegionInMETA(getMetaServer(), metaRegionName,
             regionInfo);
         }
@@ -102,4 +103,4 @@
     }
     return true;
   }
-}
\ No newline at end of file
+}

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Thu Mar 13 12:37:21 2008
@@ -295,7 +295,7 @@
           if (LOG.isDebugEnabled()) {
             LOG.debug("process server shutdown scanning " +
               r.getRegionName() + " on " + r.getServer() + " " +
-              Thread.currentThread().getName());
+              Thread.currentThread().getName() + " attempt " + tries);
           }
           server = master.connection.getHRegionConnection(r.getServer());
 

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java Thu Mar 13 12:37:21 2008
@@ -315,7 +315,7 @@
 
         case HMsg.MSG_REPORT_CLOSE:
           LOG.info(serverInfo.getServerAddress().toString() + " no longer serving " +
-              region.getRegionName());
+              region);
 
           if (region.isRootRegion()) {
             // Root region

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java Thu Mar 13 12:37:21 2008
@@ -546,7 +546,6 @@
                   ),
                   HREGION_OLDLOGFILE_NAME
               );
-              
               Path oldlogfile = null;
               SequenceFile.Reader old = null;
               if (fs.exists(logfile)) {
@@ -556,16 +555,15 @@
                 fs.rename(logfile, oldlogfile);
                 old = new SequenceFile.Reader(fs, oldlogfile, conf);
               }
-
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("Creating new log file writer for path " + logfile +
-                  "; map content " + logWriters.toString());
-              }
               w = SequenceFile.createWriter(fs, conf, logfile, HLogKey.class,
                 HLogEdit.class, getCompressionType(conf));
               // Use copy of regionName; regionName object is reused inside in
               // HStoreKey.getRegionName so its content changes as we iterate.
               logWriters.put(new Text(regionName), w);
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Creating new log file writer for path " + logfile +
+                  " and region " + regionName);
+              }
               
               if (old != null) {
                 // Copy from existing log file
@@ -580,9 +578,6 @@
                 old.close();
                 fs.delete(oldlogfile);
               }
-            }
-            if (LOG.isDebugEnabled() && count > 0 && count % 10000 == 0) {
-              LOG.debug("Applied " + count + " edits");
             }
             w.append(key, val);
           }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Thu Mar 13 12:37:21 2008
@@ -121,7 +121,7 @@
     new ConcurrentHashMap<Text, HRegion>();
  
   protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
-  private volatile List<HMsg> outboundMsgs =
+  private final List<HMsg> outboundMsgs =
     Collections.synchronizedList(new ArrayList<HMsg>());
 
   final int numRetries;
@@ -141,9 +141,6 @@
   
   // Request counter
   private volatile AtomicInteger requestCount = new AtomicInteger();
-  
-  // A sleeper that sleeps for msgInterval.
-  private final Sleeper sleeper;
 
   // Info server.  Default access so can be used by unit tests.  REGIONSERVER
   // is name of the webapp and the attribute name used stuffing this instance
@@ -234,7 +231,7 @@
     // Task thread to process requests from Master
     this.worker = new Worker();
     this.workerThread = new Thread(worker);
-    this.sleeper = new Sleeper(this.msgInterval, this.stopRequested);
+
     // Server to handle client requests
     this.server = HbaseRPC.getServer(this, address.getBindAddress(), 
       address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10),
@@ -259,145 +256,146 @@
    */
   public void run() {
     boolean quiesceRequested = false;
+    // A sleeper that sleeps for msgInterval.
+    Sleeper sleeper =
+      new Sleeper(this.msgInterval, this.stopRequested);
     try {
-      init(reportForDuty());
+      init(reportForDuty(sleeper));
       long lastMsg = 0;
-      while(!stopRequested.get()) {
-        // Now ask master what it wants us to do and tell it what we have done
-        for (int tries = 0; !stopRequested.get();) {
-          long now = System.currentTimeMillis();
-          if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) {
-            // It has been way too long since we last reported to the master.
-            // Commit suicide.
-            LOG.fatal("unable to report to master for " + (now - lastMsg) +
-                " milliseconds - aborting server");
-            abort();
-            break;
+      // Now ask master what it wants us to do and tell it what we have done
+      for (int tries = 0; !stopRequested.get();) {
+        long now = System.currentTimeMillis();
+        if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) {
+          // It has been way too long since we last reported to the master.
+          // Commit suicide.
+          LOG.fatal("unable to report to master for " + (now - lastMsg) +
+            " milliseconds - aborting server");
+          abort();
+          break;
+        }
+        if ((now - lastMsg) >= msgInterval) {
+          HMsg outboundArray[] = null;
+          synchronized(this.outboundMsgs) {
+            outboundArray =
+              this.outboundMsgs.toArray(new HMsg[outboundMsgs.size()]);
+            this.outboundMsgs.clear();
           }
-          if ((now - lastMsg) >= msgInterval) {
-            HMsg outboundArray[] = null;
-            synchronized(outboundMsgs) {
-              outboundArray =
-                this.outboundMsgs.toArray(new HMsg[outboundMsgs.size()]);
+
+          try {
+            this.serverInfo.setLoad(new HServerLoad(requestCount.get(),
+                onlineRegions.size()));
+            this.requestCount.set(0);
+            HMsg msgs[] =
+              this.hbaseMaster.regionServerReport(serverInfo, outboundArray);
+            lastMsg = System.currentTimeMillis();
+
+            if (this.quiesced.get() && onlineRegions.size() == 0) {
+              // We've just told the master we're exiting because we aren't
+              // serving any regions. So set the stop bit and exit.
+              LOG.info("Server quiesced and not serving any regions. " +
+              "Starting shutdown");
+              stopRequested.set(true);
+              continue;
             }
-            this.outboundMsgs.clear();
 
-            try {
-              this.serverInfo.setLoad(new HServerLoad(requestCount.get(),
-                  onlineRegions.size()));
-              this.requestCount.set(0);
-              HMsg msgs[] =
-                this.hbaseMaster.regionServerReport(serverInfo, outboundArray);
-              lastMsg = System.currentTimeMillis();
-              
-              if (this.quiesced.get() && onlineRegions.size() == 0) {
-                // We've just told the master we're exiting because we aren't
-                // serving any regions. So set the stop bit and exit.
-                LOG.info("Server quiesced and not serving any regions. " +
-                    "Starting shutdown");
-                stopRequested.set(true);
-                continue;
-              }
-              
-              // Queue up the HMaster's instruction stream for processing
-              boolean restart = false;
-              for(int i = 0; i < msgs.length && !stopRequested.get() &&
-                  !restart; i++) {
-                switch(msgs[i].getMsg()) {
-                
-                case HMsg.MSG_CALL_SERVER_STARTUP:
-                  LOG.info("Got call server startup message");
-                  // We the MSG_CALL_SERVER_STARTUP on startup but we can also
-                  // get it when the master is panicing because for instance
-                  // the HDFS has been yanked out from under it.  Be wary of
-                  // this message.
-                  if (checkFileSystem()) {
-                    closeAllRegions();
-                    synchronized (logRollerLock) {
-                      try {
-                        log.closeAndDelete();
-
-                      } catch (Exception e) {
-                        LOG.error("error closing and deleting HLog", e);
-                      }
-                      try {
-                        serverInfo.setStartCode(System.currentTimeMillis());
-                        log = setupHLog();
-                      } catch (IOException e) {
-                        this.abortRequested = true;
-                        this.stopRequested.set(true);
-                        e = RemoteExceptionHandler.checkIOException(e); 
-                        LOG.fatal("error restarting server", e);
-                        break;
-                      }
-                    }
-                    reportForDuty();
-                    restart = true;
-                  } else {
-                    LOG.fatal("file system available check failed. " +
-                        "Shutting down server.");
-                  }
-                  break;
+            // Queue up the HMaster's instruction stream for processing
+            boolean restart = false;
+            for(int i = 0; i < msgs.length && !stopRequested.get() &&
+            !restart; i++) {
+              switch(msgs[i].getMsg()) {
+
+              case HMsg.MSG_CALL_SERVER_STARTUP:
+                LOG.info("Got call server startup message");
+                // We get the MSG_CALL_SERVER_STARTUP on startup but we can also
+                // get it when the master is panicking because for instance
+                // the HDFS has been yanked out from under it.  Be wary of
+                // this message.
+                if (checkFileSystem()) {
+                  closeAllRegions();
+                  synchronized (logRollerLock) {
+                    try {
+                      log.closeAndDelete();
 
-                case HMsg.MSG_REGIONSERVER_STOP:
-                  LOG.info("Got regionserver stop message");
-                  stopRequested.set(true);
-                  break;
-                  
-                case HMsg.MSG_REGIONSERVER_QUIESCE:
-                  if (!quiesceRequested) {
-                    LOG.info("Got quiesce server message");
+                    } catch (Exception e) {
+                      LOG.error("error closing and deleting HLog", e);
+                    }
                     try {
-                      toDo.put(new ToDoEntry(msgs[i]));
-                    } catch (InterruptedException e) {
-                      throw new RuntimeException("Putting into msgQueue was " +
-                        "interrupted.", e);
+                      serverInfo.setStartCode(System.currentTimeMillis());
+                      log = setupHLog();
+                    } catch (IOException e) {
+                      this.abortRequested = true;
+                      this.stopRequested.set(true);
+                      e = RemoteExceptionHandler.checkIOException(e); 
+                      LOG.fatal("error restarting server", e);
+                      break;
                     }
-                    quiesceRequested = true;
                   }
-                  break;
+                  reportForDuty(sleeper);
+                  restart = true;
+                } else {
+                  LOG.fatal("file system available check failed. " +
+                  "Shutting down server.");
+                }
+                break;
 
-                default:
-                  if (fsOk) {
-                    try {
-                      toDo.put(new ToDoEntry(msgs[i]));
-                    } catch (InterruptedException e) {
-                      throw new RuntimeException("Putting into msgQueue was " +
+              case HMsg.MSG_REGIONSERVER_STOP:
+                LOG.info("Got regionserver stop message");
+                stopRequested.set(true);
+                break;
+
+              case HMsg.MSG_REGIONSERVER_QUIESCE:
+                if (!quiesceRequested) {
+                  LOG.info("Got quiesce server message");
+                  try {
+                    toDo.put(new ToDoEntry(msgs[i]));
+                  } catch (InterruptedException e) {
+                    throw new RuntimeException("Putting into msgQueue was " +
                         "interrupted.", e);
-                    }
-                    if (msgs[i].getMsg() == HMsg.MSG_REGION_OPEN) {
-                      outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_PROCESS_OPEN,
-                          msgs[i].getRegionInfo()));
-                    }
                   }
+                  quiesceRequested = true;
                 }
-              }
-              if (restart || this.stopRequested.get()) {
-                toDo.clear();
                 break;
-              }
-              // Reset tries count if we had a successful transaction.
-              tries = 0;
-            } catch (Exception e) {
-              if (e instanceof IOException) {
-                e = RemoteExceptionHandler.checkIOException((IOException) e);
-              }
-              if(tries < this.numRetries) {
-                LOG.warn("Processing message (Retry: " + tries + ")", e);
-                tries++;
-              } else {
-                LOG.fatal("Exceeded max retries: " + this.numRetries, e);
-                if (!checkFileSystem()) {
-                  continue;
+
+              default:
+                if (fsOk) {
+                  try {
+                    toDo.put(new ToDoEntry(msgs[i]));
+                  } catch (InterruptedException e) {
+                    throw new RuntimeException("Putting into msgQueue was " +
+                        "interrupted.", e);
+                  }
+                  if (msgs[i].getMsg() == HMsg.MSG_REGION_OPEN) {
+                    this.outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_PROCESS_OPEN,
+                      msgs[i].getRegionInfo()));
+                  }
                 }
-                // Something seriously wrong. Shutdown.
-                stop();
               }
             }
+            if (restart || this.stopRequested.get()) {
+              toDo.clear();
+              break;
+            }
+            // Reset tries count if we had a successful transaction.
+            tries = 0;
+          } catch (Exception e) {
+            if (e instanceof IOException) {
+              e = RemoteExceptionHandler.checkIOException((IOException) e);
+            }
+            if (tries < this.numRetries) {
+              LOG.warn("Processing message (Retry: " + tries + ")", e);
+              tries++;
+            } else {
+              LOG.fatal("Exceeded max retries: " + this.numRetries, e);
+              if (!checkFileSystem()) {
+                continue;
+              }
+              // Something seriously wrong. Shutdown.
+              stop();
+            }
           }
-          this.sleeper.sleep(lastMsg);
-        } // for
-      } // while (!stopRequested.get())
+        }
+        sleeper.sleep(lastMsg);
+      } // for
     } catch (Throwable t) {
       LOG.fatal("Unhandled exception. Aborting...", t);
       abort();
@@ -627,7 +625,8 @@
    * Let the master know we're here
    * Run initialization using parameters passed us by the master.
    */
-  private HbaseMapWritable reportForDuty() throws IOException {
+  private HbaseMapWritable reportForDuty(final Sleeper sleeper)
+  throws IOException {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Telling master at " +
         conf.get(MASTER_ADDRESS) + " that we are up");
@@ -651,7 +650,7 @@
         break;
       } catch(IOException e) {
         LOG.warn("error telling master we are up", e);
-        this.sleeper.sleep(lastMsg);
+        sleeper.sleep(lastMsg);
         continue;
       }
     }
@@ -794,12 +793,9 @@
       } catch (IOException e) {
         LOG.error("error opening region " + regionInfo.getRegionName(), e);
         
-        // Mark the region offline.
         // TODO: add an extra field in HRegionInfo to indicate that there is
         // an error. We can't do that now because that would be an incompatible
         // change that would require a migration
-        
-        regionInfo.setOffline(true);
         reportClose(regionInfo);
         return;
       }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Thu Mar 13 12:37:21 2008
@@ -299,7 +299,8 @@
    * reflected in the MapFiles.)
    */
   private void doReconstructionLog(final Path reconstructionLog,
-      final long maxSeqID) throws UnsupportedEncodingException, IOException {
+    final long maxSeqID)
+  throws UnsupportedEncodingException, IOException {
     
     if (reconstructionLog == null || !fs.exists(reconstructionLog)) {
       // Nothing to do.
@@ -316,16 +317,13 @@
       HLogKey key = new HLogKey();
       HLogEdit val = new HLogEdit();
       long skippedEdits = 0;
+      long editsCount = 0;
       while (logReader.next(key, val)) {
         maxSeqIdInLog = Math.max(maxSeqIdInLog, key.getLogSeqNum());
         if (key.getLogSeqNum() <= maxSeqID) {
           skippedEdits++;
           continue;
         }
-        if (skippedEdits > 0 && LOG.isDebugEnabled()) {
-          LOG.debug("Skipped " + skippedEdits +
-            " edits because sequence id <= " + maxSeqID);
-        }
         // Check this edit is for me. Also, guard against writing
         // METACOLUMN info such as HBASE::CACHEFLUSH entries
         Text column = val.getColumn();
@@ -335,11 +333,12 @@
           continue;
         }
         HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp());
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Applying edit <" + k.toString() + "=" + val.toString() +
-              ">");
-        }
         reconstructedCache.put(k, val.getVal());
+        editsCount++;
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +
+          " because sequence id <= " + maxSeqID);
       }
     } finally {
       logReader.close();

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java?rev=636849&r1=636848&r2=636849&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java Thu Mar 13 12:37:21 2008
@@ -21,6 +21,9 @@
 
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
 /**
  * Sleeper for current thread.
  * Sleeps for passed period.  Also checks passed boolean and if interrupted,
@@ -28,6 +31,7 @@
  * sleep time is up).
  */
 public class Sleeper {
+  private final Log LOG = LogFactory.getLog(this.getClass().getName());
   private final int period;
   private AtomicBoolean stop;
   
@@ -56,10 +60,19 @@
     if (this.stop.get()) {
       return;
     }
-    long waitTime = this.period - (System.currentTimeMillis() - startTime);
+    long now = System.currentTimeMillis();
+    long waitTime = this.period - (now - startTime);
+    if (waitTime > this.period) {
+      LOG.warn("Calculated wait time > " + this.period +
+        "; setting to this.period: " + System.currentTimeMillis() + ", " +
+        startTime);
+    }
     if (waitTime > 0) {
       try {
         Thread.sleep(waitTime);
+        if ((System.currentTimeMillis() - now) > (10 * this.period)) {
+          LOG.warn("We slept ten times longer than scheduled: " + this.period);
+        }
       } catch(InterruptedException iex) {
        // Were we interrupted because we're meant to stop?  If not, just
         // continue ignoring the interruption



Mime
View raw message