hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nspiegelb...@apache.org
Subject svn commit: r1181468 - in /hbase/branches/0.89/src: main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/regionserver/wal/ test/java/org/apache/hadoop/hbase/regionserver/ test/java/org/apache/ha...
Date Tue, 11 Oct 2011 02:11:22 GMT
Author: nspiegelberg
Date: Tue Oct 11 02:11:22 2011
New Revision: 1181468

URL: http://svn.apache.org/viewvc?rev=1181468&view=rev
Log:
Rename HLog Dir during Recovery

Summary:
If a RegionServer goes rogue (GC pause, ops issue), the master
needs to be able to forcibly prevent further writes before it can log
split.  Do a non-recursive create and rename the dir on HLog recovery.

Test Plan:
- mvn test -Dtest=TestHLogSplit

DiffCamp Revision: 195313
Reviewed By: kannan
Reviewers: jgray, aaiyer, hkuang, kannan, kranganathan
CC: nspiegelberg, kannan
Tasks:
#207588: HBASE-2312: splitLog race condition

Revert Plan:
OK

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java Tue Oct 11 02:11:22
2011
@@ -148,6 +148,9 @@ public final class HConstants {
   /** Used to construct the name of the compaction directory during compaction */
   public static final String HREGION_COMPACTIONDIR_NAME = "compaction.dir";
 
+  /** File Extension used while splitting an HLog into regions (HBASE-2312) */
+  public static final String HLOG_SPLITTING_EXT = "-splitting";
+
   /** Default maximum file size */
   public static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
 

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Oct
11 02:11:22 2011
@@ -656,7 +656,8 @@ public class HMaster extends Thread impl
       return;
     }
     for (FileStatus status : logFolders) {
-      String serverName = status.getPath().getName();
+      Path logDir = status.getPath();
+      String serverName = logDir.getName();
       LOG.info("Found log folder : " + serverName);
       if(this.serverManager.getServerInfo(serverName) == null) {
         LOG.info("Log folder doesn't belong " +
@@ -664,9 +665,18 @@ public class HMaster extends Thread impl
         long splitTime = 0, splitSize = 0;
 
         this.splitLogLock.lock();
-        Path logDir =
-          new Path(this.rootdir, HLog.getHLogDirectoryName(serverName));
         try {
+          // rename the directory so a rogue RS doesn't create more HLogs
+          if (!serverName.endsWith(HConstants.HLOG_SPLITTING_EXT)) {
+            Path splitDir = new Path(logDir.getParent(),
+                                     logDir.getName()
+                                     + HConstants.HLOG_SPLITTING_EXT);
+            if (!this.fs.rename(logDir, splitDir)) {
+              throw new IOException("Failed fs.rename of " + logDir);
+            }
+            logDir = splitDir;
+            LOG.debug("Renamed region directory: " + splitDir);
+          }
           HLog.splitLog(this.rootdir, logDir, oldLogDir, this.fs, getConfiguration());
           splitTime = HLog.lastSplitTime;
           splitSize = HLog.lastSplitSize;

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
(original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
Tue Oct 11 02:11:22 2011
@@ -19,6 +19,7 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -54,7 +55,7 @@ class ProcessServerShutdown extends Regi
   private List<MetaRegion> metaRegions;
 
   private Path rsLogDir;
-  private boolean logSplit;
+  private boolean isSplitFinished;
   private boolean rootRescanned;
   private HServerAddress deadServerAddress;
 
@@ -76,7 +77,7 @@ class ProcessServerShutdown extends Regi
     super(master);
     this.deadServer = serverInfo.getServerName();
     this.deadServerAddress = serverInfo.getServerAddress();
-    this.logSplit = false;
+    this.isSplitFinished = false;
     this.rootRescanned = false;
     this.rsLogDir =
       new Path(master.getRootDir(), HLog.getHLogDirectoryName(serverInfo));
@@ -286,21 +287,38 @@ class ProcessServerShutdown extends Regi
   @Override
   protected boolean process() throws IOException {
     LOG.info("Process shutdown of server " + this.deadServer +
-      ": logSplit: " + logSplit + ", rootRescanned: " + rootRescanned +
+      ": logSplit: " + isSplitFinished + ", rootRescanned: " + rootRescanned +
       ", numberOfMetaRegions: " + master.getRegionManager().numMetaRegions() +
       ", onlineMetaRegions.size(): " +
       master.getRegionManager().numOnlineMetaRegions());
-    if (!logSplit) {
-      // Process the old log file
-      if (this.master.getFileSystem().exists(rsLogDir)) {
-        long splitTime = 0, splitSize = 0;
+    if (!isSplitFinished) {
+      long splitTime = 0, splitSize = 0;
+      FileSystem fs = this.master.getFileSystem();
+      // we rename during split, so check both names
+      Path rsSplitDir = new Path(rsLogDir.getParent(),
+                                 rsLogDir.getName()
+                                 + HConstants.HLOG_SPLITTING_EXT);
+      boolean logDirExists = fs.exists(rsLogDir);
+      boolean splitDirExists = fs.exists(rsSplitDir);
+      assert !(logDirExists && splitDirExists)
+        : "Both files shouldn't exist: " + rsLogDir + " and " + rsSplitDir;
 
+      if (logDirExists || splitDirExists) {
         if (!master.splitLogLock.tryLock()) {
           return false;
         }
         try {
-          HLog.splitLog(master.getRootDir(), rsLogDir,
-              this.master.getOldLogDir(), this.master.getFileSystem(),
+          // rename the directory so a rogue RS doesn't create more HLogs
+          if (logDirExists) {
+            if (!fs.rename(rsLogDir, rsSplitDir)) {
+              throw new IOException("Failed fs.rename of " + rsLogDir);
+            }
+            LOG.debug("Renamed region directory: " + rsSplitDir);
+          }
+
+          // Process the old log files
+          HLog.splitLog(master.getRootDir(), rsSplitDir,
+            this.master.getOldLogDir(), this.master.getFileSystem(),
             this.master.getConfiguration());
           splitTime = HLog.lastSplitTime;
           splitSize = HLog.lastSplitSize;
@@ -310,7 +328,7 @@ class ProcessServerShutdown extends Regi
 
         this.master.getMetrics().addSplit(splitTime, splitSize);
       }
-      logSplit = true;
+      isSplitFinished = true;
     }
     LOG.info("Log split complete, meta reassignment and scanning:");
     if (this.isRootServer) {

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java Tue
Oct 11 02:11:22 2011
@@ -658,13 +658,14 @@ public class HLog implements Syncable {
    * @return Path to current writer or null if none.
    * @throws IOException
    */
-  private Path cleanupCurrentWriter(final long currentfilenum)
+  Path cleanupCurrentWriter(final long currentfilenum)
   throws IOException {
     Path oldFile = null;
     if (this.writer != null) {
       // Close the current writer, get a new one.
       try {
         this.writer.close();
+        this.writer = null;
       } catch (IOException e) {
         // Failed close of log file.  Means we're losing edits.  For now,
         // shut ourselves down to minimize loss.  Alternative is to try and
@@ -745,7 +746,9 @@ public class HLog implements Syncable {
         if (LOG.isDebugEnabled()) {
           LOG.debug("closing hlog writer in " + this.dir.toString());
         }
-        this.writer.close();
+        if (this.writer != null) {
+          this.writer.close();
+        }
       }
     } finally {
       cacheFlushLock.unlock();

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java
(original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java
Tue Oct 11 02:11:22 2011
@@ -23,6 +23,7 @@ package org.apache.hadoop.hbase.regionse
 import java.io.IOException;
 import java.io.OutputStream;
 import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 
 import org.apache.commons.logging.Log;
@@ -32,7 +33,9 @@ import org.apache.hadoop.fs.FSDataOutput
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.SequenceFile.Metadata;
+import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.DefaultCodec;
 
 /**
@@ -56,17 +59,52 @@ public class SequenceFileLogWriter imple
   public void init(FileSystem fs, Path path, Configuration conf)
       throws IOException {
     // Create a SF.Writer instance.
-    this.writer = SequenceFile.createWriter(fs, conf, path,
-      HLog.getKeyClass(conf), WALEdit.class,
-      fs.getConf().getInt("io.file.buffer.size", 4096),
-      (short) conf.getInt("hbase.regionserver.hlog.replication",
-        fs.getDefaultReplication()),
-      conf.getLong("hbase.regionserver.hlog.blocksize",
-        fs.getDefaultBlockSize()),
-      SequenceFile.CompressionType.NONE,
-      new DefaultCodec(),
-      null,
-      new Metadata());
+    try {
+      // reflection for a version of SequenceFile.createWriter that doesn't
+      // automatically create the parent directory (see HBASE-2312)
+      this.writer = (SequenceFile.Writer) SequenceFile.class
+        .getMethod("createWriter", new Class[] {FileSystem.class,
+            Configuration.class, Path.class, Class.class, Class.class,
+            Integer.TYPE, Short.TYPE, Long.TYPE, Boolean.TYPE,
+            CompressionType.class, CompressionCodec.class, Metadata.class})
+        .invoke(null, new Object[] {fs, conf, path, HLog.getKeyClass(conf),
+            WALEdit.class,
+            new Integer(fs.getConf().getInt("io.file.buffer.size", 4096)),
+            new Short((short)
+              conf.getInt("hbase.regionserver.hlog.replication",
+              fs.getDefaultReplication())),
+            new Long(conf.getLong("hbase.regionserver.hlog.blocksize",
+                fs.getDefaultBlockSize())),
+            new Boolean(false) /*createParent*/,
+            SequenceFile.CompressionType.NONE, new DefaultCodec(),
+            new Metadata()
+            });
+    } catch (InvocationTargetException ite) {
+      // function was properly called, but threw its own exception
+      throw new IOException(ite.getCause());
+    } catch (Exception e) {
+      // ignore all other exceptions; they are related to reflection failure
+    }
+
+
+
+    // if reflection failed, use the old createWriter
+    if (this.writer == null) {
+      LOG.warn("new createWriter -- HADOOP-6840 -- not available");
+      this.writer = SequenceFile.createWriter(fs, conf, path,
+        HLog.getKeyClass(conf), WALEdit.class,
+        fs.getConf().getInt("io.file.buffer.size", 4096),
+        (short) conf.getInt("hbase.regionserver.hlog.replication",
+          fs.getDefaultReplication()),
+        conf.getLong("hbase.regionserver.hlog.blocksize",
+          fs.getDefaultBlockSize()),
+        SequenceFile.CompressionType.NONE,
+        new DefaultCodec(),
+        null,
+        new Metadata());
+    } else {
+      LOG.debug("using new createWriter -- HADOOP-6840");
+    }
 
     // Get at the private FSDataOutputStream inside in SequenceFile so we can
     // call sync on it.  Make it accessible.  Stash it aside for call up in
@@ -140,4 +178,4 @@ public class SequenceFileLogWriter imple
   public OutputStream getDFSCOutputStream() {
     return this.dfsClient_out;
   }
-}
\ No newline at end of file
+}

Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java
(original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java
Tue Oct 11 02:11:22 2011
@@ -411,6 +411,14 @@ public class TestStore extends TestCase 
 		blockSize, bytesPerChecksum, progress), faultPos);
     }
 
+    @Override
+    public FSDataOutputStream createNonRecursive(Path f,
+        FsPermission permission, boolean overwrite,
+        int bufferSize, short replication, long blockSize,
+        Progressable progress) throws IOException {
+      return create(f, permission, overwrite, bufferSize,
+          replication, blockSize, progress);
+    }
   }
 
   static class FaultyOutputStream extends FSDataOutputStream {
@@ -519,4 +527,4 @@ public class TestStore extends TestCase 
     result = HBaseTestingUtility.getFromStoreFile(store, get);
     assertTrue(result.size()==0);
   }
-}
\ No newline at end of file
+}

Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
(original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
Tue Oct 11 02:11:22 2011
@@ -31,6 +31,8 @@ import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -38,6 +40,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
@@ -47,14 +50,19 @@ import org.apache.hadoop.hbase.util.Thre
 import org.apache.hadoop.ipc.RemoteException;
 import org.junit.After;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 /**
+ *
+ */
+/**
  * Testing {@link HLog} splitting code.
  */
 public class TestHLogSplit {
+  private static final Log LOG = LogFactory.getLog(TestHLogSplit.class);
 
   private Configuration conf;
   private FileSystem fs;
@@ -117,6 +125,8 @@ public class TestHLogSplit {
     for (FileStatus dir : entries){
       fs.delete(dir.getPath(), true);
     }
+    // create the HLog directory because recursive log creates are not allowed
+    fs.mkdirs(hlogDir);
     seq = 0;
     regions = new ArrayList<String>();
     Collections.addAll(regions, "bbb", "ccc");
@@ -510,6 +520,77 @@ public class TestHLogSplit {
     assertEquals(0, compareHLogSplitDirs(firstSplitPath, splitPath));
   }
 
+  /* HBASE-2312: tests the case where a RegionServer enters a GC pause or goes
+   * renegade due to Connect/ZK bugs.  When the master declares it dead, HDFS
+   * should deny writes and prevent it from rolling HLogs so Log Splitting can
+   * safely commence on the master.
+   * */
+  @Test
+  public void testLogRollAfterSplitStart() throws IOException {
+    // set flush interval to a large number so it doesn't interrupt us
+    final String F_INTERVAL = "hbase.regionserver.optionallogflushinterval";
+    long oldFlushInterval = conf.getLong(F_INTERVAL, 1000);
+    conf.setLong(F_INTERVAL, 1000*1000*100);
+    HLog log = null;
+    Path thisTestsDir = new Path(hbaseDir, "testLogRollAfterSplitStart");
+    Path rsSplitDir = new Path(thisTestsDir.getParent(),
+                               thisTestsDir.getName()
+                               + HConstants.HLOG_SPLITTING_EXT);
+
+    try {
+      // put some entries in an HLog
+      byte [] tableName = Bytes.toBytes(this.getClass().getName());
+      HRegionInfo regioninfo = new HRegionInfo(new HTableDescriptor(tableName),
+          HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false);
+      log = new HLog(fs, thisTestsDir, oldLogDir, conf, null);
+      final int total = 20;
+      for (int i = 0; i < total; i++) {
+        WALEdit kvs = new WALEdit();
+        kvs.add(new KeyValue(Bytes.toBytes(i), tableName, tableName));
+        log.append(regioninfo, tableName, kvs, System.currentTimeMillis());
+      }
+      // Send the data to HDFS datanodes and close the HDFS writer
+      log.sync(true);
+      log.cleanupCurrentWriter(log.getFilenum());
+
+      /* code taken from ProcessServerShutdown.process()
+       * handles RS shutdowns (as observed by the Master)
+       */
+      // rename the directory so a rogue RS doesn't create more HLogs
+      fs.rename(thisTestsDir, rsSplitDir);
+      LOG.debug("Renamed region directory: " + rsSplitDir);
+
+      // Process the old log files
+      // TODO: find a way to keep the log.writer around and call this
+      //       currently, you look like the current leaseholder
+      HLog.splitLog(hbaseDir, rsSplitDir, oldLogDir, fs, conf);
+
+      // Now, try to write more data.
+      // verify that this fails and the subsequent roll of the HLog also fails
+      try {
+        log.rollWriter();
+        Assert.fail("rollWriter() did not throw any exception.");
+      } catch (IOException ioe) {
+        if (ioe.getCause().getMessage().contains("FileNotFound")) {
+          LOG.info("Got the expected exception: ", ioe.getCause());
+        } else {
+          Assert.fail("Unexpected exception: " + ioe);
+        }
+      }
+    } finally {
+      conf.setLong(F_INTERVAL, oldFlushInterval);
+      if (log != null) {
+        log.close();
+      }
+      if (fs.exists(thisTestsDir)) {
+        fs.delete(thisTestsDir, true);
+      }
+      if (fs.exists(rsSplitDir)) {
+        fs.delete(rsSplitDir, true);
+      }
+    }
+  }
+
   /**
    * This thread will keep writing to the file after the split process has started
    * It simulates a region server that was considered dead but woke up and wrote



Mime
View raw message