hadoop-hdfs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject svn commit: r1575448 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ src/main/java/org/apache/hadoop/hdfs/util/ src/test/java/o...
Date Fri, 07 Mar 2014 23:35:47 GMT
Author: wang
Date: Fri Mar  7 23:35:47 2014
New Revision: 1575448

URL: http://svn.apache.org/r1575448
Log:
HDFS-5064. Standby checkpoints should not block concurrent readers. Contributed by Aaron Twining
Myers.

Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1575448&r1=1575447&r2=1575448&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Mar  7 23:35:47 2014
@@ -717,6 +717,9 @@ Release 2.4.0 - UNRELEASED
     HDFS-6065. HDFS zero-copy reads should return null on EOF when doing ZCR
     (cmccabe)
 
+    HDFS-5064. Standby checkpoints should not block concurrent readers.
+    (atm via wang)
+
   BREAKDOWN OF HDFS-5698 SUBTASKS AND RELATED JIRAS
 
     HDFS-5717. Save FSImage header in protobuf. (Haohui Mai via jing9)

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1575448&r1=1575447&r2=1575448&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
Fri Mar  7 23:35:47 2014
@@ -1355,7 +1355,11 @@ public class FSImage implements Closeabl
     this.lastAppliedTxId = editLog.getLastWrittenTxId();
   }
 
-  public synchronized long getMostRecentCheckpointTxId() {
+  // Should be OK for this to not be synchronized since all of the places which
+  // mutate this value are themselves synchronized so it shouldn't be possible
+  // to see this flop back and forth. In the worst case this will just return an
+  // old value.
+  public long getMostRecentCheckpointTxId() {
     return storage.getMostRecentCheckpointTxId();
   }
 }

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1575448&r1=1575447&r2=1575448&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
Fri Mar  7 23:35:47 2014
@@ -116,6 +116,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.ReentrantLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import javax.management.NotCompliantMBeanException;
@@ -1355,20 +1356,47 @@ public class FSNamesystem implements Nam
     this.fsLock.readLock().lock();
   }
   @Override
+  public void longReadLockInterruptibly() throws InterruptedException {
+    this.fsLock.longReadLock().lockInterruptibly();
+    try {
+      this.fsLock.readLock().lockInterruptibly();
+    } catch (InterruptedException ie) {
+      // In the event we're interrupted while getting the normal FSNS read lock,
+      // release the long read lock.
+      this.fsLock.longReadLock().unlock();
+      throw ie;
+    }
+  }
+  @Override
+  public void longReadUnlock() {
+    this.fsLock.readLock().unlock();
+    this.fsLock.longReadLock().unlock();
+  }
+  @Override
   public void readUnlock() {
     this.fsLock.readLock().unlock();
   }
   @Override
   public void writeLock() {
+    this.fsLock.longReadLock().lock();
     this.fsLock.writeLock().lock();
   }
   @Override
   public void writeLockInterruptibly() throws InterruptedException {
-    this.fsLock.writeLock().lockInterruptibly();
+    this.fsLock.longReadLock().lockInterruptibly();
+    try {
+      this.fsLock.writeLock().lockInterruptibly();
+    } catch (InterruptedException ie) {
+      // In the event we're interrupted while getting the normal FSNS write
+      // lock, release the long read lock.
+      this.fsLock.longReadLock().unlock();
+      throw ie;
+    }
   }
   @Override
   public void writeUnlock() {
     this.fsLock.writeLock().unlock();
+    this.fsLock.longReadLock().unlock();
   }
   @Override
   public boolean hasWriteLock() {
@@ -6838,9 +6866,14 @@ public class FSNamesystem implements Nam
   }
   
   @VisibleForTesting
-  ReentrantReadWriteLock getFsLockForTests() {
+  public ReentrantReadWriteLock getFsLockForTests() {
     return fsLock.coarseLock;
   }
+  
+  @VisibleForTesting
+  public ReentrantLock getLongReadLockForTests() {
+    return fsLock.longReadLock;
+  }
 
   @VisibleForTesting
   public SafeModeInfo getSafeModeInfoForTests() {

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java?rev=1575448&r1=1575447&r2=1575448&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
Fri Mar  7 23:35:47 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hdfs.server.na
 
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -33,6 +34,24 @@ class FSNamesystemLock implements ReadWr
   @VisibleForTesting
   protected ReentrantReadWriteLock coarseLock;
   
+  /**
+   * When locking the FSNS for a read that may take a long time, we take this
+   * lock before taking the regular FSNS read lock. All writers also take this
+   * lock before taking the FSNS write lock. Regular (short) readers do not
+   * take this lock at all, instead relying solely on the synchronization of the
+   * regular FSNS lock.
+   * 
+   * This scheme ensures that:
+   * 1) In the case of normal (fast) ops, readers proceed concurrently and
+   *    writers are not starved.
+   * 2) In the case of long read ops, short reads are allowed to proceed
+   *    concurrently during the duration of the long read.
+   * 
+   * See HDFS-5064 for more context.
+   */
+  @VisibleForTesting
+  protected ReentrantLock longReadLock = new ReentrantLock(true);
+  
   FSNamesystemLock(boolean fair) {
     this.coarseLock = new ReentrantReadWriteLock(fair);
   }
@@ -46,6 +65,10 @@ class FSNamesystemLock implements ReadWr
   public Lock writeLock() {
     return coarseLock.writeLock();
   }
+
+  public Lock longReadLock() {
+    return longReadLock;
+  }
   
   public int getReadHoldCount() {
     return coarseLock.getReadHoldCount();
@@ -58,4 +81,4 @@ class FSNamesystemLock implements ReadWr
   public boolean isWriteLockedByCurrentThread() {
     return coarseLock.isWriteLockedByCurrentThread();
   }
-}
\ No newline at end of file
+}

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java?rev=1575448&r1=1575447&r2=1575448&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
Fri Mar  7 23:35:47 2014
@@ -126,7 +126,7 @@ public class NNStorage extends Storage i
    * recent fsimage file. This does not include any transactions
    * that have since been written to the edit log.
    */
-  protected long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
+  protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
   
   /**
    * Time of the last checkpoint, in milliseconds since the epoch.

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java?rev=1575448&r1=1575447&r2=1575448&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
Fri Mar  7 23:35:47 2014
@@ -151,7 +151,7 @@ public class StandbyCheckpointer {
     final long txid;
     final NameNodeFile imageType;
     
-    namesystem.writeLockInterruptibly();
+    namesystem.longReadLockInterruptibly();
     try {
       assert namesystem.getEditLog().isOpenForRead() :
         "Standby Checkpointer should only attempt a checkpoint when " +
@@ -182,7 +182,7 @@ public class StandbyCheckpointer {
       assert txid == thisCheckpointTxId : "expected to save checkpoint at txid=" +
         thisCheckpointTxId + " but instead saved at txid=" + txid;
     } finally {
-      namesystem.writeUnlock();
+      namesystem.longReadUnlock();
     }
     
     // Upload the saved checkpoint back to the active

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java?rev=1575448&r1=1575447&r2=1575448&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java
Fri Mar  7 23:35:47 2014
@@ -21,6 +21,15 @@ package org.apache.hadoop.hdfs.util;
 public interface RwLock {
   /** Acquire read lock. */
   public void readLock();
+  
+  /**
+   * Acquire the long read lock, unless interrupted while waiting. The long
+   * read lock should also serve to block all concurrent writers.
+   **/
+  void longReadLockInterruptibly() throws InterruptedException;
+  
+  /** Release the long read lock. */
+  public void longReadUnlock();
 
   /** Release read lock. */
   public void readUnlock();

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java?rev=1575448&r1=1575447&r2=1575448&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
Fri Mar  7 23:35:47 2014
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode.ha;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -25,6 +26,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.net.URI;
+import java.net.URL;
 import java.util.List;
 
 import org.apache.commons.logging.Log;
@@ -33,6 +35,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.server.namenode.FSImage;
@@ -311,6 +314,67 @@ public class TestStandbyCheckpoints {
     assertTrue("SBN should have finished checkpointing.",
         answerer.getFireCount() == 1 && answerer.getResultCount() == 1);
   }
+  
+  @Test(timeout=300000)
+  public void testReadsAllowedDuringCheckpoint() throws Exception {
+    
+    // Set it up so that we know when the SBN checkpoint starts and ends.
+    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1);
+    DelayAnswer answerer = new DelayAnswer(LOG);
+    Mockito.doAnswer(answerer).when(spyImage1)
+        .saveNamespace(Mockito.any(FSNamesystem.class),
+            Mockito.any(NameNodeFile.class),
+            Mockito.any(Canceler.class));
+    
+    // Perform some edits and wait for a checkpoint to start on the SBN.
+    doEdits(0, 1000);
+    nn0.getRpcServer().rollEditLog();
+    answerer.waitForCall();
+    assertTrue("SBN is not performing checkpoint but it should be.",
+        answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
+    
+    // Make sure that the lock has actually been taken by the checkpointing
+    // thread.
+    ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
+    
+    // Perform an RPC that needs to take the write lock.
+    Thread t = new Thread() {
+      @Override
+      public void run() {
+        try {
+          nn1.getRpcServer().restoreFailedStorage("false");
+        } catch (IOException e) {
+          e.printStackTrace();
+        }
+      }
+    };
+    t.start();
+    
+    // Make sure that our thread is waiting for the lock.
+    ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
+    
+    assertFalse(nn1.getNamesystem().getFsLockForTests().hasQueuedThreads());
+    assertFalse(nn1.getNamesystem().getFsLockForTests().isWriteLocked());
+    assertTrue(nn1.getNamesystem().getLongReadLockForTests().hasQueuedThreads());
+    
+    // Get /jmx of the standby NN web UI, which will cause the FSNS read lock to
+    // be taken.
+    String pageContents = DFSTestUtil.urlGet(new URL("http://" +
+        nn1.getHttpAddress().getHostName() + ":" +
+        nn1.getHttpAddress().getPort() + "/jmx"));
+    assertTrue(pageContents.contains("NumLiveDataNodes"));
+    
+    // Make sure that the checkpoint is still going on, implying that the client
+    // RPC to the SBN happened during the checkpoint.
+    assertTrue("SBN should have still been checkpointing.",
+        answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
+    answerer.proceed();
+    answerer.waitForResult();
+    assertTrue("SBN should have finished checkpointing.",
+        answerer.getFireCount() == 1 && answerer.getResultCount() == 1);
+    
+    t.join();
+  }
 
   private void doEdits(int start, int stop) throws IOException {
     for (int i = start; i < stop; i++) {



Mime
View raw message