Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 80AC618861 for ; Fri, 17 Jul 2015 20:55:14 +0000 (UTC) Received: (qmail 74209 invoked by uid 500); 17 Jul 2015 20:55:11 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 74046 invoked by uid 500); 17 Jul 2015 20:55:11 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 73682 invoked by uid 99); 17 Jul 2015 20:55:11 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 17 Jul 2015 20:55:11 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id DE518E35D1; Fri, 17 Jul 2015 20:55:10 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: arp@apache.org To: common-commits@hadoop.apache.org Date: Fri, 17 Jul 2015 20:55:14 -0000 Message-Id: <99546ac70b00498f9d212c138efc2f3c@git.apache.org> In-Reply-To: <38ddf9ff1eff4f5daa176c0c772cb240@git.apache.org> References: <38ddf9ff1eff4f5daa176c0c772cb240@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [05/14] hadoop git commit: HDFS-7314. When the DFSClient lease cannot be renewed, abort open-for-write files rather than the entire DFSClient. (mingma) HDFS-7314. When the DFSClient lease cannot be renewed, abort open-for-write files rather than the entire DFSClient. (mingma) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/fbd88f10 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/fbd88f10 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/fbd88f10 Branch: refs/heads/HDFS-7240 Commit: fbd88f1062f3c4b208724d208e3f501eb196dfab Parents: 1ba2986 Author: Ming Ma Authored: Thu Jul 16 12:33:57 2015 -0700 Committer: Ming Ma Committed: Thu Jul 16 12:33:57 2015 -0700 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../java/org/apache/hadoop/hdfs/DFSClient.java | 16 +---- .../hadoop/hdfs/client/impl/LeaseRenewer.java | 12 +++- .../hadoop/hdfs/TestDFSClientRetries.java | 66 +++++++++++++++++++- 4 files changed, 79 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbd88f10/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 8f6dd41..c6685e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -165,6 +165,9 @@ Trunk (Unreleased) HDFS-5033. Bad error message for fs -put/copyFromLocal if user doesn't have permissions to read the source (Darrell Taylor via aw) + HDFS-7314. When the DFSClient lease cannot be renewed, abort open-for-write + files rather than the entire DFSClient. (mingma) + OPTIMIZATIONS BUG FIXES http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbd88f10/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 6629a83..6f9e613 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -567,23 +567,9 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, void closeConnectionToNamenode() { RPC.stopProxy(namenode); } - - /** Abort and release resources held. Ignore all errors. */ - public void abort() { - clientRunning = false; - closeAllFilesBeingWritten(true); - try { - // remove reference to this client and stop the renewer, - // if there is no more clients under the renewer. - getLeaseRenewer().closeClient(this); - } catch (IOException ioe) { - LOG.info("Exception occurred while aborting the client " + ioe); - } - closeConnectionToNamenode(); - } /** Close/abort all files being written. */ - private void closeAllFilesBeingWritten(final boolean abort) { + public void closeAllFilesBeingWritten(final boolean abort) { for(;;) { final long inodeId; final DFSOutputStream out; http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbd88f10/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java index 99323bb..c689b73 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java @@ -215,6 +215,12 @@ public class LeaseRenewer { return renewal; } + /** Used for testing only. */ + @VisibleForTesting + public synchronized void setRenewalTime(final long renewal) { + this.renewal = renewal; + } + /** Add a client. */ private synchronized void addClient(final DFSClient dfsc) { for(DFSClient c : dfsclients) { @@ -453,8 +459,12 @@ public class LeaseRenewer { + (elapsed/1000) + " seconds. Aborting ...", ie); synchronized (this) { while (!dfsclients.isEmpty()) { - dfsclients.get(0).abort(); + DFSClient dfsClient = dfsclients.get(0); + dfsClient.closeAllFilesBeingWritten(true); + closeClient(dfsClient); } + //Expire the current LeaseRenewer thread. + emptyTime = 0; } break; } catch (IOException ie) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/fbd88f10/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java index 43e0eb9..441ef9c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java @@ -31,6 +31,7 @@ import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.timeout; import static org.mockito.Mockito.when; import java.io.FileNotFoundException; @@ -62,6 +63,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; +import org.apache.hadoop.hdfs.client.impl.LeaseRenewer; import org.apache.hadoop.hdfs.client.HdfsUtils; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; @@ -354,7 +356,59 @@ public class TestDFSClientRetries { cluster.shutdown(); } } - + + /** + * Test DFSClient can continue to function after renewLease RPC + * receives SocketTimeoutException. + */ + @Test + public void testLeaseRenewSocketTimeout() throws Exception + { + String file1 = "/testFile1"; + String file2 = "/testFile2"; + // Set short retry timeouts so this test runs faster + conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE, 10); + conf.setInt(DFS_CLIENT_SOCKET_TIMEOUT_KEY, 2 * 1000); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); + try { + cluster.waitActive(); + NamenodeProtocols spyNN = spy(cluster.getNameNodeRpc()); + Mockito.doThrow(new SocketTimeoutException()).when(spyNN).renewLease( + Mockito.anyString()); + DFSClient client = new DFSClient(null, spyNN, conf, null); + // Get hold of the lease renewer instance used by the client + LeaseRenewer leaseRenewer = client.getLeaseRenewer(); + leaseRenewer.setRenewalTime(100); + OutputStream out1 = client.create(file1, false); + + Mockito.verify(spyNN, timeout(10000).times(1)).renewLease( + Mockito.anyString()); + verifyEmptyLease(leaseRenewer); + try { + out1.write(new byte[256]); + fail("existing output stream should be aborted"); + } catch (IOException e) { + } + + // Verify DFSClient can do read operation after renewLease aborted. + client.exists(file2); + // Verify DFSClient can do write operation after renewLease no longer + // throws SocketTimeoutException. + Mockito.doNothing().when(spyNN).renewLease( + Mockito.anyString()); + leaseRenewer = client.getLeaseRenewer(); + leaseRenewer.setRenewalTime(100); + OutputStream out2 = client.create(file2, false); + Mockito.verify(spyNN, timeout(10000).times(2)).renewLease( + Mockito.anyString()); + out2.write(new byte[256]); + out2.close(); + verifyEmptyLease(leaseRenewer); + } finally { + cluster.shutdown(); + } + } + /** * Test that getAdditionalBlock() and close() are idempotent. This allows * a client to safely retry a call and still produce a correct @@ -673,7 +727,15 @@ public class TestDFSClientRetries { } return ret; } - + + private void verifyEmptyLease(LeaseRenewer leaseRenewer) throws Exception { + int sleepCount = 0; + while (!leaseRenewer.isEmpty() && sleepCount++ < 20) { + Thread.sleep(500); + } + assertTrue("Lease should be empty.", leaseRenewer.isEmpty()); + } + class DFSClientReader implements Runnable { DFSClient client;