From: omalley@apache.org
To: common-commits@hadoop.apache.org
Reply-To: common-dev@hadoop.apache.org
Subject: svn commit: r1077058 - in /hadoop/common/branches/branch-0.20-security-patches/src: hdfs/org/apache/hadoop/hdfs/server/namenode/ test/org/apache/hadoop/hdfs/server/namenode/ webapps/hdfs/
Date: Fri, 04 Mar 2011 03:36:28 -0000

Author: omalley
Date: Fri Mar 4 03:36:27 2011
New Revision: 1077058

URL: http://svn.apache.org/viewvc?rev=1077058&view=rev
Log:
commit 6e41b35b96303a453078320539126493f786a3dd
Author: Jitendra Nath Pandey
Date:   Tue Nov 24 11:42:17 2009 -0800

    HDFS-758. Changes to report decommissioning status on namenode web UI.

    Patch URL: https://issues.apache.org/jira/secure/attachment/12426000/HDFS-758.5.0-20.patch

    +++ b/YAHOO-CHANGES.txt
    +    HDFS-758. Changes to report status of decommissioning on the namenode web
    +    UI. (jitendra)
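For orientation, the workflow this commit enables is sketched below, condensed from the TestDecommissioningStatus test added further down. The standalone class name, the exclude-file path under /tmp, the main() wrapper, and the fixed sleep are illustrative assumptions; the cluster setup, configuration keys, and the getDecommissioningNodes()/DecommissioningStatus calls come from the patch itself.

// Sketch only: decommission via the exclude file, then read the per-node
// status counters that this patch adds for the namenode web UI.
package org.apache.hadoop.hdfs.server.namenode;

import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class DecommissioningStatusSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hosts listed in the exclude file are decommissioned after refreshNodes().
    Path excludeFile = new Path("/tmp/dfs-exclude");   // assumed path, not from the patch
    conf.set("dfs.hosts.exclude", excludeFile.toUri().getPath());
    conf.setInt("dfs.namenode.decommission.interval", 1);

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.create(excludeFile).close();               // start with an empty exclude file

    MiniDFSCluster cluster = new MiniDFSCluster(conf, 2, true, null);
    cluster.waitActive();
    FSNamesystem fsn = cluster.getNameNode().getNamesystem();

    // In a real run, write a datanode name into excludeFile here (the test
    // below obtains node names via DFSClient.datanodeReport), then refresh.
    fsn.refreshNodes(conf);
    Thread.sleep(5000);   // let the decommission monitor update the counters

    // New in this patch: enumerate nodes that are mid-decommission and read
    // the counters the web UI displays.
    ArrayList<DatanodeDescriptor> decommissioning = fsn.getDecommissioningNodes();
    for (DatanodeDescriptor node : decommissioning) {
      System.out.println(node.getName()
          + ": under-replicated=" + node.decommissioningStatus.getUnderReplicatedBlocks()
          + ", decommission-only replicas=" + node.decommissioningStatus.getDecommissionOnlyReplicas()
          + ", under-replicated in open files=" + node.decommissioningStatus.getUnderReplicatedInOpenFiles()
          + ", started at " + node.decommissioningStatus.getStartTime());
    }
    cluster.shutdown();
  }
}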
Added:
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
Modified:
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/branch-0.20-security-patches/src/webapps/hdfs/dfshealth.jsp
    hadoop/common/branches/branch-0.20-security-patches/src/webapps/hdfs/dfsnodelist.jsp

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java?rev=1077058&r1=1077057&r2=1077058&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/DatanodeDescriptor.java Fri Mar 4 03:36:27 2011
@@ -44,6 +44,11 @@ import org.apache.hadoop.io.WritableUtil
  **************************************************/
 public class DatanodeDescriptor extends DatanodeInfo {
+
+  // Stores status of decommissioning.
+  // If node is not decommissioning, do not use this object for anything.
+  DecommissioningStatus decommissioningStatus = new DecommissioningStatus();
+
   /** Block and targets pair */
   public static class BlockTargetPair {
     public final Block block;
@@ -462,4 +467,53 @@ public class DatanodeDescriptor extends 
       lastBlocksScheduledRollTime = now;
     }
   }
+
+  class DecommissioningStatus {
+    int underReplicatedBlocks;
+    int decommissionOnlyReplicas;
+    int underReplicatedInOpenFiles;
+    long startTime;
+
+    synchronized void set(int underRep, int onlyRep, int underConstruction) {
+      if (isDecommissionInProgress() == false) {
+        return;
+      }
+      underReplicatedBlocks = underRep;
+      decommissionOnlyReplicas = onlyRep;
+      underReplicatedInOpenFiles = underConstruction;
+    }
+
+    synchronized int getUnderReplicatedBlocks() {
+      if (isDecommissionInProgress() == false) {
+        return 0;
+      }
+      return underReplicatedBlocks;
+    }
+
+    synchronized int getDecommissionOnlyReplicas() {
+      if (isDecommissionInProgress() == false) {
+        return 0;
+      }
+      return decommissionOnlyReplicas;
+    }
+
+    synchronized int getUnderReplicatedInOpenFiles() {
+      if (isDecommissionInProgress() == false) {
+        return 0;
+      }
+      return underReplicatedInOpenFiles;
+    }
+
+    synchronized void setStartTime(long time) {
+      startTime = time;
+    }
+
+    synchronized long getStartTime() {
+      if (isDecommissionInProgress() == false) {
+        return 0;
+      }
+      return startTime;
+    }
+  } // End of class DecommissioningStatus
+
 }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1077058&r1=1077057&r2=1077058&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Mar 4 03:36:27 2011
@@ -3626,14 +3626,11 @@ public class FSNamesystem implements FSC
     if (!node.isDecommissionInProgress() && !node.isDecommissioned()) {
       LOG.info("Start Decommissioning node " + node.getName());
       node.startDecommission();
+      node.decommissioningStatus.setStartTime(now());
       //
       // all the blocks that reside on this node have to be
       // replicated.
-      Iterator<Block> decommissionBlocks = node.getBlockIterator();
-      while(decommissionBlocks.hasNext()) {
-        Block block = decommissionBlocks.next();
-        updateNeededReplications(block, -1, 0);
-      }
+      checkDecommissionStateInternal(node);
     }
   }
 
@@ -3748,12 +3745,40 @@ public class FSNamesystem implements FSC
     return countNodes(b, blocksMap.nodeIterator(b));
   }
 
+  private void logBlockReplicationInfo(Block block, DatanodeDescriptor srcNode,
+      NumberReplicas num) {
+    int curReplicas = num.liveReplicas();
+    int curExpectedReplicas = getReplication(block);
+    INode fileINode = blocksMap.getINode(block);
+    Iterator<DatanodeDescriptor> nodeIter = blocksMap.nodeIterator(block);
+    StringBuffer nodeList = new StringBuffer();
+    while (nodeIter.hasNext()) {
+      DatanodeDescriptor node = nodeIter.next();
+      nodeList.append(node.name);
+      nodeList.append(" ");
+    }
+    FSNamesystem.LOG.info("Block: " + block + ", Expected Replicas: "
+        + curExpectedReplicas + ", live replicas: " + curReplicas
+        + ", corrupt replicas: " + num.corruptReplicas()
+        + ", decommissioned replicas: " + num.decommissionedReplicas()
+        + ", excess replicas: " + num.excessReplicas() + ", Is Open File: "
+        + fileINode.isUnderConstruction() + ", Datanodes having this block: "
+        + nodeList + ", Current Datanode: " + srcNode.name
+        + ", Is current datanode decommissioning: "
+        + srcNode.isDecommissionInProgress());
+  }
+
+
   /**
    * Return true if there are any blocks on this node that have not
    * yet reached their replication factor. Otherwise returns false.
   */
  private boolean isReplicationInProgress(DatanodeDescriptor srcNode) {
    boolean status = false;
+    int underReplicatedBlocks = 0;
+    int decommissionOnlyReplicas = 0;
+    int underReplicatedInOpenFiles = 0;
+
    for(final Iterator<Block> i = srcNode.getBlockIterator(); i.hasNext(); ) {
      final Block block = i.next();
      INode fileINode = blocksMap.getINode(block);
@@ -3763,7 +3788,19 @@ public class FSNamesystem implements FSC
          int curReplicas = num.liveReplicas();
          int curExpectedReplicas = getReplication(block);
          if (curExpectedReplicas > curReplicas) {
-            status = true;
+            // Log info about one block for this node which needs replication
+            if (!status) {
+              status = true;
+              logBlockReplicationInfo(block, srcNode, num);
+            }
+            underReplicatedBlocks++;
+            if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) {
+              decommissionOnlyReplicas++;
+            }
+            if (fileINode.isUnderConstruction()) {
+              underReplicatedInOpenFiles++;
+            }
+
            if (!neededReplications.contains(block) &&
              pendingReplications.getNumReplicas(block) == 0) {
              //
@@ -3779,6 +3816,9 @@ public class FSNamesystem implements FSC
        }
      }
    }
+    srcNode.decommissioningStatus.set(underReplicatedBlocks,
+        decommissionOnlyReplicas, underReplicatedInOpenFiles);
+
    return status;
  }
 
@@ -4746,4 +4786,16 @@ public class FSNamesystem implements FSC
      }
    }
  }
+
+  public synchronized ArrayList<DatanodeDescriptor> getDecommissioningNodes() {
+    ArrayList<DatanodeDescriptor> decommissioningNodes = new ArrayList<DatanodeDescriptor>();
+    ArrayList<DatanodeDescriptor> results = getDatanodeListForReport(DatanodeReportType.LIVE);
+    for (Iterator<DatanodeDescriptor> it = results.iterator(); it.hasNext();) {
+      DatanodeDescriptor node = it.next();
+      if (node.isDecommissionInProgress()) {
+        decommissioningNodes.add(node);
+      }
+    }
+    return decommissioningNodes;
+  }
 }

Added: hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java?rev=1077058&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java Fri Mar 4 03:36:27 2011
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.FSConstants.DatanodeReportType;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.DatanodeDescriptor;
+
+/**
+ * This class tests the decommissioning of nodes.
+ */
+public class TestDecommissioningStatus {
+  private static final long seed = 0xDEADBEEFL;
+  private static final int blockSize = 8192;
+  private static final int fileSize = 16384;
+  private static final int numDatanodes = 2;
+  private static MiniDFSCluster cluster;
+  private static FileSystem fileSys;
+  private static Path excludeFile;
+  private static FileSystem localFileSys;
+  private static Configuration conf;
+  private static Path dir;
+
+  ArrayList<String> decommissionedNodes = new ArrayList<String>(numDatanodes);
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    conf = new Configuration();
+    conf.setBoolean("dfs.replication.considerLoad", false);
+
+    // Set up the hosts/exclude files.
+    localFileSys = FileSystem.getLocal(conf);
+    Path workingDir = localFileSys.getWorkingDirectory();
+    dir = new Path(workingDir, "build/test/data/work-dir/decommission");
+    assertTrue(localFileSys.mkdirs(dir));
+    excludeFile = new Path(dir, "exclude");
+    conf.set("dfs.hosts.exclude", excludeFile.toUri().getPath());
+    conf.setInt("heartbeat.recheck.interval", 2000);
+    conf.setInt("dfs.heartbeat.interval", 1);
+    conf.setInt("dfs.replication.pending.timeout.sec", 4);
+    conf.setInt("dfs.replication.interval", 1000);
+    conf.setInt("dfs.namenode.decommission.interval", 1);
+    writeConfigFile(localFileSys, excludeFile, null);
+
+    cluster = new MiniDFSCluster(conf, numDatanodes, true, null);
+    cluster.waitActive();
+    fileSys = cluster.getFileSystem();
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    if(fileSys != null) fileSys.close();
+    if(cluster != null) cluster.shutdown();
+  }
+
+  private static void writeConfigFile(FileSystem fs, Path name,
+      ArrayList<String> nodes) throws IOException {
+
+    // delete if it already exists
+    if (fs.exists(name)) {
+      fs.delete(name, true);
+    }
+
+    FSDataOutputStream stm = fs.create(name);
+
+    if (nodes != null) {
+      for (Iterator<String> it = nodes.iterator(); it.hasNext();) {
+        String node = it.next();
+        stm.writeBytes(node);
+        stm.writeBytes("\n");
+      }
+    }
+    stm.close();
+  }
+
+  private void writeFile(FileSystem fileSys, Path name, short repl)
+      throws IOException {
+    // create and write a file that contains three blocks of data
+    FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf()
+        .getInt("io.file.buffer.size", 4096), repl, (long) blockSize);
+    byte[] buffer = new byte[fileSize];
+    Random rand = new Random(seed);
+    rand.nextBytes(buffer);
+    stm.write(buffer);
+    stm.close();
+  }
+
+  private FSDataOutputStream writeIncompleteFile(FileSystem fileSys, Path name,
+      short repl) throws IOException {
+    // create and write a file that contains three blocks of data
+    FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf()
+        .getInt("io.file.buffer.size", 4096), repl, (long) blockSize);
+    byte[] buffer = new byte[fileSize];
+    Random rand = new Random(seed);
+    rand.nextBytes(buffer);
+    stm.write(buffer);
+    // Do not close stream, return it
+    // so that it is not garbage collected
+    return stm;
+  }
+
+  private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
+    assertTrue(fileSys.exists(name));
+    fileSys.delete(name, true);
+    assertTrue(!fileSys.exists(name));
+  }
+
+  /*
+   * Decommissions the node at the given index
+   */
+  private String decommissionNode(FSNamesystem namesystem, Configuration conf,
+      DFSClient client, FileSystem localFileSys, int nodeIndex)
+      throws IOException {
+    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
+
+    String nodename = info[nodeIndex].getName();
+    System.out.println("Decommissioning node: " + nodename);
+
+    // write nodename into the exclude file.
+    ArrayList<String> nodes = new ArrayList<String>(decommissionedNodes);
+    nodes.add(nodename);
+    writeConfigFile(localFileSys, excludeFile, nodes);
+    namesystem.refreshNodes(conf);
+    return nodename;
+  }
+
+  private void checkDecommissionStatus(DatanodeDescriptor decommNode,
+      int expectedUnderRep, int expectedDecommissionOnly,
+      int expectedUnderRepInOpenFiles) {
+    assertEquals(decommNode.decommissioningStatus.getUnderReplicatedBlocks(),
+        expectedUnderRep);
+    assertEquals(
+        decommNode.decommissioningStatus.getDecommissionOnlyReplicas(),
+        expectedDecommissionOnly);
+    assertEquals(decommNode.decommissioningStatus
+        .getUnderReplicatedInOpenFiles(), expectedUnderRepInOpenFiles);
+  }
+
+  /**
+   * Tests Decommissioning Status in DFS.
+   */
+
+  @Test
+  public void testDecommissionStatus() throws IOException, InterruptedException {
+    InetSocketAddress addr = new InetSocketAddress("localhost", cluster
+        .getNameNodePort());
+    DFSClient client = new DFSClient(addr, conf);
+    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
+    assertEquals("Number of Datanodes ", 2, info.length);
+    FileSystem fileSys = cluster.getFileSystem();
+
+    short replicas = 2;
+    //
+    // Decommission one node. Verify the decommission status
+    //
+    Path file1 = new Path("decommission.dat");
+    writeFile(fileSys, file1, replicas);
+
+    Path file2 = new Path("decommission1.dat");
+    FSDataOutputStream st1 = writeIncompleteFile(fileSys, file2, replicas);
+    Thread.sleep(5000);
+
+    FSNamesystem fsn = cluster.getNameNode().getNamesystem();
+    for (int iteration = 0; iteration < numDatanodes; iteration++) {
+      String downnode = decommissionNode(fsn, conf, client, localFileSys,
+          iteration);
+      decommissionedNodes.add(downnode);
+      Thread.sleep(5000);
+      ArrayList<DatanodeDescriptor> decommissioningNodes = fsn
+          .getDecommissioningNodes();
+      if (iteration == 0) {
+        assertEquals(decommissioningNodes.size(), 1);
+        DatanodeDescriptor decommNode = decommissioningNodes.get(0);
+        checkDecommissionStatus(decommNode, 4, 0, 2);
+      } else {
+        assertEquals(decommissioningNodes.size(), 2);
+        DatanodeDescriptor decommNode1 = decommissioningNodes.get(0);
+        DatanodeDescriptor decommNode2 = decommissioningNodes.get(1);
+        checkDecommissionStatus(decommNode1, 4, 4, 2);
+        checkDecommissionStatus(decommNode2, 4, 4, 2);
+      }
+    }
+    // Call refreshNodes on FSNamesystem with empty exclude file.
+    // This will remove the datanodes from decommissioning list and
+    // make them available again.
+    writeConfigFile(localFileSys, excludeFile, null);
+    fsn.refreshNodes(conf);
+    st1.close();
+    cleanupFile(fileSys, file1);
+    cleanupFile(fileSys, file2);
+    cleanupFile(localFileSys, dir);
+  }
+}
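A quick sanity check of the counters asserted in testDecommissionStatus above: with blockSize = 8192 and fileSize = 16384 each file occupies two blocks (despite the "three blocks" wording in the write helpers), so decommissioning one of the two datanodes with replication 2 leaves four under-replicated blocks, two of which belong to the still-open file. The class below is an editor's back-of-the-envelope illustration, not part of the patch.

// Back-of-the-envelope check of the expected values in checkDecommissionStatus.
public class DecommissionCountCheck {
  public static void main(String[] args) {
    int blockSize = 8192, fileSize = 16384;
    int blocksPerFile = fileSize / blockSize;        // 2 blocks per file

    int closedFileBlocks = blocksPerFile;            // decommission.dat
    int openFileBlocks = blocksPerFile;              // decommission1.dat, left open

    // Replication is 2 on a 2-node cluster, so once one node is
    // decommissioning every block is short one live replica.
    int underReplicated = closedFileBlocks + openFileBlocks;   // 4
    int underReplicatedInOpenFiles = openFileBlocks;           // 2

    System.out.println(underReplicated + " under-replicated blocks, "
        + underReplicatedInOpenFiles + " of them in the open file");
  }
}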
\n" ); + colTxt() + ":" + colTxt() + dead.size() + rowTxt() + colTxt() + + "" + + "Decommissioning Nodes " + + colTxt() + ":" + colTxt() + decommissioning.size() + + rowTxt() + colTxt() + + "Number of Under-Replicated Blocks" + colTxt() + ":" + colTxt() + + fsn.getUnderReplicatedBlocks() + + "
\n" ); if (live.isEmpty() && dead.isEmpty()) { out.print("There are no datanodes in the cluster"); Modified: hadoop/common/branches/branch-0.20-security-patches/src/webapps/hdfs/dfsnodelist.jsp URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/webapps/hdfs/dfsnodelist.jsp?rev=1077058&r1=1077057&r2=1077058&view=diff ============================================================================== --- hadoop/common/branches/branch-0.20-security-patches/src/webapps/hdfs/dfsnodelist.jsp (original) +++ hadoop/common/branches/branch-0.20-security-patches/src/webapps/hdfs/dfsnodelist.jsp Fri Mar 4 03:36:27 2011 @@ -49,6 +49,45 @@ String NodeHeaderStr(String name) { return ret; } +void generateDecommissioningNodeData(JspWriter out, DatanodeDescriptor d, + String suffix, boolean alive, int nnHttpPort) throws IOException { + String url = "http://" + d.getHostName() + ":" + d.getInfoPort() + + "/browseDirectory.jsp?namenodeInfoPort=" + nnHttpPort + "&dir=" + + URLEncoder.encode("/", "UTF-8"); + + String name = d.getHostName() + ":" + d.getPort(); + if (!name.matches("\\d+\\.\\d+.\\d+\\.\\d+.*")) + name = name.replaceAll("\\.[^.:]*", ""); + int idx = (suffix != null && name.endsWith(suffix)) ? name + .indexOf(suffix) : -1; + + out.print(rowTxt() + "" + + ((idx > 0) ? name.substring(0, idx) : name) + "" + + ((alive) ? "" : "\n")); + if (!alive) { + return; + } + + long decommRequestTime = d.decommissioningStatus.getStartTime(); + long timestamp = d.getLastUpdate(); + long currentTime = System.currentTimeMillis(); + long hoursSinceDecommStarted = (currentTime - decommRequestTime)/3600000; + long remainderMinutes = ((currentTime - decommRequestTime)/60000) % 60; + out.print(" " + + ((currentTime - timestamp) / 1000) + + "" + + d.decommissioningStatus.getUnderReplicatedBlocks() + + "" + + d.decommissioningStatus.getDecommissionOnlyReplicas() + + "" + + d.decommissioningStatus.getUnderReplicatedInOpenFiles() + + "" + + hoursSinceDecommStarted + " hrs " + remainderMinutes + " mins" + + "\n"); +} + + public void generateNodeData( JspWriter out, DatanodeDescriptor d, String suffix, boolean alive, int nnHttpPort ) @@ -209,7 +248,7 @@ throws IOException { } } out.print("\n"); - } else { + } else if (whatNodes.equals("DEAD")) { out.print("
" + " Dead Datanodes : " +dead.size() + "

\n"); @@ -225,8 +264,36 @@ throws IOException { out.print("\n"); } - } - out.print(""); + } else if (whatNodes.equals("DECOMMISSIONING")) { + // Decommissioning Nodes + ArrayList decommissioning = nn.getNamesystem() + .getDecommissioningNodes(); + out.print("
" + + " Decommissioning Datanodes : " + decommissioning.size() + + "

\n"); + if (decommissioning.size() > 0) { + out.print(" " + + "
Node Last
Contact
Under Replicated Blocks Blocks With No
Live Replicas
Under Replicated Blocks
In Files Under Construction" + + "
Time Since Decommissioning Started" + ); + jspHelper.sortNodeList(decommissioning, "name", "ASC"); + for (int i = 0; i < decommissioning.size(); i++) { + generateDecommissioningNodeData(out, decommissioning.get(i), + port_suffix, true, nnHttpPort); + } + out.print("
\n"); + } + out.print(""); + } } }%>