Return-Path: X-Original-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 2599C10F15 for ; Fri, 6 Sep 2013 19:05:55 +0000 (UTC) Received: (qmail 42533 invoked by uid 500); 6 Sep 2013 19:05:54 -0000 Delivered-To: apmail-hadoop-hdfs-commits-archive@hadoop.apache.org Received: (qmail 42446 invoked by uid 500); 6 Sep 2013 19:05:54 -0000 Mailing-List: contact hdfs-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hdfs-dev@hadoop.apache.org Delivered-To: mailing list hdfs-commits@hadoop.apache.org Received: (qmail 42438 invoked by uid 99); 6 Sep 2013 19:05:53 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 06 Sep 2013 19:05:53 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 06 Sep 2013 19:05:49 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id F136023888CD; Fri, 6 Sep 2013 19:05:26 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1520667 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/main/java/org/apache/hadoop/hdfs/util/ src/test/java/org/apache/hadoop/hdfs/util/ Date: Fri, 06 Sep 2013 19:05:26 -0000 To: hdfs-commits@hadoop.apache.org From: cmccabe@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20130906190526.F136023888CD@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: cmccabe Date: Fri Sep 6 19:05:26 2013 New Revision: 1520667 URL: http://svn.apache.org/r1520667 Log: HDFS-4879. Add BlockedArrayList collection to avoid CMS full GCs (Contributed by Todd Lipcon) Added: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ChunkedArrayList.java hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestChunkedArrayList.java Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1520667&r1=1520666&r2=1520667&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Sep 6 19:05:26 2013 @@ -269,6 +269,9 @@ Release 2.3.0 - UNRELEASED HDFS-4491. Parallel testing HDFS. (Andrey Klochkov via cnauroth) + HDFS-4879. Add "blocked ArrayList" collection to avoid CMS full GCs + (Todd Lipcon via Colin Patrick McCabe) + OPTIMIZATIONS BUG FIXES Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java?rev=1520667&r1=1520666&r2=1520667&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java Fri Sep 6 19:05:26 2013 @@ -69,6 +69,7 @@ import org.apache.hadoop.hdfs.server.nam import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.Root; import org.apache.hadoop.hdfs.util.ByteArray; +import org.apache.hadoop.hdfs.util.ChunkedArrayList; import org.apache.hadoop.hdfs.util.ReadOnlyList; import com.google.common.annotations.VisibleForTesting; @@ -949,7 +950,7 @@ public class FSDirectory implements Clos if (removedDst != null) { undoRemoveDst = false; BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); - List removedINodes = new ArrayList(); + List removedINodes = new ChunkedArrayList(); filesDeleted = removedDst.cleanSubtree(null, dstIIP.getLatestSnapshot(), collectedBlocks, removedINodes, true) .get(Quota.NAMESPACE); @@ -1363,7 +1364,7 @@ public class FSDirectory implements Clos QuotaExceededException, SnapshotAccessControlException { assert hasWriteLock(); BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); - List removedINodes = new ArrayList(); + List removedINodes = new ChunkedArrayList(); final INodesInPath inodesInPath = rootDir.getINodesInPath4Write( normalizePath(src), false); Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java?rev=1520667&r1=1520666&r2=1520667&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java Fri Sep 6 19:05:26 2013 @@ -22,7 +22,6 @@ import static org.apache.hadoop.util.Tim import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; -import java.util.ArrayList; import java.util.Arrays; import java.util.EnumMap; import java.util.List; @@ -75,6 +74,7 @@ import org.apache.hadoop.hdfs.server.nam import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; +import org.apache.hadoop.hdfs.util.ChunkedArrayList; import org.apache.hadoop.hdfs.util.Holder; import com.google.common.base.Joiner; @@ -582,7 +582,7 @@ public class FSEditLogLoader { case OP_DELETE_SNAPSHOT: { DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op; BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); - List removedINodes = new ArrayList(); + List removedINodes = new ChunkedArrayList(); fsNamesys.getSnapshotManager().deleteSnapshot( deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName, collectedBlocks, removedINodes); Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1520667&r1=1520666&r2=1520667&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Sep 6 19:05:26 2013 @@ -201,6 +201,7 @@ import org.apache.hadoop.hdfs.server.pro import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; +import org.apache.hadoop.hdfs.util.ChunkedArrayList; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.ipc.RetryCache; @@ -3130,7 +3131,7 @@ public class FSNamesystem implements Nam throws AccessControlException, SafeModeException, UnresolvedLinkException, IOException { BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); - List removedINodes = new ArrayList(); + List removedINodes = new ChunkedArrayList(); FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); @@ -3184,21 +3185,17 @@ public class FSNamesystem implements Nam * of blocks that need to be removed from blocksMap */ void removeBlocks(BlocksMapUpdateInfo blocks) { - int start = 0; - int end = 0; List toDeleteList = blocks.getToDeleteList(); - while (start < toDeleteList.size()) { - end = BLOCK_DELETION_INCREMENT + start; - end = end > toDeleteList.size() ? toDeleteList.size() : end; + Iterator iter = toDeleteList.iterator(); + while (iter.hasNext()) { writeLock(); try { - for (int i = start; i < end; i++) { - blockManager.removeBlock(toDeleteList.get(i)); + for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) { + blockManager.removeBlock(iter.next()); } } finally { writeUnlock(); } - start = end; } } @@ -6778,7 +6775,7 @@ public class FSNamesystem implements Nam checkOwner(pc, snapshotRoot); BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); - List removedINodes = new ArrayList(); + List removedINodes = new ChunkedArrayList(); dir.writeLock(); try { snapshotManager.deleteSnapshot(snapshotRoot, snapshotName, Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java?rev=1520667&r1=1520666&r2=1520667&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java Fri Sep 6 19:05:26 2013 @@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.server.nam import org.apache.hadoop.hdfs.server.namenode.snapshot.FileWithSnapshot; import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; +import org.apache.hadoop.hdfs.util.ChunkedArrayList; import org.apache.hadoop.hdfs.util.Diff; import org.apache.hadoop.util.StringUtils; @@ -707,7 +708,7 @@ public abstract class INode implements I } public BlocksMapUpdateInfo() { - toDeleteList = new ArrayList(); + toDeleteList = new ChunkedArrayList(); } /** Added: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ChunkedArrayList.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ChunkedArrayList.java?rev=1520667&view=auto ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ChunkedArrayList.java (added) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ChunkedArrayList.java Fri Sep 6 19:05:26 2013 @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.util; + +import java.util.AbstractList; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; + +/** + * Simplified List implementation which stores elements as a list + * of chunks, each chunk having a maximum size. This improves over + * using an ArrayList in that creating a large list will never require + * a large amount of contiguous heap space -- thus reducing the likelihood + * of triggering a CMS compaction pause due to heap fragmentation. + * + * The first chunks allocated are small, but each additional chunk is + * 50% larger than the previous, ramping up to a configurable maximum + * chunk size. Reasonable defaults are provided which should be a good + * balance between not making any large allocations while still retaining + * decent performance. + * + * This currently only supports a small subset of List operations -- + * namely addition and iteration. + */ +@InterfaceAudience.Private +public class ChunkedArrayList extends AbstractList { + + /** + * The chunks which make up the full list. + */ + private final List> chunks = Lists.newArrayList(); + + /** + * Cache of the last element in the 'chunks' array above. + * This speeds up the add operation measurably. + */ + private List lastChunk = null; + + /** + * The capacity with which the last chunk was allocated. + */ + private int lastChunkCapacity; + + /** + * The capacity of the first chunk to allocate in a cleared list. + */ + private final int initialChunkCapacity; + + /** + * The maximum number of elements for any chunk. + */ + private final int maxChunkSize; + + /** + * Total number of elements in the list. + */ + private int size; + + /** + * Default initial size is 6 elements, since typical minimum object + * size is 64 bytes, and this leaves enough space for the object + * header. + */ + private static final int DEFAULT_INITIAL_CHUNK_CAPACITY = 6; + + /** + * Default max size is 8K elements - which, at 8 bytes per element + * should be about 64KB -- small enough to easily fit in contiguous + * free heap space even with a fair amount of fragmentation. + */ + private static final int DEFAULT_MAX_CHUNK_SIZE = 8*1024; + + + public ChunkedArrayList() { + this(DEFAULT_INITIAL_CHUNK_CAPACITY, DEFAULT_MAX_CHUNK_SIZE); + } + + /** + * @param initialChunkCapacity the capacity of the first chunk to be + * allocated + * @param maxChunkSize the maximum size of any chunk allocated + */ + public ChunkedArrayList(int initialChunkCapacity, int maxChunkSize) { + Preconditions.checkArgument(maxChunkSize >= initialChunkCapacity); + this.initialChunkCapacity = initialChunkCapacity; + this.maxChunkSize = maxChunkSize; + } + + @Override + public Iterator iterator() { + return Iterables.concat(chunks).iterator(); + } + + @Override + public boolean add(T e) { + if (lastChunk == null) { + addChunk(initialChunkCapacity); + } else if (lastChunk.size() >= lastChunkCapacity) { + int newCapacity = lastChunkCapacity + (lastChunkCapacity >> 1); + addChunk(Math.min(newCapacity, maxChunkSize)); + } + size++; + return lastChunk.add(e); + } + + @Override + public void clear() { + chunks.clear(); + lastChunk = null; + lastChunkCapacity = 0; + size = 0; + } + + private void addChunk(int capacity) { + lastChunk = Lists.newArrayListWithCapacity(capacity); + chunks.add(lastChunk); + lastChunkCapacity = capacity; + } + + @Override + public boolean isEmpty() { + return size == 0; + } + + @Override + public int size() { + return size; + } + + @VisibleForTesting + int getNumChunks() { + return chunks.size(); + } + + @VisibleForTesting + int getMaxChunkSize() { + int size = 0; + for (List chunk : chunks) { + size = Math.max(size, chunk.size()); + } + return size; + } + + @Override + public T get(int arg0) { + throw new UnsupportedOperationException( + this.getClass().getName() + " does not support random access"); + } +} Added: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestChunkedArrayList.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestChunkedArrayList.java?rev=1520667&view=auto ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestChunkedArrayList.java (added) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestChunkedArrayList.java Fri Sep 6 19:05:26 2013 @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.util; + +import static org.junit.Assert.*; + +import java.util.ArrayList; + +import org.junit.Test; + +import com.google.common.base.Stopwatch; + +public class TestChunkedArrayList { + + @Test + public void testBasics() { + final int N_ELEMS = 100000; + ChunkedArrayList l = new ChunkedArrayList(); + assertTrue(l.isEmpty()); + // Insert a bunch of elements. + for (int i = 0; i < N_ELEMS; i++) { + l.add(i); + } + assertFalse(l.isEmpty()); + assertEquals(N_ELEMS, l.size()); + + // Check that it got chunked. + assertTrue(l.getNumChunks() > 10); + assertEquals(8192, l.getMaxChunkSize()); + } + + @Test + public void testIterator() { + ChunkedArrayList l = new ChunkedArrayList(); + for (int i = 0; i < 30000; i++) { + l.add(i); + } + + int i = 0; + for (int fromList : l) { + assertEquals(i, fromList); + i++; + } + } + + @Test + public void testPerformance() { + String obj = "hello world"; + + final int numElems = 1000000; + final int numTrials = 5; + + for (int trial = 0; trial < numTrials; trial++) { + System.gc(); + { + ArrayList arrayList = new ArrayList(); + Stopwatch sw = new Stopwatch(); + sw.start(); + for (int i = 0; i < numElems; i++) { + arrayList.add(obj); + } + System.out.println(" ArrayList " + sw.elapsedMillis()); + } + + // test ChunkedArrayList + System.gc(); + { + ChunkedArrayList chunkedList = new ChunkedArrayList(); + Stopwatch sw = new Stopwatch(); + sw.start(); + for (int i = 0; i < numElems; i++) { + chunkedList.add(obj); + } + System.out.println("ChunkedArrayList " + sw.elapsedMillis()); + } + } + } +}