hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cmcc...@apache.org
Subject hadoop git commit: HADOOP-11416. Move ChunkedArrayList into hadoop-common (cmccabe) (cherry picked from commit 4281c96e24739387bc2084f819c0176d0051a5e9)
Date Wed, 17 Dec 2014 18:37:09 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 36068768d -> 89d6ac498


HADOOP-11416. Move ChunkedArrayList into hadoop-common (cmccabe)
(cherry picked from commit 4281c96e24739387bc2084f819c0176d0051a5e9)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/89d6ac49
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/89d6ac49
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/89d6ac49

Branch: refs/heads/branch-2
Commit: 89d6ac498adccbb9633c854f8f0e4c7ba9040bac
Parents: 3606876
Author: Colin Patrick Mccabe <cmccabe@cloudera.com>
Authored: Tue Dec 16 17:05:27 2014 -0800
Committer: Colin Patrick Mccabe <cmccabe@cloudera.com>
Committed: Wed Dec 17 10:36:59 2014 -0800

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |   2 +
 .../apache/hadoop/util/ChunkedArrayList.java    | 171 +++++++++++++++++++
 .../hadoop/util/TestChunkedArrayList.java       |  93 ++++++++++
 .../hdfs/server/namenode/FSDirRenameOp.java     |   2 +-
 .../hdfs/server/namenode/FSDirSnapshotOp.java   |   2 +-
 .../hdfs/server/namenode/FSDirectory.java       |   2 +-
 .../hdfs/server/namenode/FSEditLogLoader.java   |   2 +-
 .../hdfs/server/namenode/FSNamesystem.java      |   2 +-
 .../hadoop/hdfs/server/namenode/INode.java      |   2 +-
 .../hadoop/hdfs/util/ChunkedArrayList.java      | 171 -------------------
 .../hadoop/hdfs/util/TestChunkedArrayList.java  |  93 ----------
 11 files changed, 272 insertions(+), 270 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 83a7d68..8d23cf1 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -56,6 +56,8 @@ Release 2.7.0 - UNRELEASED
 
     HADOOP-11410. Make the rpath of libhadoop.so configurable (cmccabe)
 
+    HADOOP-11416. Move ChunkedArrayList into hadoop-common (cmccabe)
+
   OPTIMIZATIONS
 
     HADOOP-11323. WritableComparator#compare keeps reference to byte array.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ChunkedArrayList.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ChunkedArrayList.java
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ChunkedArrayList.java
new file mode 100644
index 0000000..42cd324
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ChunkedArrayList.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.util.AbstractList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+/**
+ * Simplified List implementation which stores elements as a list
+ * of chunks, each chunk having a maximum size. This improves over
+ * using an ArrayList in that creating a large list will never require
+ * a large amount of contiguous heap space -- thus reducing the likelihood
+ * of triggering a CMS compaction pause due to heap fragmentation.
+ * 
+ * The first chunks allocated are small, but each additional chunk is
+ * 50% larger than the previous, ramping up to a configurable maximum
+ * chunk size. Reasonable defaults are provided which should be a good
+ * balance between not making any large allocations while still retaining
+ * decent performance.
+ *
+ * This currently only supports a small subset of List operations --
+ * namely addition and iteration.
+ */
+@InterfaceAudience.Private
+public class ChunkedArrayList<T> extends AbstractList<T> {
+
+  /**
+   * The chunks which make up the full list.
+   */
+  private final List<List<T>> chunks = Lists.newArrayList();
+  
+  /**
+   * Cache of the last element in the 'chunks' array above.
+   * This speeds up the add operation measurably.
+   */
+  private List<T> lastChunk = null;
+
+  /**
+   * The capacity with which the last chunk was allocated.
+   */
+  private int lastChunkCapacity;
+  
+  /**
+   * The capacity of the first chunk to allocate in a cleared list.
+   */
+  private final int initialChunkCapacity;
+  
+  /**
+   * The maximum number of elements for any chunk.
+   */
+  private final int maxChunkSize;
+
+  /**
+   * Total number of elements in the list.
+   */
+  private int size;
+  
+  /**
+   * Default initial size is 6 elements, since typical minimum object
+   * size is 64 bytes, and this leaves enough space for the object
+   * header.
+   */
+  private static final int DEFAULT_INITIAL_CHUNK_CAPACITY = 6;
+  
+  /**
+   * Default max size is 8K elements - which, at 8 bytes per element
+   * should be about 64KB -- small enough to easily fit in contiguous
+   * free heap space even with a fair amount of fragmentation.
+   */
+  private static final int DEFAULT_MAX_CHUNK_SIZE = 8*1024;
+  
+
+  public ChunkedArrayList() {
+    this(DEFAULT_INITIAL_CHUNK_CAPACITY, DEFAULT_MAX_CHUNK_SIZE);
+  }
+
+  /**
+   * @param initialChunkCapacity the capacity of the first chunk to be
+   * allocated
+   * @param maxChunkSize the maximum size of any chunk allocated
+   */
+  public ChunkedArrayList(int initialChunkCapacity, int maxChunkSize) {
+    Preconditions.checkArgument(maxChunkSize >= initialChunkCapacity);
+    this.initialChunkCapacity = initialChunkCapacity;
+    this.maxChunkSize = maxChunkSize;
+  }
+
+  @Override
+  public Iterator<T> iterator() {
+    return Iterables.concat(chunks).iterator();
+  }
+
+  @Override
+  public boolean add(T e) {
+    if (lastChunk == null) {
+      addChunk(initialChunkCapacity);
+    } else if (lastChunk.size() >= lastChunkCapacity) {
+      int newCapacity = lastChunkCapacity + (lastChunkCapacity >> 1);
+      addChunk(Math.min(newCapacity, maxChunkSize));
+    }
+    size++;
+    return lastChunk.add(e);
+  }
+
+  @Override
+  public void clear() {
+    chunks.clear();
+    lastChunk = null;
+    lastChunkCapacity = 0;
+    size = 0;
+  }
+  
+  private void addChunk(int capacity) {
+    lastChunk = Lists.newArrayListWithCapacity(capacity);
+    chunks.add(lastChunk);
+    lastChunkCapacity = capacity;
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return size == 0;
+  }
+
+  @Override
+  public int size() {
+    return size;
+  }
+  
+  @VisibleForTesting
+  int getNumChunks() {
+    return chunks.size();
+  }
+  
+  @VisibleForTesting
+  int getMaxChunkSize() {
+    int size = 0;
+    for (List<T> chunk : chunks) {
+      size = Math.max(size, chunk.size());
+    }
+    return size;
+  }
+
+  @Override
+  public T get(int arg0) {
+    throw new UnsupportedOperationException(
+        this.getClass().getName() + " does not support random access");
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestChunkedArrayList.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestChunkedArrayList.java
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestChunkedArrayList.java
new file mode 100644
index 0000000..ad17094
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestChunkedArrayList.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+
+import org.junit.Test;
+
+import com.google.common.base.Stopwatch;
+
+public class TestChunkedArrayList {
+
+  @Test
+  public void testBasics() {
+    final int N_ELEMS = 100000;
+    ChunkedArrayList<Integer> l = new ChunkedArrayList<Integer>();
+    assertTrue(l.isEmpty());
+    // Insert a bunch of elements.
+    for (int i = 0; i < N_ELEMS; i++) {
+      l.add(i);
+    }
+    assertFalse(l.isEmpty());
+    assertEquals(N_ELEMS, l.size());
+
+    // Check that it got chunked.
+    assertTrue(l.getNumChunks() > 10);
+    assertEquals(8192, l.getMaxChunkSize());
+  }
+  
+  @Test
+  public void testIterator() {
+    ChunkedArrayList<Integer> l = new ChunkedArrayList<Integer>();
+    for (int i = 0; i < 30000; i++) {
+      l.add(i);
+    }
+    
+    int i = 0;
+    for (int fromList : l) {
+      assertEquals(i, fromList);
+      i++;
+    }
+  }
+  
+  @Test
+  public void testPerformance() {
+    String obj = "hello world";
+    
+    final int numElems = 1000000;
+    final int numTrials = 5;
+    
+    for (int trial = 0; trial < numTrials; trial++) {
+      System.gc();
+      {
+        ArrayList<String> arrayList = new ArrayList<String>();
+        Stopwatch sw = new Stopwatch();
+        sw.start();
+        for (int i = 0; i < numElems; i++) {
+          arrayList.add(obj);
+        }
+        System.out.println("       ArrayList " + sw.elapsedMillis());
+      }
+      
+      // test ChunkedArrayList
+      System.gc();
+      {
+        ChunkedArrayList<String> chunkedList = new ChunkedArrayList<String>();
+        Stopwatch sw = new Stopwatch();
+        sw.start();
+        for (int i = 0; i < numElems; i++) {
+          chunkedList.add(obj);
+        }
+        System.out.println("ChunkedArrayList " + sw.elapsedMillis());
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java
index e3020ea..4b4dc8c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java
@@ -30,8 +30,8 @@ import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
 import org.apache.hadoop.hdfs.protocol.SnapshotException;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
-import org.apache.hadoop.hdfs.util.ChunkedArrayList;
 import org.apache.hadoop.hdfs.util.ReadOnlyList;
+import org.apache.hadoop.util.ChunkedArrayList;
 import org.apache.hadoop.util.Time;
 
 import java.io.FileNotFoundException;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSnapshotOp.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSnapshotOp.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSnapshotOp.java
index ea7dc24..833bc30 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSnapshotOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSnapshotOp.java
@@ -29,7 +29,7 @@ import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectorySnapshottableFeature;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
-import org.apache.hadoop.hdfs.util.ChunkedArrayList;
+import org.apache.hadoop.util.ChunkedArrayList;
 
 import java.io.IOException;
 import java.util.List;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
index fbec786..3f7310a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
@@ -59,7 +59,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.util.ByteArray;
-import org.apache.hadoop.hdfs.util.ChunkedArrayList;
+import org.apache.hadoop.util.ChunkedArrayList;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.slf4j.Logger;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index b610cc4..b07e1e2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -96,8 +96,8 @@ import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
-import org.apache.hadoop.hdfs.util.ChunkedArrayList;
 import org.apache.hadoop.hdfs.util.Holder;
+import org.apache.hadoop.util.ChunkedArrayList;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index e2e0eb0..78b8d84 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -256,7 +256,6 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
-import org.apache.hadoop.hdfs.util.ChunkedArrayList;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.ipc.RetriableException;
@@ -277,6 +276,7 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.security.token.delegation.DelegationKey;
+import org.apache.hadoop.util.ChunkedArrayList;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.StringUtils;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
index 55430b7..41b2391 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
@@ -37,8 +37,8 @@ import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
 import org.apache.hadoop.hdfs.server.namenode.INodeReference.DstReference;
 import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithName;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
-import org.apache.hadoop.hdfs.util.ChunkedArrayList;
 import org.apache.hadoop.hdfs.util.Diff;
+import org.apache.hadoop.util.ChunkedArrayList;
 import org.apache.hadoop.util.StringUtils;
 
 import com.google.common.annotations.VisibleForTesting;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ChunkedArrayList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ChunkedArrayList.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ChunkedArrayList.java
deleted file mode 100644
index 89a0db6..0000000
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ChunkedArrayList.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hdfs.util;
-
-import java.util.AbstractList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-
-/**
- * Simplified List implementation which stores elements as a list
- * of chunks, each chunk having a maximum size. This improves over
- * using an ArrayList in that creating a large list will never require
- * a large amount of contiguous heap space -- thus reducing the likelihood
- * of triggering a CMS compaction pause due to heap fragmentation.
- * 
- * The first chunks allocated are small, but each additional chunk is
- * 50% larger than the previous, ramping up to a configurable maximum
- * chunk size. Reasonable defaults are provided which should be a good
- * balance between not making any large allocations while still retaining
- * decent performance.
- *
- * This currently only supports a small subset of List operations --
- * namely addition and iteration.
- */
-@InterfaceAudience.Private
-public class ChunkedArrayList<T> extends AbstractList<T> {
-
-  /**
-   * The chunks which make up the full list.
-   */
-  private final List<List<T>> chunks = Lists.newArrayList();
-  
-  /**
-   * Cache of the last element in the 'chunks' array above.
-   * This speeds up the add operation measurably.
-   */
-  private List<T> lastChunk = null;
-
-  /**
-   * The capacity with which the last chunk was allocated.
-   */
-  private int lastChunkCapacity;
-  
-  /**
-   * The capacity of the first chunk to allocate in a cleared list.
-   */
-  private final int initialChunkCapacity;
-  
-  /**
-   * The maximum number of elements for any chunk.
-   */
-  private final int maxChunkSize;
-
-  /**
-   * Total number of elements in the list.
-   */
-  private int size;
-  
-  /**
-   * Default initial size is 6 elements, since typical minimum object
-   * size is 64 bytes, and this leaves enough space for the object
-   * header.
-   */
-  private static final int DEFAULT_INITIAL_CHUNK_CAPACITY = 6;
-  
-  /**
-   * Default max size is 8K elements - which, at 8 bytes per element
-   * should be about 64KB -- small enough to easily fit in contiguous
-   * free heap space even with a fair amount of fragmentation.
-   */
-  private static final int DEFAULT_MAX_CHUNK_SIZE = 8*1024;
-  
-
-  public ChunkedArrayList() {
-    this(DEFAULT_INITIAL_CHUNK_CAPACITY, DEFAULT_MAX_CHUNK_SIZE);
-  }
-
-  /**
-   * @param initialChunkCapacity the capacity of the first chunk to be
-   * allocated
-   * @param maxChunkSize the maximum size of any chunk allocated
-   */
-  public ChunkedArrayList(int initialChunkCapacity, int maxChunkSize) {
-    Preconditions.checkArgument(maxChunkSize >= initialChunkCapacity);
-    this.initialChunkCapacity = initialChunkCapacity;
-    this.maxChunkSize = maxChunkSize;
-  }
-
-  @Override
-  public Iterator<T> iterator() {
-    return Iterables.concat(chunks).iterator();
-  }
-
-  @Override
-  public boolean add(T e) {
-    if (lastChunk == null) {
-      addChunk(initialChunkCapacity);
-    } else if (lastChunk.size() >= lastChunkCapacity) {
-      int newCapacity = lastChunkCapacity + (lastChunkCapacity >> 1);
-      addChunk(Math.min(newCapacity, maxChunkSize));
-    }
-    size++;
-    return lastChunk.add(e);
-  }
-
-  @Override
-  public void clear() {
-    chunks.clear();
-    lastChunk = null;
-    lastChunkCapacity = 0;
-    size = 0;
-  }
-  
-  private void addChunk(int capacity) {
-    lastChunk = Lists.newArrayListWithCapacity(capacity);
-    chunks.add(lastChunk);
-    lastChunkCapacity = capacity;
-  }
-
-  @Override
-  public boolean isEmpty() {
-    return size == 0;
-  }
-
-  @Override
-  public int size() {
-    return size;
-  }
-  
-  @VisibleForTesting
-  int getNumChunks() {
-    return chunks.size();
-  }
-  
-  @VisibleForTesting
-  int getMaxChunkSize() {
-    int size = 0;
-    for (List<T> chunk : chunks) {
-      size = Math.max(size, chunk.size());
-    }
-    return size;
-  }
-
-  @Override
-  public T get(int arg0) {
-    throw new UnsupportedOperationException(
-        this.getClass().getName() + " does not support random access");
-  }
-}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/89d6ac49/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestChunkedArrayList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestChunkedArrayList.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestChunkedArrayList.java
deleted file mode 100644
index a1e49cc..0000000
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestChunkedArrayList.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hdfs.util;
-
-import static org.junit.Assert.*;
-
-import java.util.ArrayList;
-
-import org.junit.Test;
-
-import com.google.common.base.Stopwatch;
-
-public class TestChunkedArrayList {
-
-  @Test
-  public void testBasics() {
-    final int N_ELEMS = 100000;
-    ChunkedArrayList<Integer> l = new ChunkedArrayList<Integer>();
-    assertTrue(l.isEmpty());
-    // Insert a bunch of elements.
-    for (int i = 0; i < N_ELEMS; i++) {
-      l.add(i);
-    }
-    assertFalse(l.isEmpty());
-    assertEquals(N_ELEMS, l.size());
-
-    // Check that it got chunked.
-    assertTrue(l.getNumChunks() > 10);
-    assertEquals(8192, l.getMaxChunkSize());
-  }
-  
-  @Test
-  public void testIterator() {
-    ChunkedArrayList<Integer> l = new ChunkedArrayList<Integer>();
-    for (int i = 0; i < 30000; i++) {
-      l.add(i);
-    }
-    
-    int i = 0;
-    for (int fromList : l) {
-      assertEquals(i, fromList);
-      i++;
-    }
-  }
-  
-  @Test
-  public void testPerformance() {
-    String obj = "hello world";
-    
-    final int numElems = 1000000;
-    final int numTrials = 5;
-    
-    for (int trial = 0; trial < numTrials; trial++) {
-      System.gc();
-      {
-        ArrayList<String> arrayList = new ArrayList<String>();
-        Stopwatch sw = new Stopwatch();
-        sw.start();
-        for (int i = 0; i < numElems; i++) {
-          arrayList.add(obj);
-        }
-        System.out.println("       ArrayList " + sw.elapsedMillis());
-      }
-      
-      // test ChunkedArrayList
-      System.gc();
-      {
-        ChunkedArrayList<String> chunkedList = new ChunkedArrayList<String>();
-        Stopwatch sw = new Stopwatch();
-        sw.start();
-        for (int i = 0; i < numElems; i++) {
-          chunkedList.add(obj);
-        }
-        System.out.println("ChunkedArrayList " + sw.elapsedMillis());
-      }
-    }
-  }
-}


Mime
View raw message