arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-1627: New class to handle collection of BufferLedger(s) within …
Date Thu, 05 Oct 2017 03:19:08 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 31d33e079 -> dc129d60f


ARROW-1627: New class to handle collection of BufferLedger(s) within …

…AllocationManager

LowCostIdentityHashMap uses array only to store objects since key (BaseAllocator) is already
included in the value (BufferLedger)
To make this class a bit more generic it uses values that implement ValueWithKeyIncluded interface
that provides API : getKey()
BufferLedger provides implementation for it.
LowCostIdentityHashMap is not general purpose map it just provides some “map like” APIs
to simplify switch over from IdentityHashMap.

Author: Yuliya Feldman <yuliya@dremio.com>

Closes #1150 from yufeldman/ARROW-1627-master and squashes the following commits:

dcd5e293 [Yuliya Feldman] ARROW-1627: New class to handle collection of BufferLedger(s) within
AllocationManager LowCostIdentityHashMap uses array only to store objects since key (BaseAllocator)
is already included in the value (BufferLedger) To make this class a bit more generic it uses
values that implement ValueWithKeyIncluded interface that provides API : getKey() BufferLedger
provides implementation for it. LowCostIdentityHashMap is not general purpose map it just
provides some “map like” APIs to simplify switch over from IdentityHashMap.


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/dc129d60
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/dc129d60
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/dc129d60

Branch: refs/heads/master
Commit: dc129d60fbffbf3a5b71b1f7987f7dab948b3d61
Parents: 31d33e0
Author: Yuliya Feldman <yuliya@dremio.com>
Authored: Wed Oct 4 23:18:58 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Wed Oct 4 23:18:58 2017 -0400

----------------------------------------------------------------------
 .../apache/arrow/memory/AllocationManager.java  |  17 +-
 .../arrow/memory/LowCostIdentityHasMap.java     | 334 +++++++++++++++++++
 .../arrow/memory/ValueWithKeyIncluded.java      |  26 ++
 .../arrow/memory/TestLowCostIdentityHasMap.java | 169 ++++++++++
 4 files changed, 541 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/dc129d60/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java
----------------------------------------------------------------------
diff --git a/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java b/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java
index 6877c18..14687b5 100644
--- a/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java
+++ b/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java
@@ -74,7 +74,9 @@ public class AllocationManager {
   private final long allocatorManagerId = MANAGER_ID_GENERATOR.incrementAndGet();
   private final int size;
   private final UnsafeDirectLittleEndian underlying;
-  private final IdentityHashMap<BufferAllocator, BufferLedger> map = new IdentityHashMap<>();
+  // ARROW-1627 Trying to minimize memory overhead caused by previously used IdentityHashMap
+  // see JIRA for details
+  private final LowCostIdentityHasMap<BaseAllocator, BufferLedger> map = new LowCostIdentityHasMap<>();
   private final ReadWriteLock lock = new ReentrantReadWriteLock();
   private final AutoCloseableLock readLock = new AutoCloseableLock(lock.readLock());
   private final AutoCloseableLock writeLock = new AutoCloseableLock(lock.writeLock());
@@ -144,7 +146,7 @@ public class AllocationManager {
       if (retain) {
         ledger.inc();
       }
-      BufferLedger oldLedger = map.put(allocator, ledger);
+      BufferLedger oldLedger = map.put(ledger);
       Preconditions.checkArgument(oldLedger == null);
       allocator.associateLedger(ledger);
       return ledger;
@@ -174,7 +176,7 @@ public class AllocationManager {
       } else {
         // we need to change the owning allocator. we've been removed so we'll get whatever
is
         // top of list
-        BufferLedger newLedger = map.values().iterator().next();
+        BufferLedger newLedger = map.getNextValue();
 
         // we'll forcefully transfer the ownership and not worry about whether we exceeded
the
         // limit
@@ -196,7 +198,7 @@ public class AllocationManager {
    * As with AllocationManager, the only reason this is public is due to ArrowBuf being in
io
    * .netty.buffer package.
    */
-  public class BufferLedger {
+  public class BufferLedger implements ValueWithKeyIncluded<BaseAllocator> {
 
     private final IdentityHashMap<ArrowBuf, Object> buffers =
         BaseAllocator.DEBUG ? new IdentityHashMap<ArrowBuf, Object>() : null;
@@ -226,6 +228,11 @@ public class AllocationManager {
       return allocator;
     }
 
+    @Override
+    public BaseAllocator getKey() {
+      return allocator;
+    }
+
     /**
      * Transfer any balance the current ledger has to the target ledger. In the case that
the
      * current ledger holds no
@@ -457,4 +464,4 @@ public class AllocationManager {
 
   }
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/dc129d60/java/memory/src/main/java/org/apache/arrow/memory/LowCostIdentityHasMap.java
----------------------------------------------------------------------
diff --git a/java/memory/src/main/java/org/apache/arrow/memory/LowCostIdentityHasMap.java
b/java/memory/src/main/java/org/apache/arrow/memory/LowCostIdentityHasMap.java
new file mode 100644
index 0000000..1153fb5
--- /dev/null
+++ b/java/memory/src/main/java/org/apache/arrow/memory/LowCostIdentityHasMap.java
@@ -0,0 +1,334 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+
+/**
+ * Highly specialized IdentityHashMap that implements only partial
+ * Map APIs
+ * It incurs low initial cost (just two elements by default)
+ * It assumes Value includes the Key - Implements @ValueWithKeyIncluded iface
+ * that provides "getKey" method
+ * @param <V>
+ */
+public class LowCostIdentityHasMap<K, V extends ValueWithKeyIncluded<K>> {
+
+  /*
+   * The internal data structure to hold values.
+   */
+  private Object[] elementData;
+
+  /* Actual number of values. */
+  private int size;
+
+  /*
+   * maximum number of elements that can be put in this map before having to
+   * rehash.
+   */
+  private int threshold;
+
+  private static final int DEFAULT_MIN_SIZE = 1;
+
+  /* Default load factor of 0.75; */
+  private static final int loadFactor = 7500;
+
+  /**
+   * Creates an Map with default expected maximum size.
+   */
+  public LowCostIdentityHasMap() {
+    this(DEFAULT_MIN_SIZE);
+  }
+
+  /**
+   * Creates an Map with the specified maximum size parameter.
+   *
+   * @param maxSize
+   *            The estimated maximum number of entries that will be put in
+   *            this map.
+   */
+  public LowCostIdentityHasMap(int maxSize) {
+    if (maxSize >= 0) {
+      this.size = 0;
+      threshold = getThreshold(maxSize);
+      elementData = newElementArray(computeElementArraySize());
+    } else {
+      throw new IllegalArgumentException();
+    }
+  }
+
+  private int getThreshold(int maxSize) {
+    // assign the threshold to maxSize initially, this will change to a
+    // higher value if rehashing occurs.
+    return maxSize > 2 ? maxSize : 2;
+  }
+
+  private int computeElementArraySize() {
+    int arraySize = (int) (((long) threshold * 10000) / loadFactor);
+    // ensure arraySize is positive, the above cast from long to int type
+    // leads to overflow and negative arraySize if threshold is too big
+    return arraySize < 0 ? -arraySize : arraySize;
+  }
+
+  /**
+   * Create a new element array
+   *
+   * @param s
+   *            the number of elements
+   * @return Reference to the element array
+   */
+  private Object[] newElementArray(int s) {
+    return new Object[s];
+  }
+  
+  /**
+   * Removes all elements from this map, leaving it empty.
+   *
+   * @see #isEmpty()
+   * @see #size()
+   */
+  public void clear() {
+    size = 0;
+    for (int i = 0; i < elementData.length; i++) {
+      elementData[i] = null;
+    }
+  }
+
+  /**
+   * Returns whether this map contains the specified key.
+   *
+   * @param key
+   *            the key to search for.
+   * @return {@code true} if this map contains the specified key,
+   *         {@code false} otherwise.
+   */
+  public boolean containsKey(K key) {
+    Preconditions.checkNotNull(key);
+
+    int index = findIndex(key, elementData);
+    return (elementData[index] == null) ? false : ((V)elementData[index]).getKey() == key;
+  }
+
+  /**
+   * Returns whether this map contains the specified value.
+   *
+   * @param value
+   *            the value to search for.
+   * @return {@code true} if this map contains the specified value,
+   *         {@code false} otherwise.
+   */
+  public boolean containsValue(V value) {
+    Preconditions.checkNotNull(value);
+
+    for (int i = 0; i < elementData.length; i++) {
+      if (elementData[i] == value) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns the value of the mapping with the specified key.
+   *
+   * @param key
+   *            the key.
+   * @return the value of the mapping with the specified key.
+   */
+  public V get(K key) {
+    Preconditions.checkNotNull(key);
+
+    int index = findIndex(key, elementData);
+
+    return (elementData[index] == null) ? null :
+      (((V)elementData[index]).getKey() == key) ? (V)elementData[index] : null;
+  }
+
+  /**
+   * Returns the index where the key is found at, or the index of the next
+   * empty spot if the key is not found in this table.
+   */
+  @VisibleForTesting
+  int findIndex(Object key, Object[] array) {
+    int length = array.length;
+    int index = getModuloHash(key, length);
+    int last = (index + length - 1) % length;
+    while (index != last) {
+      if ((array[index] == null) || ((V)array[index]).getKey() == key) {
+        /*
+         * Found the key, or the next empty spot (which means key is not
+         * in the table)
+         */
+        break;
+      }
+      index = (index+1) % length;
+    }
+    return index;
+  }
+
+  @VisibleForTesting
+  static int getModuloHash(Object key, int length) {
+    return ((System.identityHashCode(key) & 0x7FFFFFFF) % length);
+  }
+
+  /**
+   * Maps the specified key to the specified value.
+   *
+   * @param value
+   *            the value.
+   * @return the value of any previous mapping with the specified key or
+   *         {@code null} if there was no such mapping.
+   */
+  public V put(V value) {
+    Preconditions.checkNotNull(value);
+    K _key = value.getKey();
+    Preconditions.checkNotNull(_key);
+    V _value = value;
+
+    int index = findIndex(_key, elementData);
+
+    // if the key doesn't exist in the table
+    if (elementData[index] == null || ((V)elementData[index]).getKey() != _key) {
+      if (++size > threshold) {
+        rehash();
+        index = findIndex(_key, elementData);
+      }
+
+      // insert the key and assign the value to null initially
+      elementData[index] = null;
+    }
+
+    // insert value to where it needs to go, return the old value
+    Object result = elementData[index];
+    elementData[index] = _value;
+
+    return (V) result;
+  }
+
+  @VisibleForTesting
+  void rehash() {
+    int newlength = elementData.length * 15 / 10;
+    if (newlength == 0) {
+      newlength = 1;
+    }
+    Object[] newData = newElementArray(newlength);
+    for (int i = 0; i < elementData.length; i++) {
+      Object key = (elementData[i] == null) ? null : ((V)elementData[i]).getKey();
+      if (key != null) {
+        // if not empty
+        int index = findIndex(key, newData);
+        newData[index] = elementData[i];
+      }
+    }
+    elementData = newData;
+    computeMaxSize();
+  }
+
+  private void computeMaxSize() {
+    threshold = (int) ((long) (elementData.length) * loadFactor / 10000);
+  }
+
+  /**
+   * Removes the mapping with the specified key from this map.
+   *
+   * @param key
+   *            the key of the mapping to remove.
+   * @return the value of the removed mapping, or {@code null} if no mapping
+   *         for the specified key was found.
+   */
+  public V remove(K key) {
+    Preconditions.checkNotNull(key);
+
+    boolean hashedOk;
+    int index, next, hash;
+    Object result, object;
+    index = next = findIndex(key, elementData);
+
+    if (elementData[index] == null || ((V)elementData[index]).getKey() != key) {
+      return null;
+    }
+
+    // store the value for this key
+    result = elementData[index];
+    // clear value to allow movement of the rest of the elements
+    elementData[index] = null;
+    size--;
+
+    // shift the following elements up if needed
+    // until we reach an empty spot
+    int length = elementData.length;
+    while (true) {
+      next = (next + 1) % length;
+      object = elementData[next];
+      if (object == null) {
+        break;
+      }
+
+      hash = getModuloHash(((V)object).getKey(), length);
+      hashedOk = hash > index;
+      if (next < index) {
+        hashedOk = hashedOk || (hash <= next);
+      } else {
+        hashedOk = hashedOk && (hash <= next);
+      }
+      if (!hashedOk) {
+        elementData[index] = object;
+        index = next;
+        elementData[index] = null;
+      }
+    }
+
+    return (V) result;
+  }
+
+
+
+  /**
+   * Returns whether this Map has no elements.
+   *
+   * @return {@code true} if this Map has no elements,
+   *         {@code false} otherwise.
+   * @see #size()
+   */
+  public boolean isEmpty() {
+    return size == 0;
+  }
+
+  /**
+   * Returns the number of mappings in this Map.
+   *
+   * @return the number of mappings in this Map.
+   */
+  public int size() {
+    return size;
+  }
+
+  /**
+   * Special API to return next value - substitute of regular Map.values.iterator().next()
+   * @return next available value or null if none available
+   */
+  public V getNextValue() {
+    for (int i = 0; i < elementData.length; i++) {
+      if (elementData[i] != null) {
+        return (V)elementData[i];
+      }
+    }
+    return null;
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/arrow/blob/dc129d60/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java
----------------------------------------------------------------------
diff --git a/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java b/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java
new file mode 100644
index 0000000..7bd9cec
--- /dev/null
+++ b/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.memory;
+
+/**
+ * Helper Iface to generify a value to be included in the map where
+ * key is part of the value
+ */
+public interface ValueWithKeyIncluded<K> {
+    K getKey();
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/dc129d60/java/memory/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHasMap.java
----------------------------------------------------------------------
diff --git a/java/memory/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHasMap.java
b/java/memory/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHasMap.java
new file mode 100644
index 0000000..c119614
--- /dev/null
+++ b/java/memory/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHasMap.java
@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.memory;
+
+import static junit.framework.TestCase.assertNotNull;
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import org.junit.Test;
+
+/**
+ * To test simplified implementation of IdentityHashMap
+ */
+public class TestLowCostIdentityHasMap {
+
+  @Test
+  public void testIdentityHashMap() throws Exception {
+    LowCostIdentityHasMap<String, StringWithKey> hashMap = new LowCostIdentityHasMap<>();
+
+    StringWithKey obj1 = new StringWithKey("s1key", "s1value");
+    StringWithKey obj2 = new StringWithKey("s2key", "s2value");
+    StringWithKey obj3 = new StringWithKey("s3key", "s3value");
+    StringWithKey obj4 = new StringWithKey("s1key", "s4value");
+    StringWithKey obj5 = new StringWithKey("s5key", "s5value");
+
+    assertNull(hashMap.put(obj1));
+    assertNull(hashMap.put(obj2));
+    assertNull(hashMap.put(obj3));
+    assertEquals(obj1, hashMap.put(obj4));
+    assertNull(hashMap.put(obj5));
+
+    assertEquals(4, hashMap.size());
+
+    assertEquals(obj4,hashMap.get("s1key"));
+
+    assertNull(hashMap.remove("abc"));
+
+    assertEquals(obj3,hashMap.remove("s3key"));
+
+    assertEquals(3, hashMap.size());
+
+    assertTrue(!hashMap.isEmpty());
+
+    StringWithKey nextValue = hashMap.getNextValue();
+
+    assertNotNull(nextValue);
+
+    assertTrue((hashMap.get("s1key") == nextValue || hashMap.get("s2key") == nextValue ||
+      hashMap.get("s5key") == nextValue));
+
+    assertTrue(hashMap.containsValue(obj4));
+    assertTrue(hashMap.containsValue(obj2));
+    assertTrue(hashMap.containsValue(obj5));
+
+    assertEquals(obj4,hashMap.remove("s1key"));
+
+    nextValue = hashMap.getNextValue();
+
+    assertNotNull(nextValue);
+
+    assertTrue(hashMap.get("s2key") == nextValue || hashMap.get("s5key") == nextValue);
+
+    assertEquals(2, hashMap.size());
+
+    assertEquals(obj2,hashMap.remove("s2key"));
+    assertEquals(obj5,hashMap.remove("s5key"));
+
+    assertEquals(0, hashMap.size());
+
+    assertTrue(hashMap.isEmpty());
+  }
+
+  @Test
+  public void testLargeMap() throws Exception {
+    LowCostIdentityHasMap<String, StringWithKey> hashMap = new LowCostIdentityHasMap<>();
+
+    String [] keys = new String[200];
+    for (int i = 0; i < 200; i++) {
+      keys[i] = "s"+i+"key";
+    }
+
+    for (int i = 0; i < 100; i++) {
+      if (i % 5 == 0 && i != 0) {
+        StringWithKey obj = new StringWithKey(keys[i-5], "s" + i + "value");
+        StringWithKey retObj = hashMap.put(obj);
+        assertNotNull(retObj);
+        StringWithKey obj1 = new StringWithKey(keys[i], "s" + 2*i + "value");
+        StringWithKey retObj1 = hashMap.put(obj1);
+        assertNull(retObj1);
+      } else {
+        StringWithKey obj = new StringWithKey(keys[i], "s" + i + "value");
+        StringWithKey retObj = hashMap.put(obj);
+        assertNull(retObj);
+      }
+    }
+    assertEquals(100, hashMap.size());
+    for (int i = 0; i < 100; i++) {
+      StringWithKey returnObj = hashMap.get(keys[i]);
+      assertNotNull(returnObj);
+      if (i == 95) {
+        assertEquals("s190value", returnObj.getValue());
+        continue;
+      }
+      if (i % 5 == 0) {
+        assertEquals("s" + (i+5) + "value", returnObj.getValue());
+      } else {
+        assertEquals("s" + i + "value", returnObj.getValue());
+      }
+    }
+
+    for (int i = 0; i < 100; i++) {
+      if (i % 4 == 0) {
+        StringWithKey returnObj = hashMap.remove(keys[i]);
+        assertNotNull(returnObj);
+        assertTrue(!hashMap.containsKey(keys[i]));
+      }
+      StringWithKey obj = new StringWithKey(keys[100+i], "s" + (100+i) + "value");
+      StringWithKey retObj = hashMap.put(obj);
+      assertNull(retObj);
+      assertTrue(hashMap.containsKey(keys[100+i]));
+    }
+    assertEquals(175, hashMap.size());
+    for (int i = 0; i < 100; i++) {
+      StringWithKey retObj = hashMap.getNextValue();
+      assertNotNull(retObj);
+      hashMap.remove(retObj.getKey());
+    }
+    assertTrue(!hashMap.isEmpty());
+    assertEquals(75, hashMap.size());
+    hashMap.clear();
+    assertTrue(hashMap.isEmpty());
+  }
+
+  private class StringWithKey implements ValueWithKeyIncluded<String> {
+
+    private String myValue;
+    private String myKey;
+
+    StringWithKey(String myKey, String myValue) {
+      this.myKey = myKey;
+      this.myValue = myValue;
+    }
+
+    @Override
+    public String getKey() {
+      return myKey;
+    }
+
+    String getValue() {
+      return myValue;
+    }
+  }
+}


Mime
View raw message