Return-Path: X-Original-To: apmail-mahout-commits-archive@www.apache.org Delivered-To: apmail-mahout-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id EFC3C97CD for ; Fri, 6 Apr 2012 14:44:53 +0000 (UTC) Received: (qmail 11755 invoked by uid 500); 6 Apr 2012 14:44:53 -0000 Delivered-To: apmail-mahout-commits-archive@mahout.apache.org Received: (qmail 11659 invoked by uid 500); 6 Apr 2012 14:44:53 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 11651 invoked by uid 99); 6 Apr 2012 14:44:53 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 06 Apr 2012 14:44:53 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 06 Apr 2012 14:44:49 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 42F892388A36; Fri, 6 Apr 2012 14:44:28 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1310359 - in /mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common: FastByIDMap.java FastIDSet.java FastMap.java Date: Fri, 06 Apr 2012 14:44:28 -0000 To: commits@mahout.apache.org From: srowen@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120406144428.42F892388A36@eris.apache.org> Author: srowen Date: Fri Apr 6 14:44:27 2012 New Revision: 1310359 URL: http://svn.apache.org/viewvc?rev=1310359&view=rev Log: Allow choice of load factor in custom maps Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java?rev=1310359&r1=1310358&r2=1310359&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java Fri Apr 6 14:44:27 2012 @@ -37,7 +37,7 @@ import com.google.common.base.Preconditi public final class FastByIDMap implements Serializable, Cloneable { public static final int NO_MAX_SIZE = Integer.MAX_VALUE; - private static final double ALLOWED_LOAD_FACTOR = 1.5; + private static final float DEFAULT_LOAD_FACTOR = 1.5f; /** Dummy object used to represent a key that has been removed. */ private static final long REMOVED = Long.MAX_VALUE; @@ -45,6 +45,7 @@ public final class FastByIDMap implem private long[] keys; private V[] values; + private float loadFactor; private int numEntries; private int numSlotsUsed; private final int maxSize; @@ -59,25 +60,33 @@ public final class FastByIDMap implem public FastByIDMap(int size) { this(size, NO_MAX_SIZE); } - + + public FastByIDMap(int size, float loadFactor) { + this(size, NO_MAX_SIZE, loadFactor); + } + + public FastByIDMap(int size, int maxSize) { + this(size, maxSize, DEFAULT_LOAD_FACTOR); + } + /** - * Creates a new whose capacity can accommodate the given number of entries without - * rehash.

+ * Creates a new whose capacity can accommodate the given number of entries without rehash. * - * @param size - * desired capacity - * @param maxSize - * max capacity - * @throws IllegalArgumentException - * if size is less than 0, maxSize is less than 1, or at least half of - * {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME} + * @param size desired capacity + * @param maxSize max capacity + * @param loadFactor ratio of internal hash table size to current size + * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1 + * or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or + * loadFactor is less than 1 */ - public FastByIDMap(int size, int maxSize) { + public FastByIDMap(int size, int maxSize, float loadFactor) { Preconditions.checkArgument(size >= 0, "size must be at least 0"); - int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / ALLOWED_LOAD_FACTOR); + Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0"); + this.loadFactor = loadFactor; + int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor); Preconditions.checkArgument(size < max, "size must be less than " + max); Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1"); - int hashSize = RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * size)); + int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size)); keys = new long[hashSize]; Arrays.fill(keys, NULL); values = (V[]) new Object[hashSize]; @@ -170,9 +179,9 @@ public final class FastByIDMap implem throw new NullPointerException(); } // If less than half the slots are open, let's clear it up - if (numSlotsUsed * ALLOWED_LOAD_FACTOR >= keys.length) { + if (numSlotsUsed * loadFactor >= keys.length) { // If over half the slots used are actual entries, let's grow - if (numEntries * ALLOWED_LOAD_FACTOR >= numSlotsUsed) { + if (numEntries * loadFactor >= numSlotsUsed) { growAndRehash(); } else { // Otherwise just rehash to clear REMOVED entries and don't grow @@ -262,14 +271,14 @@ public final class FastByIDMap implem } public void rehash() { - rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * numEntries))); + rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries))); } private void growAndRehash() { - if (keys.length * ALLOWED_LOAD_FACTOR >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) { + if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) { throw new IllegalStateException("Can't grow any more"); } - rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * keys.length))); + rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length))); } private void rehash(int newHashSize) { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java?rev=1310359&r1=1310358&r2=1310359&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java Fri Apr 6 14:44:27 2012 @@ -31,13 +31,14 @@ import com.google.common.base.Preconditi */ public final class FastIDSet implements Serializable, Cloneable, Iterable { - private static final double ALLOWED_LOAD_FACTOR = 1.5; + private static final float DEFAULT_LOAD_FACTOR = 1.5f; /** Dummy object used to represent a key that has been removed. */ private static final long REMOVED = Long.MAX_VALUE; private static final long NULL = Long.MIN_VALUE; private long[] keys; + private float loadFactor; private int numEntries; private int numSlotsUsed; @@ -52,13 +53,18 @@ public final class FastIDSet implements } public FastIDSet(int size) { + this(size, DEFAULT_LOAD_FACTOR); + } + + public FastIDSet(int size, float loadFactor) { Preconditions.checkArgument(size >= 0, "size must be at least 0"); - int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / ALLOWED_LOAD_FACTOR); + Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0"); + this.loadFactor = loadFactor; + int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor); Preconditions.checkArgument(size < max, "size must be less than %d", max); - int hashSize = RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * size)); + int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size)); keys = new long[hashSize]; - Arrays.fill(keys, NULL); - } + Arrays.fill(keys, NULL); } /** * @see #findForAdd(long) @@ -118,9 +124,9 @@ public final class FastIDSet implements Preconditions.checkArgument(key != NULL && key != REMOVED); // If less than half the slots are open, let's clear it up - if (numSlotsUsed * ALLOWED_LOAD_FACTOR >= keys.length) { + if (numSlotsUsed * loadFactor >= keys.length) { // If over half the slots used are actual entries, let's grow - if (numEntries * ALLOWED_LOAD_FACTOR >= numSlotsUsed) { + if (numEntries * loadFactor >= numSlotsUsed) { growAndRehash(); } else { // Otherwise just rehash to clear REMOVED entries and don't grow @@ -231,14 +237,14 @@ public final class FastIDSet implements } private void growAndRehash() { - if (keys.length * ALLOWED_LOAD_FACTOR >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) { + if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) { throw new IllegalStateException("Can't grow any more"); } - rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * keys.length))); + rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length))); } public void rehash() { - rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * numEntries))); + rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries))); } private void rehash(int newHashSize) { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=1310359&r1=1310358&r2=1310359&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Fri Apr 6 14:44:27 2012 @@ -54,13 +54,14 @@ import com.google.common.base.Preconditi public final class FastMap implements Map, Serializable, Cloneable { public static final int NO_MAX_SIZE = Integer.MAX_VALUE; - private static final double ALLOWED_LOAD_FACTOR = 1.5; + private static final float DEFAULT_LOAD_FACTOR = 1.5f; /** Dummy object used to represent a key that has been removed. */ private static final Object REMOVED = new Object(); private K[] keys; private V[] values; + private float loadFactor; private int numEntries; private int numSlotsUsed; private final int maxSize; @@ -80,25 +81,32 @@ public final class FastMap implemen this(other.size()); putAll(other); } + + public FastMap(int size, float loadFactor) { + this(size, NO_MAX_SIZE, loadFactor); + } + + public FastMap(int size, int maxSize) { + this(size, maxSize, DEFAULT_LOAD_FACTOR); + } /** - * Creates a new whose capacity can accommodate the given number of entries without - * rehash.

+ * Creates a new whose capacity can accommodate the given number of entries without rehash. * - * @param size - * desired capacity - * @param maxSize - * max capacity - * @throws IllegalArgumentException - * if size is less than 0, maxSize is less than 1, or at least half of - * {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME} + * @param size desired capacity + * @param maxSize max capacity + * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1 + * or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or + * loadFactor is less than 1 */ - public FastMap(int size, int maxSize) { + public FastMap(int size, int maxSize, float loadFactor) { Preconditions.checkArgument(size >= 0, "size must be at least 0"); - int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / ALLOWED_LOAD_FACTOR); + Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0"); + this.loadFactor = loadFactor; + int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor); Preconditions.checkArgument(size < max, "size must be less than " + max); Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1"); - int hashSize = RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * size)); + int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size)); keys = (K[]) new Object[hashSize]; values = (V[]) new Object[hashSize]; this.maxSize = maxSize; @@ -174,9 +182,9 @@ public final class FastMap implemen throw new NullPointerException(); } // If less than half the slots are open, let's clear it up - if (numSlotsUsed * ALLOWED_LOAD_FACTOR >= keys.length) { + if (numSlotsUsed * loadFactor >= keys.length) { // If over half the slots used are actual entries, let's grow - if (numEntries * ALLOWED_LOAD_FACTOR >= numSlotsUsed) { + if (numEntries * loadFactor >= numSlotsUsed) { growAndRehash(); } else { // Otherwise just rehash to clear REMOVED entries and don't grow @@ -279,14 +287,14 @@ public final class FastMap implemen } public void rehash() { - rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * numEntries))); + rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries))); } private void growAndRehash() { - if (keys.length * ALLOWED_LOAD_FACTOR >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) { + if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) { throw new IllegalStateException("Can't grow any more"); } - rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * keys.length))); + rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length))); } private void rehash(int newHashSize) {