mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r883365 [21/47] - in /lucene/mahout/trunk: ./ examples/ matrix/ matrix/src/ matrix/src/main/ matrix/src/main/java/ matrix/src/main/java/org/ matrix/src/main/java/org/apache/ matrix/src/main/java/org/apache/mahout/ matrix/src/main/java/org/a...
Date Mon, 23 Nov 2009 15:14:38 GMT
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/IntArrayList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/IntArrayList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/IntArrayList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/IntArrayList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,578 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+import org.apache.mahout.colt.function.IntProcedure;
+/**
+Resizable list holding <code>int</code> elements; implemented with arrays.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+*/
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class IntArrayList extends AbstractIntList {
+	/**
+	 * The array buffer into which the elements of the list are stored.
+	 * The capacity of the list is the length of this array buffer.
+	 * @serial
+	 */
+	protected int[] elements;
+/**
+ * Constructs an empty list.
+ */
+public IntArrayList() {
+	this(10);
+}
+/**
+ * Constructs a list containing the specified elements. 
+ * The initial size and capacity of the list is the length of the array.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ * 
+ * @param elements the array to be backed by the the constructed list
+ */
+public IntArrayList(int[] elements) {
+	elements(elements);
+}
+/**
+ * Constructs an empty list with the specified initial capacity.
+ *
+ * @param   initialCapacity   the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public IntArrayList(int initialCapacity) {
+	this(new int[initialCapacity]);
+	setSizeRaw(0);
+}
+/**
+ * Appends the specified element to the end of this list.
+ *
+ * @param element element to be appended to this list.
+ */
+public void add(int element) {
+	// overridden for performance only.
+	if (size == elements.length) {
+		ensureCapacity(size + 1); 
+	}
+	elements[size++] = element;
+}
+/**
+ * Inserts the specified element before the specified position into the receiver. 
+ * Shifts the element currently at that position (if any) and
+ * any subsequent elements to the right.
+ *
+ * @param index index before which the specified element is to be inserted (must be in [0,size]).
+ * @param element element to be inserted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt; size()</tt>).
+ */
+public void beforeInsert(int index, int element) {
+	// overridden for performance only.
+	if (size == index) {
+		add(element);
+		return;
+	}
+	if (index > size || index < 0) 
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	ensureCapacity(size + 1);
+	System.arraycopy(elements, index, elements, index+1, size-index);
+	elements[index] = element;
+	size++;
+}
+/**
+ * Searches the receiver for the specified value using
+ * the binary search algorithm.  The receiver must <strong>must</strong> be
+ * sorted (as by the sort method) prior to making this call.  If
+ * it is not sorted, the results are undefined: in particular, the call
+ * may enter an infinite loop.  If the receiver contains multiple elements
+ * equal to the specified object, there is no guarantee which instance
+ * will be found.
+ *
+ * @param key the value to be searched for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return index of the search key, if it is contained in the receiver;
+ *	       otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>.  The <i>insertion
+ *	       point</i> is defined as the the point at which the value would
+ * 	       be inserted into the receiver: the index of the first
+ *	       element greater than the key, or <tt>receiver.size()</tt>, if all
+ *	       elements in the receiver are less than the specified key.  Note
+ *	       that this guarantees that the return value will be &gt;= 0 if
+ *	       and only if the key is found.
+ * @see org.apache.mahout.colt.Sorting
+ * @see java.util.Arrays
+ */
+public int binarySearchFromTo(int key, int from, int to) {
+	return org.apache.mahout.colt.Sorting.binarySearchFromTo(this.elements,key,from,to);
+}
+/**
+ * Returns a deep copy of the receiver. 
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	// overridden for performance only.
+	IntArrayList clone = new IntArrayList((int[]) elements.clone());
+	clone.setSizeRaw(size);
+	return clone;
+}
+/**
+ * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public IntArrayList copy() {
+	return (IntArrayList) clone();
+}
+ /**
+ * Sorts the specified range of the receiver into ascending numerical order. 
+ *
+ * The sorting algorithm is a count sort. This algorithm offers guaranteed
+ * <dt>Performance: O(Max(n,max-min+1)).
+ * <dt>Space requirements: int[max-min+1] buffer.
+ * <p>This algorithm is only applicable if max-min+1 is not large!
+ * But if applicable, it usually outperforms quicksort by a factor of 3-4.
+ *
+ * @param from the index of the first element (inclusive) to be sorted.
+ * @param to the index of the last element (inclusive) to be sorted.
+ * @param min the smallest element contained in the range.
+ * @param max the largest element contained in the range.
+ */
+protected void countSortFromTo(int from, int to, int min, int max) {
+	if (size==0) return;
+	checkRangeFromTo(from, to, size);
+
+	final int width = (int) (max-min+1);
+	
+	int[] counts = new int[width];
+	int[] theElements = elements;	
+	for (int i=from; i<=to; ) counts[(int)(theElements[i++]-min)]++;
+
+	int fromIndex = from;
+	int val = min;
+	for (int i=0; i<width; i++, val++) {
+		int c = counts[i];
+		if (c>0) {
+			if (c==1) theElements[fromIndex++]=val;
+			else {
+				int toIndex = fromIndex + c - 1;
+				fillFromToWith(fromIndex,toIndex,val);
+				fromIndex = toIndex + 1;
+			}
+		}
+	}
+}
+/**
+ * Returns the elements currently stored, including invalid elements between size and capacity, if any.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the returned array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @return the elements currently stored.
+ */
+public int[] elements() {
+	return elements;
+}
+/**
+ * Sets the receiver's elements to be the specified array (not a copy of it).
+ *
+ * The size and capacity of the list is the length of the array.
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @param elements the new elements to be stored.
+ * @return the receiver itself.
+ */
+public AbstractIntList elements(int[] elements) {
+	this.elements=elements;
+	this.size=elements.length;
+	return this;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	elements = org.apache.mahout.colt.Arrays.ensureCapacity(elements,minCapacity);
+}
+/**
+ * Compares the specified Object with the receiver.  
+ * Returns true if and only if the specified Object is also an ArrayList of the same type, both Lists have the
+ * same size, and all corresponding pairs of elements in the two Lists are identical.
+ * In other words, two Lists are defined to be equal if they contain the
+ * same elements in the same order.
+ *
+ * @param otherObj the Object to be compared for equality with the receiver.
+ * @return true if the specified Object is equal to the receiver.
+ */
+public boolean equals(Object otherObj) { //delta
+	// overridden for performance only.
+	if (! (otherObj instanceof IntArrayList)) return super.equals(otherObj);
+	if (this==otherObj) return true;
+	if (otherObj==null) return false;
+	IntArrayList other = (IntArrayList) otherObj;
+	if (size()!=other.size()) return false;
+
+	int[] theElements = elements();
+	int[] otherElements = other.elements();
+	for (int i=size(); --i >= 0; ) {
+	    if (theElements[i] != otherElements[i]) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each element of the receiver, if any.
+ * Starts at index 0, moving rightwards.
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all elements where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEach(IntProcedure procedure) {
+	// overridden for performance only.
+	int[] theElements = elements;
+	int theSize = size;
+	
+	for (int i=0; i<theSize;) if (! procedure.apply(theElements[i++])) return false;
+	return true;
+}
+/**
+ * Returns the element at the specified position in the receiver.
+ *
+ * @param index index of element to return.
+ * @exception IndexOutOfBoundsException index is out of range (index
+ * 		  &lt; 0 || index &gt;= size()).
+ */
+public int get(int index) {
+	// overridden for performance only.
+	if (index >= size || index < 0)
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	return elements[index];
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions. 
+ * Provided with invalid parameters this method may return invalid elements without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to return.
+ */
+public int getQuick(int index) {
+	return elements[index];
+}
+/**
+ * Returns the index of the first occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches between <code>from</code>, inclusive and <code>to</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int indexOfFromTo(int element, int from, int to) {
+	// overridden for performance only.
+	if (size==0) return -1;
+	checkRangeFromTo(from, to, size);
+
+	int[] theElements = elements;
+	for (int i = from ; i <= to; i++) {
+	    if (element==theElements[i]) {return i;} //found
+	}
+	return -1; //not found
+}
+/**
+ * Returns the index of the last occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches beginning at <code>to</code>, inclusive until <code>from</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int lastIndexOfFromTo(int element, int from, int to) {
+	// overridden for performance only.
+	if (size==0) return -1;
+	checkRangeFromTo(from, to, size);
+
+	int[] theElements = elements;
+	for (int i = to ; i >= from; i--) {
+	    if (element==theElements[i]) {return i;} //found
+	}
+	return -1; //not found
+}
+/**
+ * Returns a new list of the part of the receiver between <code>from</code>, inclusive, and <code>to</code>, inclusive.
+ * @param from the index of the first element (inclusive).
+ * @param to the index of the last element (inclusive).
+ * @return a new list
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public AbstractIntList partFromTo(int from, int to) {
+	if (size==0) return new IntArrayList(0);
+
+	checkRangeFromTo(from, to, size);
+
+	int[] part = new int[to-from+1];
+	System.arraycopy(elements, from, part, 0, to-from+1);
+	return new IntArrayList(part);
+}
+/**
+* Removes from the receiver all elements that are contained in the specified list.
+* Tests for identity.
+*
+* @param other the other list.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean removeAll(AbstractIntList other) {
+	// overridden for performance only.
+	if (! (other instanceof IntArrayList))	return super.removeAll(other);
+	
+	/* There are two possibilities to do the thing
+	   a) use other.indexOf(...)
+	   b) sort other, then use other.binarySearch(...)
+	   
+	   Let's try to figure out which one is faster. Let M=size, N=other.size, then
+	   a) takes O(M*N) steps
+	   b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+ 
+	   Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+	*/
+	if (other.size()==0) {return false;} //nothing to do
+	int limit = other.size()-1;
+	int j=0;
+	int[] theElements = elements;
+	int mySize = size();
+
+	double N=(double) other.size();
+	double M=(double) mySize;
+	if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+		// it is faster to sort other before searching in it
+		IntArrayList sortedList = (IntArrayList) other.clone();
+		sortedList.quickSort();
+
+		for (int i=0; i<mySize ; i++) {
+			if (sortedList.binarySearchFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+		}
+	}
+	else {
+		// it is faster to search in other without sorting
+		for (int i=0; i<mySize ; i++) {
+			if (other.indexOfFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+		}
+	}
+
+	boolean modified = (j!=mySize);
+	setSize(j);
+	return modified;
+}
+/**
+ * Replaces a number of elements in the receiver with the same number of elements of another list.
+ * Replaces elements in the receiver, between <code>from</code> (inclusive) and <code>to</code> (inclusive),
+ * with elements of <code>other</code>, starting from <code>otherFrom</code> (inclusive).
+ *
+ * @param from the position of the first element to be replaced in the receiver
+ * @param to the position of the last element to be replaced in the receiver
+ * @param other list holding elements to be copied into the receiver.
+ * @param otherFrom position of first element within other list to be copied.
+ */
+public void replaceFromToWithFrom(int from, int to, AbstractIntList other, int otherFrom) {
+	// overridden for performance only.
+	if (! (other instanceof IntArrayList)) {
+		// slower
+		super.replaceFromToWithFrom(from,to,other,otherFrom);
+		return;
+	}
+	int length=to-from+1;
+	if (length>0) {
+		checkRangeFromTo(from, to, size());
+		checkRangeFromTo(otherFrom,otherFrom+length-1,other.size());
+		System.arraycopy(((IntArrayList) other).elements, otherFrom, elements, from, length);
+	}
+}
+/**
+* Retains (keeps) only the elements in the receiver that are contained in the specified other list.
+* In other words, removes from the receiver all of its elements that are not contained in the
+* specified other list. 
+* @param other the other list to test against.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean retainAll(AbstractIntList other) {
+	// overridden for performance only.
+	if (! (other instanceof IntArrayList))	return super.retainAll(other);
+	
+	/* There are two possibilities to do the thing
+	   a) use other.indexOf(...)
+	   b) sort other, then use other.binarySearch(...)
+	   
+	   Let's try to figure out which one is faster. Let M=size, N=other.size, then
+	   a) takes O(M*N) steps
+	   b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+
+	   Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+	*/
+	int limit = other.size()-1;
+	int j=0;
+	int[] theElements = elements;
+	int mySize = size();
+
+	double N=(double) other.size();
+	double M=(double) mySize;
+	if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+		// it is faster to sort other before searching in it
+		IntArrayList sortedList = (IntArrayList) other.clone();
+		sortedList.quickSort();
+
+		for (int i=0; i<mySize ; i++) {
+			if (sortedList.binarySearchFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+		}
+	}
+	else {
+		// it is faster to search in other without sorting
+		for (int i=0; i<mySize ; i++) {
+			if (other.indexOfFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+		}
+	}
+
+	boolean modified = (j!=mySize);
+	setSize(j);
+	return modified;
+}
+/**
+ * Reverses the elements of the receiver.
+ * Last becomes first, second last becomes second first, and so on.
+ */
+public void reverse() {
+	// overridden for performance only.
+	int tmp;
+	int limit=size/2;
+	int j=size-1;
+
+	int[] theElements = elements;
+	for (int i=0; i<limit;) { //swap
+		tmp=theElements[i];
+		theElements[i++]=theElements[j];
+		theElements[j--]=tmp;
+	}
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ * @exception IndexOutOfBoundsException index is out of range (index
+ * 		  &lt; 0 || index &gt;= size()).
+ */
+public void set(int index, int element) {
+	// overridden for performance only.
+	if (index >= size || index < 0)
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	elements[index] = element;
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not check preconditions.
+ * Provided with invalid parameters this method may access invalid indexes without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, int element) {
+	elements[index] = element;
+}
+/**
+ * Randomly permutes the part of the receiver between <code>from</code> (inclusive) and <code>to</code> (inclusive). 
+ * @param from the index of the first element (inclusive) to be permuted.
+ * @param to the index of the last element (inclusive) to be permuted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void shuffleFromTo(int from, int to) {
+	// overridden for performance only.
+	if (size==0) {return;}
+	checkRangeFromTo(from, to, size);
+	
+	org.apache.mahout.jet.random.Uniform gen = new org.apache.mahout.jet.random.Uniform(new org.apache.mahout.jet.random.engine.DRand(new java.util.Date()));
+	int tmpElement;
+	int[] theElements = elements;
+	int random;
+	for (int i=from; i<to; i++) { 
+		random = gen.nextIntFromTo(i, to);
+
+		//swap(i, random)
+		tmpElement = theElements[random];
+		theElements[random]=theElements[i]; 
+		theElements[i]=tmpElement; 
+	}  
+}
+/**
+ * Sorts the specified range of the receiver into ascending order. 
+ *
+ * The sorting algorithm is dynamically chosen according to the characteristics of the data set.
+ * Currently quicksort and countsort are considered.
+ * Countsort is not always applicable, but if applicable, it usually outperforms quicksort by a factor of 3-4.
+ *
+ * <p>Best case performance: O(N).
+ * <dt>Worst case performance: O(N^2) (a degenerated quicksort).
+ * <dt>Best case space requirements: 0 KB. 
+ * <dt>Worst case space requirements: 40 KB.
+ *
+ * @param from the index of the first element (inclusive) to be sorted.
+ * @param to the index of the last element (inclusive) to be sorted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void sortFromTo(int from, int to) {
+	/* 
+	 * Computes min and max and decides on this basis.
+	 * In practice the additional overhead is very small compared to the potential gains.
+	 */
+	final int widthThreshold = 10000; // never consider options resulting in outrageous memory allocations.
+	
+	if (size==0) return;
+	checkRangeFromTo(from, to, size);
+
+	// determine minimum and maximum.
+	int min=elements[from];
+	int max=elements[from];
+
+	int[] theElements = elements;
+	for (int i=from+1; i<=to; ) {
+		int elem = theElements[i++];
+		if (elem>max) max=elem;
+		else if (elem<min) min=elem;
+	}
+
+	// try to figure out which option is fastest.
+	double N = (double)to - (double)from + 1.0;
+	double quickSortEstimate = 	N * Math.log(N)/0.6931471805599453; // O(N*log(N,base=2)) ; ln(2)=0.6931471805599453
+
+	double width = (double)max - (double)min + 1.0;
+	double countSortEstimate = 	Math.max(width,N); // O(Max(width,N))
+	
+	if (width < widthThreshold && countSortEstimate < quickSortEstimate) {
+		countSortFromTo(from, to, min, max);
+	}
+	else {
+		quickSortFromTo(from, to);
+	}
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the 
+ * storage of the receiver.
+ */
+public void trimToSize() {
+	elements = org.apache.mahout.colt.Arrays.trimToCapacity(elements,size());
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/IntArrayList.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/LongArrayList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/LongArrayList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/LongArrayList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/LongArrayList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,572 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+import org.apache.mahout.colt.function.LongProcedure;
+/**
+Resizable list holding <code>long</code> elements; implemented with arrays.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+*/
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class LongArrayList extends AbstractLongList {
+	/**
+	 * The array buffer into which the elements of the list are stored.
+	 * The capacity of the list is the length of this array buffer.
+	 * @serial
+	 */
+	protected long[] elements;
+/**
+ * Constructs an empty list.
+ */
+public LongArrayList() {
+	this(10);
+}
+/**
+ * Constructs a list containing the specified elements. 
+ * The initial size and capacity of the list is the length of the array.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ * 
+ * @param elements the array to be backed by the the constructed list
+ */
+public LongArrayList(long[] elements) {
+	elements(elements);
+}
+/**
+ * Constructs an empty list with the specified initial capacity.
+ *
+ * @param   initialCapacity   the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public LongArrayList(int initialCapacity) {
+	this(new long[initialCapacity]);
+	setSizeRaw(0);
+}
+/**
+ * Appends the specified element to the end of this list.
+ *
+ * @param element element to be appended to this list.
+ */
+public void add(long element) {
+	// overridden for performance only.
+	if (size == elements.length) ensureCapacity(size + 1); 
+	elements[size++] = element;
+}
+/**
+ * Inserts the specified element before the specified position into the receiver. 
+ * Shifts the element currently at that position (if any) and
+ * any subsequent elements to the right.
+ *
+ * @param index index before which the specified element is to be inserted (must be in [0,size]).
+ * @param element element to be inserted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt; size()</tt>).
+ */
+public void beforeInsert(int index, long element) {
+	// overridden for performance only.
+	if (index > size || index < 0) 
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	ensureCapacity(size + 1);
+	System.arraycopy(elements, index, elements, index+1, size-index);
+	elements[index] = element;
+	size++;
+}
+/**
+ * Searches the receiver for the specified value using
+ * the binary search algorithm.  The receiver must <strong>must</strong> be
+ * sorted (as by the sort method) prior to making this call.  If
+ * it is not sorted, the results are undefined: in particular, the call
+ * may enter an infinite loop.  If the receiver contains multiple elements
+ * equal to the specified object, there is no guarantee which instance
+ * will be found.
+ *
+ * @param key the value to be searched for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return index of the search key, if it is contained in the receiver;
+ *	       otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>.  The <i>insertion
+ *	       point</i> is defined as the the point at which the value would
+ * 	       be inserted into the receiver: the index of the first
+ *	       element greater than the key, or <tt>receiver.size()</tt>, if all
+ *	       elements in the receiver are less than the specified key.  Note
+ *	       that this guarantees that the return value will be &gt;= 0 if
+ *	       and only if the key is found.
+ * @see org.apache.mahout.colt.Sorting
+ * @see java.util.Arrays
+ */
+public int binarySearchFromTo(long key, int from, int to) {
+	return org.apache.mahout.colt.Sorting.binarySearchFromTo(this.elements,key,from,to);
+}
+/**
+ * Returns a deep copy of the receiver. 
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	// overridden for performance only.
+	LongArrayList clone = new LongArrayList((long[]) elements.clone());
+	clone.setSizeRaw(size);
+	return clone;
+}
+/**
+ * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public LongArrayList copy() {
+	return (LongArrayList) clone();
+}
+ /**
+ * Sorts the specified range of the receiver into ascending numerical order. 
+ *
+ * The sorting algorithm is a count sort. This algorithm offers guaranteed
+ * <dt>Performance: O(Max(n,max-min+1)).
+ * <dt>Space requirements: int[max-min+1] buffer.
+ * <p>This algorithm is only applicable if max-min+1 is not large!
+ * But if applicable, it usually outperforms quicksort by a factor of 3-4.
+ *
+ * @param from the index of the first element (inclusive) to be sorted.
+ * @param to the index of the last element (inclusive) to be sorted.
+ * @param min the smallest element contained in the range.
+ * @param max the largest element contained in the range.
+ */
+protected void countSortFromTo(int from, int to, long min, long max) {
+	if (size==0) return;
+	checkRangeFromTo(from, to, size);
+
+	final int width = (int) (max-min+1);
+	
+	int[] counts = new int[width];
+	long[] theElements = elements;	
+	for (int i=from; i<=to; ) counts[(int)(theElements[i++]-min)]++;
+
+	int fromIndex = from;
+	long val = min;
+	for (int i=0; i<width; i++, val++) {
+		int c = counts[i];
+		if (c>0) {
+			if (c==1) theElements[fromIndex++]=val;
+			else {
+				int toIndex = fromIndex + c - 1;
+				fillFromToWith(fromIndex,toIndex,val);
+				fromIndex = toIndex + 1;
+			}
+		}
+	}
+}
+/**
+ * Returns the elements currently stored, including invalid elements between size and capacity, if any.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the returned array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @return the elements currently stored.
+ */
+public long[] elements() {
+	return elements;
+}
+/**
+ * Sets the receiver's elements to be the specified array (not a copy of it).
+ *
+ * The size and capacity of the list is the length of the array.
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @param elements the new elements to be stored.
+ * @return the receiver itself.
+ */
+public AbstractLongList elements(long[] elements) {
+	this.elements=elements;
+	this.size=elements.length;
+	return this;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	elements = org.apache.mahout.colt.Arrays.ensureCapacity(elements,minCapacity);
+}
+/**
+ * Compares the specified Object with the receiver.  
+ * Returns true if and only if the specified Object is also an ArrayList of the same type, both Lists have the
+ * same size, and all corresponding pairs of elements in the two Lists are identical.
+ * In other words, two Lists are defined to be equal if they contain the
+ * same elements in the same order.
+ *
+ * @param otherObj the Object to be compared for equality with the receiver.
+ * @return true if the specified Object is equal to the receiver.
+ */
+public boolean equals(Object otherObj) { //delta
+	// overridden for performance only.
+	if (! (otherObj instanceof LongArrayList)) return super.equals(otherObj);
+	if (this==otherObj) return true;
+	if (otherObj==null) return false;
+	LongArrayList other = (LongArrayList) otherObj;
+	if (size()!=other.size()) return false;
+
+	long[] theElements = elements();
+	long[] otherElements = other.elements();
+	for (int i=size(); --i >= 0; ) {
+	    if (theElements[i] != otherElements[i]) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each element of the receiver, if any.
+ * Starts at index 0, moving rightwards.
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all elements where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEach(LongProcedure procedure) {
+	// overridden for performance only.
+	long[] theElements = elements;
+	int theSize = size;
+	
+	for (int i=0; i<theSize;) if (! procedure.apply(theElements[i++])) return false;
+	return true;
+}
+/**
+ * Returns the element at the specified position in the receiver.
+ *
+ * @param index index of element to return.
+ * @exception IndexOutOfBoundsException index is out of range (index
+ * 		  &lt; 0 || index &gt;= size()).
+ */
+public long get(int index) {
+	// overridden for performance only.
+	if (index >= size || index < 0)
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	return elements[index];
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions. 
+ * Provided with invalid parameters this method may return invalid elements without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to return.
+ */
+public long getQuick(int index) {
+	return elements[index];
+}
+/**
+ * Returns the index of the first occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches between <code>from</code>, inclusive and <code>to</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int indexOfFromTo(long element, int from, int to) {
+	// overridden for performance only.
+	if (size==0) return -1;
+	checkRangeFromTo(from, to, size);
+
+	long[] theElements = elements;
+	for (int i = from ; i <= to; i++) {
+	    if (element==theElements[i]) {return i;} //found
+	}
+	return -1; //not found
+}
+/**
+ * Returns the index of the last occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches beginning at <code>to</code>, inclusive until <code>from</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int lastIndexOfFromTo(long element, int from, int to) {
+	// overridden for performance only.
+	if (size==0) return -1;
+	checkRangeFromTo(from, to, size);
+
+	long[] theElements = elements;
+	for (int i = to ; i >= from; i--) {
+	    if (element==theElements[i]) {return i;} //found
+	}
+	return -1; //not found
+}
+/**
+ * Returns a new list of the part of the receiver between <code>from</code>, inclusive, and <code>to</code>, inclusive.
+ * @param from the index of the first element (inclusive).
+ * @param to the index of the last element (inclusive).
+ * @return a new list
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public AbstractLongList partFromTo(int from, int to) {
+	if (size==0) return new LongArrayList(0);
+
+	checkRangeFromTo(from, to, size);
+
+	long[] part = new long[to-from+1];
+	System.arraycopy(elements, from, part, 0, to-from+1);
+	return new LongArrayList(part);
+}
+/**
+* Removes from the receiver all elements that are contained in the specified list.
+* Tests for identity.
+*
+* @param other the other list.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean removeAll(AbstractLongList other) {
+	// overridden for performance only.
+	if (! (other instanceof LongArrayList))	return super.removeAll(other);
+	
+	/* There are two possibilities to do the thing
+	   a) use other.indexOf(...)
+	   b) sort other, then use other.binarySearch(...)
+	   
+	   Let's try to figure out which one is faster. Let M=size, N=other.size, then
+	   a) takes O(M*N) steps
+	   b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+ 
+	   Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+	*/
+	if (other.size()==0) {return false;} //nothing to do
+	int limit = other.size()-1;
+	int j=0;
+	long[] theElements = elements;
+	int mySize = size();
+
+	double N=(double) other.size();
+	double M=(double) mySize;
+	if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+		// it is faster to sort other before searching in it
+		LongArrayList sortedList = (LongArrayList) other.clone();
+		sortedList.quickSort();
+
+		for (int i=0; i<mySize ; i++) {
+			if (sortedList.binarySearchFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+		}
+	}
+	else {
+		// it is faster to search in other without sorting
+		for (int i=0; i<mySize ; i++) {
+			if (other.indexOfFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+		}
+	}
+
+	boolean modified = (j!=mySize);
+	setSize(j);
+	return modified;
+}
+/**
+ * Replaces a number of elements in the receiver with the same number of elements of another list.
+ * Replaces elements in the receiver, between <code>from</code> (inclusive) and <code>to</code> (inclusive),
+ * with elements of <code>other</code>, starting from <code>otherFrom</code> (inclusive).
+ *
+ * @param from the position of the first element to be replaced in the receiver
+ * @param to the position of the last element to be replaced in the receiver
+ * @param other list holding elements to be copied into the receiver.
+ * @param otherFrom position of first element within other list to be copied.
+ */
+public void replaceFromToWithFrom(int from, int to, AbstractLongList other, int otherFrom) {
+	// overridden for performance only.
+	if (! (other instanceof LongArrayList)) {
+		// slower
+		super.replaceFromToWithFrom(from,to,other,otherFrom);
+		return;
+	}
+	int length=to-from+1;
+	if (length>0) {
+		checkRangeFromTo(from, to, size());
+		checkRangeFromTo(otherFrom,otherFrom+length-1,other.size());
+		System.arraycopy(((LongArrayList) other).elements, otherFrom, elements, from, length);
+	}
+}
+/**
+* Retains (keeps) only the elements in the receiver that are contained in the specified other list.
+* In other words, removes from the receiver all of its elements that are not contained in the
+* specified other list. 
+* @param other the other list to test against.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean retainAll(AbstractLongList other) {
+	// overridden for performance only.
+	if (! (other instanceof LongArrayList))	return super.retainAll(other);
+	
+	/* There are two possibilities to do the thing
+	   a) use other.indexOf(...)
+	   b) sort other, then use other.binarySearch(...)
+	   
+	   Let's try to figure out which one is faster. Let M=size, N=other.size, then
+	   a) takes O(M*N) steps
+	   b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+
+	   Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+	*/
+	int limit = other.size()-1;
+	int j=0;
+	long[] theElements = elements;
+	int mySize = size();
+
+	double N=(double) other.size();
+	double M=(double) mySize;
+	if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+		// it is faster to sort other before searching in it
+		LongArrayList sortedList = (LongArrayList) other.clone();
+		sortedList.quickSort();
+
+		for (int i=0; i<mySize ; i++) {
+			if (sortedList.binarySearchFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+		}
+	}
+	else {
+		// it is faster to search in other without sorting
+		for (int i=0; i<mySize ; i++) {
+			if (other.indexOfFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+		}
+	}
+
+	boolean modified = (j!=mySize);
+	setSize(j);
+	return modified;
+}
+/**
+ * Reverses the elements of the receiver.
+ * Last becomes first, second last becomes second first, and so on.
+ */
+public void reverse() {
+	// overridden for performance only.
+	long tmp;
+	int limit=size/2;
+	int j=size-1;
+
+	long[] theElements = elements;
+	for (int i=0; i<limit;) { //swap
+		tmp=theElements[i];
+		theElements[i++]=theElements[j];
+		theElements[j--]=tmp;
+	}
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ * @exception IndexOutOfBoundsException index is out of range (index
+ * 		  &lt; 0 || index &gt;= size()).
+ */
+public void set(int index, long element) {
+	// overridden for performance only.
+	if (index >= size || index < 0)
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	elements[index] = element;
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not check preconditions.
+ * Provided with invalid parameters this method may access invalid indexes without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, long element) {
+	elements[index] = element;
+}
+/**
+ * Randomly permutes the part of the receiver between <code>from</code> (inclusive) and <code>to</code> (inclusive). 
+ * @param from the index of the first element (inclusive) to be permuted.
+ * @param to the index of the last element (inclusive) to be permuted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void shuffleFromTo(int from, int to) {
+	// overridden for performance only.
+	if (size==0) return;
+	checkRangeFromTo(from, to, size);
+	
+	org.apache.mahout.jet.random.Uniform gen = new org.apache.mahout.jet.random.Uniform(new org.apache.mahout.jet.random.engine.DRand(new java.util.Date()));
+	long tmpElement;
+	long[] theElements = elements;
+	int random;
+	for (int i=from; i<to; i++) { 
+		random = gen.nextIntFromTo(i, to);
+
+		//swap(i, random)
+		tmpElement = theElements[random];
+		theElements[random]=theElements[i]; 
+		theElements[i]=tmpElement; 
+	}  
+}
+/**
+ * Sorts the specified range of the receiver into ascending order. 
+ *
+ * The sorting algorithm is dynamically chosen according to the characteristics of the data set.
+ * Currently quicksort and countsort are considered.
+ * Countsort is not always applicable, but if applicable, it usually outperforms quicksort by a factor of 3-4.
+ *
+ * <p>Best case performance: O(N).
+ * <dt>Worst case performance: O(N^2) (a degenerated quicksort).
+ * <dt>Best case space requirements: 0 KB. 
+ * <dt>Worst case space requirements: 40 KB.
+ *
+ * @param from the index of the first element (inclusive) to be sorted.
+ * @param to the index of the last element (inclusive) to be sorted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void sortFromTo(int from, int to) {
+	/* 
+	 * Computes min and max and decides on this basis.
+	 * In practice the additional overhead is very small compared to the potential gains.
+	 */
+	final int widthThreshold = 10000; // never consider options resulting in outrageous memory allocations.
+	
+	if (size==0) return;
+	checkRangeFromTo(from, to, size);
+
+	// determine minimum and maximum.
+	long min=elements[from];
+	long max=elements[from];
+
+	long[] theElements = elements;
+	for (int i=from+1; i<=to; ) {
+		long elem = theElements[i++];
+		if (elem>max) max=elem;
+		else if (elem<min) min=elem;
+	}
+
+	// try to figure out which option is fastest.
+	double N = (double)to - (double)from + 1.0;
+	double quickSortEstimate = 	N * Math.log(N)/0.6931471805599453; // O(N*log(N,base=2)) ; ln(2)=0.6931471805599453
+
+	double width = (double)max - (double)min + 1.0;
+	double countSortEstimate = 	Math.max(width,N); // O(Max(width,N))
+	
+	if (width < widthThreshold && countSortEstimate < quickSortEstimate) {
+		countSortFromTo(from, to, min, max);
+	}
+	else {
+		quickSortFromTo(from, to);
+	}
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the 
+ * storage of the receiver.
+ */
+public void trimToSize() {
+	elements = org.apache.mahout.colt.Arrays.trimToCapacity(elements,size());
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/LongArrayList.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/MinMaxNumberList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/MinMaxNumberList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/MinMaxNumberList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/MinMaxNumberList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,296 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+import org.apache.mahout.colt.bitvector.BitVector;
+import org.apache.mahout.colt.bitvector.QuickBitVector;
+/**
+ * Resizable compressed list holding numbers; based on the fact that a value in a given interval need not take more than <tt>log(max-min+1)</tt> bits; implemented with a <tt>org.apache.mahout.colt.bitvector.BitVector</tt>.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ * <p>
+ * Numbers can be compressed when minimum and maximum of all values ever to be stored in the list are known.
+ * For example, if min=16, max=27, only 4 bits are needed to store a value.
+ * No compression is achieved for <tt>float</tt> and <tt>double</tt> values.
+ * <p>
+ * You can add, get and set elements quite similar to <tt>java.util.ArrayList</tt>.
+ * <p>
+ * <b>Applicability:</b> Applicable if the data is non floating point, highly skewed without "outliers" and minimum and maximum known in advance.
+ * <p>
+ * <b>Performance:</b> Basic operations like <tt>add()</tt>, <tt>get()</tt>, <tt>set()</tt>, <tt>size()</tt> and <tt>clear()</tt> are <tt>O(1)</tt>, i.e. run in constant time.
+ * <dt>200Mhz Pentium Pro, JDK 1.2, NT:
+ * <dt><tt>10^6</tt> calls to <tt>getQuick()</tt> --> <tt>0.5</tt> seconds.
+ * (50 times slower than reading from a primitive array of the appropriate type.)
+ * <dt><tt>10^6</tt> calls to <tt>setQuick()</tt> --> <tt>0.8</tt> seconds.
+ * (15 times slower than writing to a primitive array of the appropriate type.)
+ * <p>
+ * This class can, for example, be useful when making large lists of numbers persistent.
+ * Also useful when very large lists would otherwise consume too much main memory.
+ * <p>
+ * Upon instantiation a contract is signed that defines the interval values may fall into.
+ * It is not legal to store values not contained in that interval.
+ * WARNING: The contract is not checked. Be sure you do not store illegal values.
+ * If you need to store <tt>float</tt> or <tt>double</tt> values, you must set the minimum and maximum to <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> or <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt>, respectively.
+ * <p>
+ * Although access methods are only defined on <tt>long</tt> values you can also store
+ * all other primitive data types: <tt>boolean</tt>, <tt>byte</tt>, <tt>short</tt>, <tt>int</tt>, <tt>long</tt>, <tt>float</tt>, <tt>double</tt> and <tt>char</tt>.
+ * You can do this by explicitly representing them as <tt>long</tt> values.
+ * Use casts for discrete data types.
+ * Use the methods of <tt>java.lang.Float</tt> and <tt>java.lang.Double</tt> for floating point data types:
+ * Recall that with those methods you can convert any floating point value to a <tt>long</tt> value and back <b>without losing any precision</b>:
+ * <p>
+ * <b>Example usage:</b><pre>
+ * MinMaxNumberList list = ... instantiation goes here
+ * double d1 = 1.234;
+ * list.add(Double.doubleToLongBits(d1));
+ * double d2 = Double.longBitsToDouble(list.get(0));
+ * if (d1!=d2) System.out.println("This is impossible!");
+ *
+ * MinMaxNumberList list2 = ... instantiation goes here
+ * float f1 = 1.234f;
+ * list2.add((long) Float.floatToIntBits(f1));
+ * float f2 = Float.intBitsToFloat((int)list2.get(0));
+ * if (f1!=f2) System.out.println("This is impossible!");
+ * </pre>
+ *
+ * @see LongArrayList
+ * @see DistinctNumberList
+ * @see java.lang.Float
+ * @see java.lang.Double
+ * @author wolfgang.hoschek@cern.ch
+ * @version 1.0, 09/24/99
+ */
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class MinMaxNumberList extends org.apache.mahout.colt.list.AbstractLongList {
+	protected long minValue;
+	protected int bitsPerElement;
+	protected long[] bits;
+	protected int capacity;
+/**
+ * Constructs an empty list with the specified initial capacity and the specified range of values allowed to be hold in this list.
+ * Legal values are in the range [minimum,maximum], all inclusive.
+ * @param   minimum   the minimum of values allowed to be hold in this list.
+ * @param   maximum   the maximum of values allowed to be hold in this list.
+ * @param   initialCapacity   the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public MinMaxNumberList(long minimum, long maximum, int initialCapacity) {
+	this.setUp(minimum, maximum, initialCapacity);
+}
+/**
+ * Appends the specified element to the end of this list.
+ *
+ * @param element element to be appended to this list.
+ */
+public void add(long element) {
+	// overridden for performance only.
+	if (size == capacity) {
+		ensureCapacity(size + 1); 
+	}
+	int i=size*this.bitsPerElement;
+	QuickBitVector.putLongFromTo(this.bits, element-this.minValue,i,i+this.bitsPerElement-1);
+	size++;
+}
+/**
+ * Appends the elements <tt>elements[from]</tt> (inclusive), ..., <tt>elements[to]</tt> (inclusive) to the receiver.
+ * @param elements the elements to be appended to the receiver.
+ * @param from the index of the first element to be appended (inclusive)
+ * @param to the index of the last element to be appended (inclusive)
+ */
+public void addAllOfFromTo(long[] elements, int from, int to) {
+	// cache some vars for speed.
+	int bitsPerElem = this.bitsPerElement;
+	int bitsPerElemMinusOne = bitsPerElem-1;
+	long min = this.minValue;
+	long[] theBits = this.bits;
+
+	// now let's go.
+	ensureCapacity(this.size+to-from+1);
+	int firstBit = this.size*bitsPerElem;
+	int i=from;
+	for (int times=to-from+1; --times >=0; ) {
+		QuickBitVector.putLongFromTo(theBits, elements[i++]-min, firstBit, firstBit+bitsPerElemMinusOne);
+		firstBit += bitsPerElem;
+	}
+	this.size += (to-from+1); //*bitsPerElem;
+}
+/**
+ * Returns the number of bits necessary to store a single element.
+ */
+public int bitsPerElement() {
+	return this.bitsPerElement;
+}
+/**
+ * Returns the number of bits necessary to store values in the range <tt>[minimum,maximum]</tt>.
+ */
+public static int bitsPerElement(long minimum, long maximum) {
+	int bits;
+	if (1+maximum-minimum > 0) {
+		bits=(int) Math.round(Math.ceil(org.apache.mahout.jet.math.Arithmetic.log(2,1+maximum-minimum)));
+	}
+	else {	
+		// overflow or underflow in calculating "1+maximum-minimum"
+		// happens if signed long representation is too short for doing unsigned calculations
+		// e.g. if minimum==LONG.MIN_VALUE, maximum==LONG.MAX_VALUE
+		// --> in such cases store all bits of values without any compression.
+		bits=64;
+	}
+	return bits;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	int oldCapacity = capacity;
+	if (minCapacity > oldCapacity) {
+	    int newCapacity = (oldCapacity * 3)/2 + 1;
+		if (newCapacity < minCapacity)	newCapacity = minCapacity;
+		BitVector vector = toBitVector();
+		vector.setSize(newCapacity*bitsPerElement);
+		this.bits = vector.elements();
+		this.capacity = newCapacity;
+	}
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions. 
+ * Provided with invalid parameters this method may return invalid elements without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to return.
+ */
+public long getQuick(int index) {
+	int i=index*this.bitsPerElement;
+	return this.minValue + QuickBitVector.getLongFromTo(this.bits, i,i+this.bitsPerElement-1);
+}
+/**
+ * Copies all elements between index <tt>from</tt> (inclusive) and <tt>to</tt> (inclusive) into <tt>part</tt>, starting at index <tt>partFrom</tt> within <tt>part</tt>.
+ * Elements are only copied if a corresponding flag within <tt>qualificants</tt> is set.
+ * More precisely:<pre>
+ * for (; from<=to; from++, partFrom++, qualificantsFrom++) {
+ *    if (qualificants==null || qualificants.get(qualificantsFrom)) {
+ *       part[partFrom] = this.get(from);
+ *    }
+ * }
+ * </pre>
+ */
+public void partFromTo(final int from, final int to, final BitVector qualificants, final int qualificantsFrom, long[] part, final int partFrom) {
+	int width = to-from+1;
+	if (from<0 || from>to || to>=size || qualificantsFrom<0 || (qualificants!=null && qualificantsFrom+width>qualificants.size())) {
+		throw new IndexOutOfBoundsException();
+	}
+	if (partFrom<0 || partFrom+width>part.length) {
+		throw new IndexOutOfBoundsException();
+	}
+	
+	long minVal = this.minValue;
+	int bitsPerElem = this.bitsPerElement;
+	long[] theBits = this.bits;
+	
+	int q = qualificantsFrom;
+	int p = partFrom;
+	int j=from*bitsPerElem;
+
+	//BitVector tmpBitVector = new BitVector(this.bits, this.size*bitsPerElem);
+	for (int i=from; i<=to; i++, q++, p++, j += bitsPerElem) {
+		if (qualificants==null || qualificants.get(q)) {
+			//part[p] = minVal + tmpBitVector.getLongFromTo(j, j+bitsPerElem-1);
+			part[p] = minVal + QuickBitVector.getLongFromTo(theBits, j, j+bitsPerElem-1);
+		}
+	}
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not check preconditions. 
+ * Provided with invalid parameters this method may access invalid indexes without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, long element) {
+	int i=index*this.bitsPerElement;
+	QuickBitVector.putLongFromTo(this.bits, element-this.minValue,i,i+this.bitsPerElement-1);
+}
+/**
+ * Sets the size of the receiver without modifying it otherwise.
+ * This method should not release or allocate new memory but simply set some instance variable like <tt>size</tt>.
+ */
+protected void setSizeRaw(int newSize) {
+	super.setSizeRaw(newSize);
+}
+/**
+ * Sets the receiver to an empty list with the specified initial capacity and the specified range of values allowed to be hold in this list.
+ * Legal values are in the range [minimum,maximum], all inclusive.
+ * @param   minimum   the minimum of values allowed to be hold in this list.
+ * @param   maximum   the maximum of values allowed to be hold in this list.
+ * @param   initialCapacity   the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+protected void setUp(long minimum, long maximum, int initialCapacity) {
+	setUpBitsPerEntry(minimum, maximum);
+
+	//this.capacity=initialCapacity;
+	this.bits = QuickBitVector.makeBitVector(initialCapacity,this.bitsPerElement);
+	this.capacity = initialCapacity;
+	this.size=0;	
+}
+/**
+ * This method was created in VisualAge.
+ * @param minValue long
+ * @param maxValue long
+ * @param initialCapacity int
+ */
+protected void setUpBitsPerEntry(long minimum, long maximum) {
+	this.bitsPerElement=this.bitsPerElement(minimum, maximum);
+	if (this.bitsPerElement!=64) { 
+		this.minValue=minimum;	
+			// overflow or underflow in calculating "1+maxValue-minValue"
+			// happens if signed long representation is too short for doing unsigned calculations
+			// e.g. if minValue==LONG.MIN_VALUE, maxValue=LONG.MAX_VALUE
+			// --> in such cases store all bits of values without any en/decoding
+	}
+	else {
+		this.minValue=0;
+	};
+}
+/**
+ * Returns the receiver seen as bitvector.
+ * WARNING: The bitvector and the receiver share the backing bits. Modifying one of them will affect the other.
+ */
+public BitVector toBitVector() {
+	return new BitVector(this.bits, this.capacity*bitsPerElement);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. An application can use this operation to minimize the 
+ * storage of the receiver. 
+ */
+public void trimToSize() {
+	int oldCapacity = capacity;
+	if (size < oldCapacity) {
+		BitVector vector = toBitVector();
+		vector.setSize(size);
+		this.bits = vector.elements();
+		this.capacity = size;
+	}
+}
+/**
+ * deprecated
+ * Returns the minimum element legal to the stored in the receiver.
+ * Remark: This does not mean that such a minimum element is currently contained in the receiver.
+ * @deprecated
+ */
+public long xminimum() {
+	return this.minValue;
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/MinMaxNumberList.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message