Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 66554 invoked from network); 2 Feb 2008 19:04:56 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 2 Feb 2008 19:04:56 -0000 Received: (qmail 11441 invoked by uid 500); 2 Feb 2008 19:04:47 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 11420 invoked by uid 500); 2 Feb 2008 19:04:47 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 11409 invoked by uid 99); 2 Feb 2008 19:04:46 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 02 Feb 2008 11:04:46 -0800 X-ASF-Spam-Status: No, hits=-100.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 02 Feb 2008 19:04:25 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 1F4BF1A9842; Sat, 2 Feb 2008 11:04:22 -0800 (PST) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r617859 [2/2] - in /lucene/java/trunk: ./ contrib/miscellaneous/src/test/org/apache/lucene/misc/ contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/ src/java/org/apache/lucene/search/ src/java/org/apache/lucene/util/ src... Date: Sat, 02 Feb 2008 19:04:09 -0000 To: java-commits@lucene.apache.org From: buschmi@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080202190433.1F4BF1A9842@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Added: lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java?rev=617859&view=auto ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java (added) +++ lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java Sat Feb 2 11:04:03 2008 @@ -0,0 +1,773 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util; + +import java.util.Arrays; +import java.io.Serializable; + +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + +/** An "open" BitSet implementation that allows direct access to the array of words + * storing the bits. + *

+ * Unlike java.util.bitet, the fact that bits are packed into an array of longs + * is part of the interface. This allows efficient implementation of other algorithms + * by someone other than the author. It also allows one to efficiently implement + * alternate serialization or interchange formats. + *

+ * OpenBitSet is faster than java.util.BitSet in most operations + * and *much* faster at calculating cardinality of sets and results of set operations. + * It can also handle sets of larger cardinality (up to 64 * 2**32-1) + *

+ * The goals of OpenBitSet are the fastest implementation possible, and + * maximum code reuse. Extra safety and encapsulation + * may always be built on top, but if that's built in, the cost can never be removed (and + * hence people re-implement their own version in order to get better performance). + * If you want a "safe", totally encapsulated (and slower and limited) BitSet + * class, use java.util.BitSet. + *

+ *

Performance Results

+ * + Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M +
BitSet size = 1,000,000 +
Results are java.util.BitSet time divided by OpenBitSet time. + + + + + + + + + + +
cardinality intersect_count union nextSetBit get iterator
50% full 3.36 3.96 1.44 1.46 1.99 1.58
1% full 3.31 3.90   1.04   0.99
+
+Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M +
BitSet size = 1,000,000 +
Results are java.util.BitSet time divided by OpenBitSet time. + + + + + + + + + + +
cardinality intersect_count union nextSetBit get iterator
50% full 2.50 3.50 1.00 1.03 1.12 1.25
1% full 2.51 3.49   1.00   1.02
+ + * @version $Id$ + */ + +public class OpenBitSet extends DocIdSet implements Cloneable, Serializable { + protected long[] bits; + protected int wlen; // number of words (elements) used in the array + + /** Constructs an OpenBitSet large enough to hold numBits. + * + * @param numBits + */ + public OpenBitSet(long numBits) { + bits = new long[bits2words(numBits)]; + wlen = bits.length; + } + + public OpenBitSet() { + this(64); + } + + /** Constructs an OpenBitSet from an existing long[]. + *
+ * The first 64 bits are in long[0], + * with bit index 0 at the least significant bit, and bit index 63 at the most significant. + * Given a bit index, + * the word containing it is long[index/64], and it is at bit number index%64 within that word. + *

+ * numWords are the number of elements in the array that contain + * set bits (non-zero longs). + * numWords should be <= bits.length, and + * any existing words in the array at position >= numWords should be zero. + * + */ + public OpenBitSet(long[] bits, int numWords) { + this.bits = bits; + this.wlen = numWords; + } + + public DocIdSetIterator iterator() { + return new OpenBitSetIterator(bits, wlen); + } + + /** Returns the current capacity in bits (1 greater than the index of the last bit) */ + public long capacity() { return bits.length << 6; } + + /** + * Returns the current capacity of this set. Included for + * compatibility. This is *not* equal to {@link #cardinality} + */ + public long size() { + return capacity(); + } + + /** Returns true if there are no set bits */ + public boolean isEmpty() { return cardinality()==0; } + + /** Expert: returns the long[] storing the bits */ + public long[] getBits() { return bits; } + + /** Expert: sets a new long[] to use as the bit storage */ + public void setBits(long[] bits) { this.bits = bits; } + + /** Expert: gets the number of longs in the array that are in use */ + public int getNumWords() { return wlen; } + + /** Expert: sets the number of longs in the array that are in use */ + public void setNumWords(int nWords) { this.wlen=nWords; } + + + + /** Returns true or false for the specified bit index. */ + public boolean get(int index) { + int i = index >> 6; // div 64 + // signed shift will keep a negative index and force an + // array-index-out-of-bounds-exception, removing the need for an explicit check. + if (i>=bits.length) return false; + + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (bits[i] & bitmask) != 0; + } + + + /** Returns true or false for the specified bit index. + * The index should be less than the OpenBitSet size + */ + public boolean fastGet(int index) { + int i = index >> 6; // div 64 + // signed shift will keep a negative index and force an + // array-index-out-of-bounds-exception, removing the need for an explicit check. + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (bits[i] & bitmask) != 0; + } + + + + /** Returns true or false for the specified bit index + * The index should be less than the OpenBitSet size + */ + public boolean get(long index) { + int i = (int)(index >> 6); // div 64 + if (i>=bits.length) return false; + int bit = (int)index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (bits[i] & bitmask) != 0; + } + + /** Returns true or false for the specified bit index. Allows specifying + * an index outside the current size. */ + public boolean fastGet(long index) { + int i = (int)(index >> 6); // div 64 + int bit = (int)index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (bits[i] & bitmask) != 0; + } + + /* + // alternate implementation of get() + public boolean get1(int index) { + int i = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + return ((bits[i]>>>bit) & 0x01) != 0; + // this does a long shift and a bittest (on x86) vs + // a long shift, and a long AND, (the test for zero is prob a no-op) + // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0; + } + */ + + + /** returns 1 if the bit is set, 0 if not. + * The index should be less than the OpenBitSet size + */ + public int getBit(int index) { + int i = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + return ((int)(bits[i]>>>bit)) & 0x01; + } + + + /* + public boolean get2(int index) { + int word = index >> 6; // div 64 + int bit = index & 0x0000003f; // mod 64 + return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed + // we could right shift and check for parity bit, if it was available to us. + } + */ + + /** sets a bit, expanding the set size if necessary */ + public void set(long index) { + int wordNum = expandingWordNum(index); + int bit = (int)index & 0x3f; + long bitmask = 1L << bit; + bits[wordNum] |= bitmask; + } + + + /** Sets the bit at the specified index. + * The index should be less than the OpenBitSet size. + */ + public void fastSet(int index) { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] |= bitmask; + } + + /** Sets the bit at the specified index. + * The index should be less than the OpenBitSet size. + */ + public void fastSet(long index) { + int wordNum = (int)(index >> 6); + int bit = (int)index & 0x3f; + long bitmask = 1L << bit; + bits[wordNum] |= bitmask; + } + + /** Sets a range of bits, expanding the set size if necessary + * + * @param startIndex lower index + * @param endIndex one-past the last bit to set + */ + public void set(long startIndex, long endIndex) { + if (endIndex <= startIndex) return; + + int startWord = (int)(startIndex>>6); + + // since endIndex is one past the end, this is index of the last + // word to be changed. + int endWord = expandingWordNum(endIndex-1); + + long startmask = -1L << startIndex; + long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap + + if (startWord == endWord) { + bits[startWord] |= (startmask & endmask); + return; + } + + bits[startWord] |= startmask; + Arrays.fill(bits, startWord+1, endWord, -1L); + bits[endWord] |= endmask; + } + + + + protected int expandingWordNum(long index) { + int wordNum = (int)(index >> 6); + if (wordNum>=wlen) { + ensureCapacity(index+1); + wlen = wordNum+1; + } + return wordNum; + } + + + /** clears a bit. + * The index should be less than the OpenBitSet size. + */ + public void fastClear(int index) { + int wordNum = index >> 6; + int bit = index & 0x03f; + long bitmask = 1L << bit; + bits[wordNum] &= ~bitmask; + // hmmm, it takes one more instruction to clear than it does to set... any + // way to work around this? If there were only 63 bits per word, we could + // use a right shift of 10111111...111 in binary to position the 0 in the + // correct place (using sign extension). + // Could also use Long.rotateRight() or rotateLeft() *if* they were converted + // by the JVM into a native instruction. + // bits[word] &= Long.rotateLeft(0xfffffffe,bit); + } + + /** clears a bit. + * The index should be less than the OpenBitSet size. + */ + public void fastClear(long index) { + int wordNum = (int)(index >> 6); // div 64 + int bit = (int)index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] &= ~bitmask; + } + + /** clears a bit, allowing access beyond the current set size without changing the size.*/ + public void clear(long index) { + int wordNum = (int)(index >> 6); // div 64 + if (wordNum>=wlen) return; + int bit = (int)index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] &= ~bitmask; + } + + /** Clears a range of bits. Clearing past the end does not change the size of the set. + * + * @param startIndex lower index + * @param endIndex one-past the last bit to clear + */ + public void clear(long startIndex, long endIndex) { + if (endIndex <= startIndex) return; + + int startWord = (int)(startIndex>>6); + if (startWord >= wlen) return; + + // since endIndex is one past the end, this is index of the last + // word to be changed. + int endWord = (int)((endIndex-1)>>6); + + long startmask = -1L << startIndex; + long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap + + // invert masks since we are clearing + startmask = ~startmask; + endmask = ~endmask; + + if (startWord == endWord) { + bits[startWord] &= (startmask | endmask); + return; + } + + bits[startWord] &= startmask; + + int middle = Math.min(wlen, endWord); + Arrays.fill(bits, startWord+1, middle, 0L); + if (endWord < wlen) { + bits[endWord] &= endmask; + } + } + + + + /** Sets a bit and returns the previous value. + * The index should be less than the OpenBitSet size. + */ + public boolean getAndSet(int index) { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + boolean val = (bits[wordNum] & bitmask) != 0; + bits[wordNum] |= bitmask; + return val; + } + + /** Sets a bit and returns the previous value. + * The index should be less than the OpenBitSet size. + */ + public boolean getAndSet(long index) { + int wordNum = (int)(index >> 6); // div 64 + int bit = (int)index & 0x3f; // mod 64 + long bitmask = 1L << bit; + boolean val = (bits[wordNum] & bitmask) != 0; + bits[wordNum] |= bitmask; + return val; + } + + /** flips a bit. + * The index should be less than the OpenBitSet size. + */ + public void fastFlip(int index) { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] ^= bitmask; + } + + /** flips a bit. + * The index should be less than the OpenBitSet size. + */ + public void fastFlip(long index) { + int wordNum = (int)(index >> 6); // div 64 + int bit = (int)index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] ^= bitmask; + } + + /** flips a bit, expanding the set size if necessary */ + public void flip(long index) { + int wordNum = expandingWordNum(index); + int bit = (int)index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] ^= bitmask; + } + + /** flips a bit and returns the resulting bit value. + * The index should be less than the OpenBitSet size. + */ + public boolean flipAndGet(int index) { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] ^= bitmask; + return (bits[wordNum] & bitmask) != 0; + } + + /** flips a bit and returns the resulting bit value. + * The index should be less than the OpenBitSet size. + */ + public boolean flipAndGet(long index) { + int wordNum = (int)(index >> 6); // div 64 + int bit = (int)index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] ^= bitmask; + return (bits[wordNum] & bitmask) != 0; + } + + /** Flips a range of bits, expanding the set size if necessary + * + * @param startIndex lower index + * @param endIndex one-past the last bit to flip + */ + public void flip(long startIndex, long endIndex) { + if (endIndex <= startIndex) return; + int oldlen = wlen; + int startWord = (int)(startIndex>>6); + + // since endIndex is one past the end, this is index of the last + // word to be changed. + int endWord = expandingWordNum(endIndex-1); + + /*** Grrr, java shifting wraps around so -1L>>>64 == -1 + * for that reason, make sure not to use endmask if the bits to flip will + * be zero in the last word (redefine endWord to be the last changed...) + long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000 + long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111 + ***/ + + long startmask = -1L << startIndex; + long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap + + if (startWord == endWord) { + bits[startWord] ^= (startmask & endmask); + return; + } + + bits[startWord] ^= startmask; + + for (int i=startWord+1; i b.wlen) { + tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen); + } + return tot; + } + + /** Returns the popcount or cardinality of "a and not b" + * or "intersection(a, not(b))". + * Neither set is modified. + */ + public static long andNotCount(OpenBitSet a, OpenBitSet b) { + long tot = BitUtil.pop_andnot(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen)); + if (a.wlen > b.wlen) { + tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen); + } + return tot; + } + + /** Returns the popcount or cardinality of the exclusive-or of the two sets. + * Neither set is modified. + */ + public static long xorCount(OpenBitSet a, OpenBitSet b) { + long tot = BitUtil.pop_xor(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen)); + if (a.wlen < b.wlen) { + tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen-a.wlen); + } else if (a.wlen > b.wlen) { + tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen); + } + return tot; + } + + + /** Returns the index of the first set bit starting at the index specified. + * -1 is returned if there are no more set bits. + */ + public int nextSetBit(int index) { + int i = index>>6; + if (i>=wlen) return -1; + int subIndex = index & 0x3f; // index within the word + long word = bits[i] >> subIndex; // skip all the bits to the right of index + + if (word!=0) { + return (i<<6) + subIndex + BitUtil.ntz(word); + } + + while(++i < wlen) { + word = bits[i]; + if (word!=0) return (i<<6) + BitUtil.ntz(word); + } + + return -1; + } + + /** Returns the index of the first set bit starting at the index specified. + * -1 is returned if there are no more set bits. + */ + public long nextSetBit(long index) { + int i = (int)(index>>>6); + if (i>=wlen) return -1; + int subIndex = (int)index & 0x3f; // index within the word + long word = bits[i] >>> subIndex; // skip all the bits to the right of index + + if (word!=0) { + return (((long)i)<<6) + (subIndex + BitUtil.ntz(word)); + } + + while(++i < wlen) { + word = bits[i]; + if (word!=0) return (((long)i)<<6) + BitUtil.ntz(word); + } + + return -1; + } + + + + + public Object clone() { + try { + OpenBitSet obs = (OpenBitSet)super.clone(); + obs.bits = (long[]) obs.bits.clone(); // hopefully an array clone is as fast(er) than arraycopy + return obs; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + /** this = this AND other */ + public void intersect(OpenBitSet other) { + int newLen= Math.min(this.wlen,other.wlen); + long[] thisArr = this.bits; + long[] otherArr = other.bits; + // testing against zero can be more efficient + int pos=newLen; + while(--pos>=0) { + thisArr[pos] &= otherArr[pos]; + } + if (this.wlen > newLen) { + // fill zeros from the new shorter length to the old length + Arrays.fill(bits,newLen,this.wlen,0); + } + this.wlen = newLen; + } + + /** this = this OR other */ + public void union(OpenBitSet other) { + int newLen = Math.max(wlen,other.wlen); + ensureCapacityWords(newLen); + + long[] thisArr = this.bits; + long[] otherArr = other.bits; + int pos=Math.min(wlen,other.wlen); + while(--pos>=0) { + thisArr[pos] |= otherArr[pos]; + } + if (this.wlen < newLen) { + System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, newLen-this.wlen); + } + this.wlen = newLen; + } + + + /** Remove all elements set in other. this = this AND_NOT other */ + public void remove(OpenBitSet other) { + int idx = Math.min(wlen,other.wlen); + long[] thisArr = this.bits; + long[] otherArr = other.bits; + while(--idx>=0) { + thisArr[idx] &= ~otherArr[idx]; + } + } + + /** this = this XOR other */ + public void xor(OpenBitSet other) { + int newLen = Math.max(wlen,other.wlen); + ensureCapacityWords(newLen); + + long[] thisArr = this.bits; + long[] otherArr = other.bits; + int pos=Math.min(wlen,other.wlen); + while(--pos>=0) { + thisArr[pos] ^= otherArr[pos]; + } + if (this.wlen < newLen) { + System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, newLen-this.wlen); + } + this.wlen = newLen; + } + + + // some BitSet compatability methods + + //** see {@link intersect} */ + public void and(OpenBitSet other) { + intersect(other); + } + + //** see {@link union} */ + public void or(OpenBitSet other) { + union(other); + } + + //** see {@link andNot} */ + public void andNot(OpenBitSet other) { + remove(other); + } + + /** returns true if the sets have any elements in common */ + public boolean intersects(OpenBitSet other) { + int pos = Math.min(this.wlen, other.wlen); + long[] thisArr = this.bits; + long[] otherArr = other.bits; + while (--pos>=0) { + if ((thisArr[pos] & otherArr[pos])!=0) return true; + } + return false; + } + + + + /** Expand the long[] with the size given as a number of words (64 bit longs). + * getNumWords() is unchanged by this call. + */ + public void ensureCapacityWords(int numWords) { + if (bits.length < numWords) { + long[] newBits = new long[numWords]; + System.arraycopy(bits,0,newBits,0,wlen); + bits = newBits; + } + } + + /** Ensure that the long[] is big enough to hold numBits, expanding it if necessary. + * getNumWords() is unchanged by this call. + */ + public void ensureCapacity(long numBits) { + ensureCapacityWords(bits2words(numBits)); + } + + /** Lowers numWords, the number of words in use, + * by checking for trailing zero words. + */ + public void trimTrailingZeros() { + int idx = wlen-1; + while (idx>=0 && bits[idx]==0) idx--; + wlen = idx+1; + } + + /** returns the number of 64 bit words it would take to hold numBits */ + public static int bits2words(long numBits) { + return (int)(((numBits-1)>>>6)+1); + } + + + /** returns true if both sets have the same bits set */ + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof OpenBitSet)) return false; + OpenBitSet a; + OpenBitSet b = (OpenBitSet)o; + // make a the larger set. + if (b.wlen > this.wlen) { + a = b; b=this; + } else { + a=this; + } + + // check for any set bits out of the range of b + for (int i=a.wlen-1; i>=b.wlen; i--) { + if (a.bits[i]!=0) return false; + } + + for (int i=b.wlen-1; i>=0; i--) { + if (a.bits[i] != b.bits[i]) return false; + } + + return true; + } + + + public int hashCode() { + long h = 0x98761234; // something non-zero for length==0 + for (int i = bits.length; --i>=0;) { + h ^= bits[i]; + h = (h << 1) | (h >>> 31); // rotate left + } + return (int)((h>>32) ^ h); // fold leftmost bits into right + } + +} + + Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java ------------------------------------------------------------------------------ svn:eol-style = native Added: lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java?rev=617859&view=auto ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java (added) +++ lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java Sat Feb 2 11:04:03 2008 @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util; + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; + +/** An iterator to iterate over set bits in an OpenBitSet. + * This is faster than nextSetBit() for iterating over the complete set of bits, + * especially when the density of the bits set is high. + * + * @version $Id$ + */ +public class OpenBitSetIterator extends DocIdSetIterator { + + // The General Idea: instead of having an array per byte that has + // the offsets of the next set bit, that array could be + // packed inside a 32 bit integer (8 4 bit numbers). That + // should be faster than accessing an array for each index, and + // the total array size is kept smaller (256*sizeof(int))=1K + protected final static int[] bitlist={ + 0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x85 4,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321 + }; + /***** the python code that generated bitlist + def bits2int(val): + arr=0 + for shift in range(8,0,-1): + if val & 0x80: + arr = (arr << 4) | shift + val = val << 1 + return arr + + def int_table(): + tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ] + return ','.join(tbl) + ******/ + + // hmmm, what about an iterator that finds zeros though, + // or a reverse iterator... should they be separate classes + // for efficiency, or have a common root interface? (or + // maybe both? could ask for a SetBitsIterator, etc... + + + private final long[] arr; + private final int words; + private int i=-1; + private long word; + private int wordShift; + private int indexArray; + private int curDocId; + + public OpenBitSetIterator(OpenBitSet obs) { + this(obs.getBits(), obs.getNumWords()); + } + + public OpenBitSetIterator(long[] bits, int numWords) { + arr = bits; + words = numWords; + } + + // 64 bit shifts + private void shift() { + if ((int)word ==0) {wordShift +=32; word = word >>>32; } + if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; } + if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; } + indexArray = bitlist[(int)word & 0xff]; + } + + /***** alternate shift implementations + // 32 bit shifts, but a long shift needed at the end + private void shift2() { + int y = (int)word; + if (y==0) {wordShift +=32; y = (int)(word >>>32); } + if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; } + if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; } + indexArray = bitlist[y & 0xff]; + word >>>= (wordShift +1); + } + + private void shift3() { + int lower = (int)word; + int lowByte = lower & 0xff; + if (lowByte != 0) { + indexArray=bitlist[lowByte]; + return; + } + shift(); + } + ******/ + + public boolean next() { + if (indexArray==0) { + if (word!=0) { + word >>>= 8; + wordShift += 8; + } + + while (word==0) { + if (++i >= words) { + curDocId = -1; + return false; + } + word = arr[i]; + wordShift =-1; // loop invariant code motion should move this + } + + // after the first time, should I go with a linear search, or + // stick with the binary search in shift? + shift(); + } + + int bitIndex = (indexArray & 0x0f) + wordShift; + indexArray >>>= 4; + // should i<<6 be cached as a separate variable? + // it would only save one cycle in the best circumstances. + curDocId = (i<<6) + bitIndex; + return true; + } + + public boolean skipTo(int target) { + indexArray=0; + i = target >> 6; + if (i>=words) { + word =0; // setup so next() will also return -1 + curDocId = -1; + return false; + } + wordShift = target & 0x3f; + word = arr[i] >>> wordShift; + if (word !=0) { + wordShift--; // compensate for 1 based arrIndex + } else { + while (word ==0) { + if (++i >= words) { + curDocId = -1; + return false; + } + word = arr[i]; + } + wordShift =-1; + } + + shift(); + + int bitIndex = (indexArray & 0x0f) + wordShift; + indexArray >>>= 4; + // should i<<6 be cached as a separate variable? + // it would only save one cycle in the best circumstances. + curDocId = (i<<6) + bitIndex; + return true; + } + + public int doc() { + return this.curDocId; + } + +} Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java ------------------------------------------------------------------------------ svn:eol-style = native Added: lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java?rev=617859&view=auto ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java (added) +++ lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java Sat Feb 2 11:04:03 2008 @@ -0,0 +1,218 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + +/** + * Store and iterate sorted integers in compressed form in RAM. + *
The code for compressing the differences between ascending integers was + * borrowed from {@link org.apache.lucene.store.IndexInput} and + * {@link org.apache.lucene.store.IndexOutput}. + */ +public class SortedVIntList extends DocIdSet { + /** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, + * a SortedVIntList representing the index numbers of the set bits + * will be smaller than that BitSet. + */ + final static int BITS2VINTLIST_SIZE = 8; + + private int size; + private byte[] bytes; + private int lastBytePos; + + /** + * Create a SortedVIntList from all elements of an array of integers. + * + * @param sortedInts A sorted array of non negative integers. + */ + public SortedVIntList(int[] sortedInts) { + this(sortedInts, sortedInts.length); + } + + /** + * Create a SortedVIntList from an array of integers. + * @param sortedInts An array of sorted non negative integers. + * @param inputSize The number of integers to be used from the array. + */ + public SortedVIntList(int[] sortedInts, int inputSize) { + SortedVIntListBuilder builder = new SortedVIntListBuilder(); + for (int i = 0; i < inputSize; i++) { + builder.addInt(sortedInts[i]); + } + builder.done(); + } + + /** + * Create a SortedVIntList from a BitSet. + * @param bits A bit set representing a set of integers. + */ + public SortedVIntList(BitSet bits) { + SortedVIntListBuilder builder = new SortedVIntListBuilder(); + int nextInt = bits.nextSetBit(0); + while (nextInt != -1) { + builder.addInt(nextInt); + nextInt = bits.nextSetBit(nextInt + 1); + } + builder.done(); + } + + /** + * Create a SortedVIntList from an OpenBitSet. + * @param bits A bit set representing a set of integers. + */ + public SortedVIntList(OpenBitSet bits) { + SortedVIntListBuilder builder = new SortedVIntListBuilder(); + int nextInt = bits.nextSetBit(0); + while (nextInt != -1) { + builder.addInt(nextInt); + nextInt = bits.nextSetBit(nextInt + 1); + } + builder.done(); + } + + /** + * Create a SortedVIntList. + * @param docIdSetIterator An iterator providing document numbers as a set of integers. + * This DocIdSetIterator is iterated completely when this constructor + * is called and it must provide the integers in non + * decreasing order. + */ + public SortedVIntList(DocIdSetIterator docIdSetIterator) throws IOException { + SortedVIntListBuilder builder = new SortedVIntListBuilder(); + while (docIdSetIterator.next()) { + builder.addInt(docIdSetIterator.doc()); + } + builder.done(); + } + + + private class SortedVIntListBuilder { + private int lastInt = 0; + + SortedVIntListBuilder() { + initBytes(); + lastInt = 0; + } + + void addInt(int nextInt) { + int diff = nextInt - lastInt; + if (diff < 0) { + throw new IllegalArgumentException( + "Input not sorted or first element negative."); + } + + if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) { + // biggest possible int does not fit + resizeBytes((bytes.length * 2) + MAX_BYTES_PER_INT); + } + + // See org.apache.lucene.store.IndexOutput.writeVInt() + while ((diff & ~VB1) != 0) { // The high bit of the next byte needs to be set. + bytes[lastBytePos++] = (byte) ((diff & VB1) | ~VB1); + diff >>>= BIT_SHIFT; + } + bytes[lastBytePos++] = (byte) diff; // Last byte, high bit not set. + size++; + lastInt = nextInt; + } + + void done() { + resizeBytes(lastBytePos); + } + } + + + private void initBytes() { + size = 0; + bytes = new byte[128]; // initial byte size + lastBytePos = 0; + } + + private void resizeBytes(int newSize) { + if (newSize != bytes.length) { + byte[] newBytes = new byte[newSize]; + System.arraycopy(bytes, 0, newBytes, 0, lastBytePos); + bytes = newBytes; + } + } + + private static final int VB1 = 0x7F; + private static final int BIT_SHIFT = 7; + private final int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1; + + /** + * @return The total number of sorted integers. + */ + public int size() { + return size; + } + + /** + * @return The size of the byte array storing the compressed sorted integers. + */ + public int getByteSize() { + return bytes.length; + } + + /** + * @return An iterator over the sorted integers. + */ + public DocIdSetIterator iterator() { + return new DocIdSetIterator() { + int bytePos = 0; + int lastInt = 0; + + private void advance() { + // See org.apache.lucene.store.IndexInput.readVInt() + byte b = bytes[bytePos++]; + lastInt += b & VB1; + for (int s = BIT_SHIFT; (b & ~VB1) != 0; s += BIT_SHIFT) { + b = bytes[bytePos++]; + lastInt += (b & VB1) << s; + } + } + + public int doc() {return lastInt;} + + public boolean next() { + if (bytePos >= lastBytePos) { + return false; + } else { + advance(); + return true; + } + } + + public boolean skipTo(int docNr) { + while (bytePos < lastBytePos) { + advance(); + if (lastInt >= docNr) { // No skipping to docNr available. + return true; + } + } + return false; + } + }; + } +} + Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: lucene/java/trunk/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java Sat Feb 2 11:04:03 2008 @@ -43,13 +43,13 @@ this.shouldHaveCache = shouldHaveCache; } - public BitSet bits(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { if (cache == null) { cache = new WeakHashMap(); } synchronized (cache) { // check cache - BitSet cached = (BitSet) cache.get(reader); + DocIdSet cached = (DocIdSet) cache.get(reader); if (shouldHaveCache) { TestCase.assertNotNull("Cache should have data ", cached); } else { @@ -60,7 +60,7 @@ } } - final BitSet bits = filter.bits(reader); + final DocIdSet bits = filter.getDocIdSet(reader); synchronized (cache) { // update cache cache.put(reader, bits); Modified: lucene/java/trunk/src/test/org/apache/lucene/search/MockFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/MockFilter.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/MockFilter.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/MockFilter.java Sat Feb 2 11:04:03 2008 @@ -18,14 +18,15 @@ */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.DocIdBitSet; import java.util.BitSet; public class MockFilter extends Filter { private boolean wasCalled; - public BitSet bits(IndexReader reader) { + public DocIdSet getDocIdSet(IndexReader reader) { wasCalled = true; - return new BitSet(); + return new DocIdBitSet(new BitSet()); } public void clear() { Modified: lucene/java/trunk/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java Sat Feb 2 11:04:03 2008 @@ -42,7 +42,7 @@ this.shouldHaveCache = shouldHaveCache; } - public BitSet bits(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { Filter cachedFilter = FilterManager.getInstance().getFilter(filter); TestCase.assertNotNull("Filter should not be null", cachedFilter); @@ -55,6 +55,6 @@ if (filter instanceof CachingWrapperFilterHelper) { ((CachingWrapperFilterHelper)cachedFilter).setShouldHaveCache(shouldHaveCache); } - return cachedFilter.bits(reader); + return cachedFilter.getDocIdSet(reader); } } Modified: lucene/java/trunk/src/test/org/apache/lucene/search/SingleDocTestFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/SingleDocTestFilter.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/SingleDocTestFilter.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/SingleDocTestFilter.java Sat Feb 2 11:04:03 2008 @@ -18,6 +18,7 @@ */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.DocIdBitSet; import java.util.BitSet; import java.io.IOException; @@ -29,9 +30,9 @@ this.doc = doc; } - public BitSet bits(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { BitSet bits = new BitSet(reader.maxDoc()); bits.set(doc); - return bits; + return new DocIdBitSet(bits); } } Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java Sat Feb 2 11:04:03 2008 @@ -36,12 +36,12 @@ CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // first time, nested filter is called - cacher.bits(reader); + cacher.getDocIdSet(reader); assertTrue("first time", filter.wasCalled()); // second time, nested filter should not be called filter.clear(); - cacher.bits(reader); + cacher.getDocIdSet(reader); assertFalse("second time", filter.wasCalled()); reader.close(); Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestExplanations.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestExplanations.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/TestExplanations.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/TestExplanations.java Sat Feb 2 11:04:03 2008 @@ -33,6 +33,7 @@ import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.DocIdBitSet; import java.util.Random; import java.util.BitSet; @@ -122,12 +123,12 @@ public ItemizedFilter(int[] docs) { this.docs = docs; } - public BitSet bits(IndexReader r) { + public DocIdSet getDocIdSet(IndexReader r) { BitSet b = new BitSet(r.maxDoc()); for (int i = 0; i < docs.length; i++) { b.set(docs[i]); } - return b; + return new DocIdBitSet(b); } } Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestFilteredQuery.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestFilteredQuery.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/TestFilteredQuery.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/TestFilteredQuery.java Sat Feb 2 11:04:03 2008 @@ -26,6 +26,7 @@ import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.DocIdBitSet; import java.util.BitSet; @@ -82,11 +83,11 @@ // must be static for serialization tests private static Filter newStaticFilterB() { return new Filter() { - public BitSet bits (IndexReader reader) { + public DocIdSet getDocIdSet (IndexReader reader) { BitSet bitset = new BitSet(5); bitset.set (1); bitset.set (3); - return bitset; + return new DocIdBitSet(bitset); } }; } @@ -150,10 +151,10 @@ // must be static for serialization tests private static Filter newStaticFilterA() { return new Filter() { - public BitSet bits (IndexReader reader) { + public DocIdSet getDocIdSet (IndexReader reader) { BitSet bitset = new BitSet(5); bitset.set(0, 5); - return bitset; + return new DocIdBitSet(bitset); } }; } @@ -198,5 +199,6 @@ QueryUtils.check(query,searcher); } } + Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java Sat Feb 2 11:04:03 2008 @@ -91,7 +91,7 @@ public void testTermRemoteFilter() throws Exception { - CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a")))); + CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a")))); // This is what we are fixing - if one uses a CachingWrapperFilter(Helper) it will never // cache the filter on the remote site @@ -112,16 +112,16 @@ // assert that we get the same cached Filter, even if we create a new instance of RemoteCachingWrapperFilter(Helper) // this should pass because the Filter parameters are the same, and the cache uses Filter's hashCode() as cache keys, // and Filters' hashCode() builds on Filter parameters, not the Filter instance itself - rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false); + rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))), false); rcwfh.shouldHaveCache(false); search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A"); - rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false); + rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))), false); rcwfh.shouldHaveCache(true); search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A"); // assert that we get a non-cached version of the Filter because this is a new Query (type:b) - rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "b"))), false); + rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "b"))), false); rcwfh.shouldHaveCache(false); search(new TermQuery(new Term("type", "b")), rcwfh, 0, "B"); } Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestRemoteSearchable.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestRemoteSearchable.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/TestRemoteSearchable.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/TestRemoteSearchable.java Sat Feb 2 11:04:03 2008 @@ -116,11 +116,11 @@ Searcher searcher = new MultiSearcher(searchables); Hits hits = searcher.search( new TermQuery(new Term("test", "text")), - new QueryFilter(new TermQuery(new Term("test", "test")))); + new QueryWrapperFilter(new TermQuery(new Term("test", "test")))); assertEquals(1, hits.length()); Hits nohits = searcher.search( new TermQuery(new Term("test", "text")), - new QueryFilter(new TermQuery(new Term("test", "non-existent-term")))); + new QueryWrapperFilter(new TermQuery(new Term("test", "non-existent-term")))); assertEquals(0, nohits.length()); } @@ -129,7 +129,7 @@ Searchable[] searchables = { getRemote() }; Searcher searcher = new MultiSearcher(searchables); Hits hits = searcher.search( - new ConstantScoreQuery(new QueryFilter( + new ConstantScoreQuery(new QueryWrapperFilter( new TermQuery(new Term("test", "test"))))); assertEquals(1, hits.length()); } Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestScorerPerf.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestScorerPerf.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/TestScorerPerf.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/TestScorerPerf.java Sat Feb 2 11:04:03 2008 @@ -1,5 +1,6 @@ package org.apache.lucene.search; +import org.apache.lucene.util.DocIdBitSet; import org.apache.lucene.util.LuceneTestCase; import java.util.Random; @@ -95,16 +96,6 @@ return sets; } - public static class BitSetFilter extends Filter { - public BitSet set; - public BitSetFilter(BitSet set) { - this.set = set; - } - public BitSet bits(IndexReader reader) throws IOException { - return set; - } - } - public static class CountingHitCollector extends HitCollector { int count=0; int sum=0; @@ -137,8 +128,12 @@ BitSet addClause(BooleanQuery bq, BitSet result) { - BitSet rnd = sets[r.nextInt(sets.length)]; - Query q = new ConstantScoreQuery(new BitSetFilter(rnd)); + final BitSet rnd = sets[r.nextInt(sets.length)]; + Query q = new ConstantScoreQuery(new Filter() { + public DocIdSet getDocIdSet(IndexReader reader) { + return new DocIdBitSet(rnd); + }; + }); bq.add(q, BooleanClause.Occur.MUST); if (validate) { if (result==null) result = (BitSet)rnd.clone(); Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java Sat Feb 2 11:04:03 2008 @@ -28,6 +28,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.DocIdBitSet; import java.io.IOException; import java.io.Serializable; @@ -571,10 +572,10 @@ // a filter that only allows through the first hit Filter filt = new Filter() { - public BitSet bits(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { BitSet bs = new BitSet(reader.maxDoc()); bs.set(docs1.scoreDocs[0].doc); - return bs; + return new DocIdBitSet(bs); } }; Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java?rev=617859&r1=617858&r2=617859&view=diff ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java Sat Feb 2 11:04:03 2008 @@ -56,20 +56,36 @@ SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(10).trim())); SpanQueryFilter filter = new SpanQueryFilter(query); SpanFilterResult result = filter.bitSpans(reader); - BitSet bits = result.getBits(); - assertTrue("bits is null and it shouldn't be", bits != null); - assertTrue("tenth bit is not on", bits.get(10)); + DocIdSet docIdSet = result.getDocIdSet(); + assertTrue("docIdSet is null and it shouldn't be", docIdSet != null); + assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10); List spans = result.getPositions(); assertTrue("spans is null and it shouldn't be", spans != null); - assertTrue("spans Size: " + spans.size() + " is not: " + bits.cardinality(), spans.size() == bits.cardinality()); + int size = getDocIdSetSize(docIdSet); + assertTrue("spans Size: " + spans.size() + " is not: " + size, spans.size() == size); for (Iterator iterator = spans.iterator(); iterator.hasNext();) { SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.next(); assertTrue("info is null and it shouldn't be", info != null); //The doc should indicate the bit is on - assertTrue("Bit is not on and it should be", bits.get(info.getDoc())); + assertContainsDocId("docIdSet doesn't contain docId " + info.getDoc(), docIdSet, info.getDoc()); //There should be two positions in each assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2); } reader.close(); + } + + int getDocIdSetSize(DocIdSet docIdSet) throws Exception { + int size = 0; + DocIdSetIterator it = docIdSet.iterator(); + while (it.next()) { + size++; + } + return size; + } + + public void assertContainsDocId(String msg, DocIdSet docIdSet, int docId) throws Exception { + DocIdSetIterator it = docIdSet.iterator(); + assertTrue(msg, it.skipTo(docId)); + assertTrue(msg, it.doc() == docId); } } Added: lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java?rev=617859&view=auto ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java (added) +++ lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java Sat Feb 2 11:04:03 2008 @@ -0,0 +1,203 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util; + +import junit.framework.TestCase; + +import java.util.Random; +import java.util.BitSet; + +/** + * @version $Id$ + */ +public class TestOpenBitSet extends TestCase { + static Random rand = new Random(); + + void doGet(BitSet a, OpenBitSet b) { + int max = a.size(); + for (int i=0; i=0); + } + + // test interleaving different BitSetIterator.next() + void doIterate(BitSet a, OpenBitSet b) { + int aa=-1,bb=-1; + OpenBitSetIterator iterator = new OpenBitSetIterator(b); + do { + aa = a.nextSetBit(aa+1); + if (rand.nextBoolean()) + iterator.next(); + else + iterator.skipTo(bb+1); + bb = iterator.doc(); + assertEquals(aa,bb); + } while (aa>=0); + } + + + void doRandomSets(int maxSize, int iter) { + BitSet a0=null; + OpenBitSet b0=null; + + for (int i=0; i0) { + int nOper = rand.nextInt(sz); + for (int j=0; j>1)+1); + BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex); + OpenBitSet bb = (OpenBitSet)b.clone(); bb.flip(fromIndex,toIndex); + + doIterate(aa,bb); // a problem here is from flip or doIterate + + fromIndex = rand.nextInt(sz+80); + toIndex = fromIndex + rand.nextInt((sz>>1)+1); + aa = (BitSet)a.clone(); aa.clear(fromIndex,toIndex); + bb = (OpenBitSet)b.clone(); bb.clear(fromIndex,toIndex); + + doNextSetBit(aa,bb); // a problem here is from clear() or nextSetBit + + fromIndex = rand.nextInt(sz+80); + toIndex = fromIndex + rand.nextInt((sz>>1)+1); + aa = (BitSet)a.clone(); aa.set(fromIndex,toIndex); + bb = (OpenBitSet)b.clone(); bb.set(fromIndex,toIndex); + + doNextSetBit(aa,bb); // a problem here is from set() or nextSetBit + + + if (a0 != null) { + assertEquals( a.equals(a0), b.equals(b0)); + + assertEquals(a.cardinality(), b.cardinality()); + + BitSet a_and = (BitSet)a.clone(); a_and.and(a0); + BitSet a_or = (BitSet)a.clone(); a_or.or(a0); + BitSet a_xor = (BitSet)a.clone(); a_xor.xor(a0); + BitSet a_andn = (BitSet)a.clone(); a_andn.andNot(a0); + + OpenBitSet b_and = (OpenBitSet)b.clone(); assertEquals(b,b_and); b_and.and(b0); + OpenBitSet b_or = (OpenBitSet)b.clone(); b_or.or(b0); + OpenBitSet b_xor = (OpenBitSet)b.clone(); b_xor.xor(b0); + OpenBitSet b_andn = (OpenBitSet)b.clone(); b_andn.andNot(b0); + + doIterate(a_and,b_and); + doIterate(a_or,b_or); + doIterate(a_xor,b_xor); + doIterate(a_andn,b_andn); + + assertEquals(a_and.cardinality(), b_and.cardinality()); + assertEquals(a_or.cardinality(), b_or.cardinality()); + assertEquals(a_xor.cardinality(), b_xor.cardinality()); + assertEquals(a_andn.cardinality(), b_andn.cardinality()); + + // test non-mutating popcounts + assertEquals(b_and.cardinality(), OpenBitSet.intersectionCount(b,b0)); + assertEquals(b_or.cardinality(), OpenBitSet.unionCount(b,b0)); + assertEquals(b_xor.cardinality(), OpenBitSet.xorCount(b,b0)); + assertEquals(b_andn.cardinality(), OpenBitSet.andNotCount(b,b0)); + } + + a0=a; + b0=b; + } + } + + // large enough to flush obvious bugs, small enough to run in <.5 sec as part of a + // larger testsuite. + public void testSmall() { + doRandomSets(1200,1000); + } + + public void testBig() { + // uncomment to run a bigger test (~2 minutes). + // doRandomSets(2000,200000); + } + + public void testEquals() { + OpenBitSet b1 = new OpenBitSet(1111); + OpenBitSet b2 = new OpenBitSet(2222); + assertTrue(b1.equals(b2)); + assertTrue(b2.equals(b1)); + b1.set(10); + assertFalse(b1.equals(b2)); + assertFalse(b2.equals(b1)); + b2.set(10); + assertTrue(b1.equals(b2)); + assertTrue(b2.equals(b1)); + b2.set(2221); + assertFalse(b1.equals(b2)); + assertFalse(b2.equals(b1)); + b1.set(2221); + assertTrue(b1.equals(b2)); + assertTrue(b2.equals(b1)); + + // try different type of object + assertFalse(b1.equals(new Object())); + } + +} + + + Propchange: lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java ------------------------------------------------------------------------------ svn:eol-style = native Added: lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java?rev=617859&view=auto ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java (added) +++ lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java Sat Feb 2 11:04:03 2008 @@ -0,0 +1,198 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.BitSet; + +import junit.framework.TestCase; +import junit.framework.TestSuite; +import junit.textui.TestRunner; + +import org.apache.lucene.search.DocIdSetIterator; + +public class TestSortedVIntList extends TestCase { + /** Main for running test case by itself. */ + public static void main(String args[]) { + TestRunner.run(new TestSuite(TestSortedVIntList.class)); + } + + void tstIterator ( + SortedVIntList vintList, + int[] ints) throws IOException { + for (int i = 0; i < ints.length; i++) { + if ((i > 0) && (ints[i-1] == ints[i])) { + return; // DocNrSkipper should not skip to same document. + } + } + DocIdSetIterator m = vintList.iterator(); + for (int i = 0; i < ints.length; i++) { + assertTrue("No end of Matcher at: " + i, m.next()); + assertEquals(ints[i], m.doc()); + } + assertTrue("End of Matcher", (! m.next())); + } + + void tstVIntList( + SortedVIntList vintList, + int[] ints, + int expectedByteSize) throws IOException { + assertEquals("Size", ints.length, vintList.size()); + assertEquals("Byte size", expectedByteSize, vintList.getByteSize()); + tstIterator(vintList, ints); + } + + public void tstViaBitSet(int [] ints, int expectedByteSize) throws IOException { + final int MAX_INT_FOR_BITSET = 1024 * 1024; + BitSet bs = new BitSet(); + for (int i = 0; i < ints.length; i++) { + if (ints[i] > MAX_INT_FOR_BITSET) { + return; // BitSet takes too much memory + } + if ((i > 0) && (ints[i-1] == ints[i])) { + return; // BitSet cannot store duplicate. + } + bs.set(ints[i]); + } + SortedVIntList svil = new SortedVIntList(bs); + tstVIntList(svil, ints, expectedByteSize); + tstVIntList(new SortedVIntList(svil.iterator()), ints, expectedByteSize); + } + + private static final int VB1 = 0x7F; + private static final int BIT_SHIFT = 7; + private static final int VB2 = (VB1 << BIT_SHIFT) | VB1; + private static final int VB3 = (VB2 << BIT_SHIFT) | VB1; + private static final int VB4 = (VB3 << BIT_SHIFT) | VB1; + + private int vIntByteSize(int i) { + assert i >= 0; + if (i <= VB1) return 1; + if (i <= VB2) return 2; + if (i <= VB3) return 3; + if (i <= VB4) return 4; + return 5; + } + + private int vIntListByteSize(int [] ints) { + int byteSize = 0; + int last = 0; + for (int i = 0; i < ints.length; i++) { + byteSize += vIntByteSize(ints[i] - last); + last = ints[i]; + } + return byteSize; + } + + public void tstInts(int [] ints) { + int expectedByteSize = vIntListByteSize(ints); + try { + tstVIntList(new SortedVIntList(ints), ints, expectedByteSize); + tstViaBitSet(ints, expectedByteSize); + } catch (IOException ioe) { + throw new Error(ioe); + } + } + + public void tstIllegalArgExc(int [] ints) { + try { + new SortedVIntList(ints); + } + catch (IllegalArgumentException e) { + return; + } + fail("Expected IllegalArgumentException"); + } + + private int[] fibArray(int a, int b, int size) { + final int[] fib = new int[size]; + fib[0] = a; + fib[1] = b; + for (int i = 2; i < size; i++) { + fib[i] = fib[i-1] + fib[i-2]; + } + return fib; + } + + private int[] reverseDiffs(int []ints) { // reverse the order of the successive differences + final int[] res = new int[ints.length]; + for (int i = 0; i < ints.length; i++) { + res[i] = ints[ints.length - 1] + (ints[0] - ints[ints.length - 1 - i]); + } + return res; + } + + public void test01() { + tstInts(new int[] {}); + } + public void test02() { + tstInts(new int[] {0}); + } + public void test03() { + tstInts(new int[] {0,Integer.MAX_VALUE}); + } + public void test04a() { + tstInts(new int[] {0, VB2 - 1}); + } + public void test04b() { + tstInts(new int[] {0, VB2}); + } + public void test04c() { + tstInts(new int[] {0, VB2 + 1}); + } + public void test05() { + tstInts(fibArray(0,1,7)); // includes duplicate value 1 + } + public void test05b() { + tstInts(reverseDiffs(fibArray(0,1,7))); + } + public void test06() { + tstInts(fibArray(1,2,45)); // no duplicates, size 46 exceeds max int. + } + public void test06b() { + tstInts(reverseDiffs(fibArray(1,2,45))); + } + public void test07a() { + tstInts(new int[] {0, VB3}); + } + public void test07b() { + tstInts(new int[] {1, VB3 + 2}); + } + public void test07c() { + tstInts(new int[] {2, VB3 + 4}); + } + public void test08a() { + tstInts(new int[] {0, VB4 + 1}); + } + public void test08b() { + tstInts(new int[] {1, VB4 + 1}); + } + public void test08c() { + tstInts(new int[] {2, VB4 + 1}); + } + + public void test10() { + tstIllegalArgExc(new int[] {-1}); + } + public void test11() { + tstIllegalArgExc(new int[] {1,0}); + } + public void test12() { + tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0}); + } +} Propchange: lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java ------------------------------------------------------------------------------ svn:eol-style = native