lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yo...@apache.org
Subject svn commit: r712908 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/util/BitUtil.java src/java/org/apache/lucene/util/OpenBitSet.java src/java/org/apache/lucene/util/OpenBitSetDISI.java
Date Tue, 11 Nov 2008 01:54:49 GMT
Author: yonik
Date: Mon Nov 10 17:54:49 2008
New Revision: 712908

URL: http://svn.apache.org/viewvc?rev=712908&view=rev
Log:
LUCENE-1443: Performance improvement for OpenBitSetDISI.inPlaceAnd()

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/util/BitUtil.java
    lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java
    lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetDISI.java   (contents, props
changed)

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=712908&r1=712907&r2=712908&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Nov 10 17:54:49 2008
@@ -70,6 +70,9 @@
     more efficient (single pass) by not creating & populating an
     intermediate OpenBitSet (Paul Elschot, Mike McCandless)
 
+ 2. LUCENE-1443: Performance improvement for OpenBitSetDISI.inPlaceAnd()
+    (Paul Elschot via yonik)
+
 
 Documentation
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/BitUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/BitUtil.java?rev=712908&r1=712907&r2=712908&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/BitUtil.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/BitUtil.java Mon Nov 10 17:54:49 2008
@@ -688,7 +688,7 @@
   public static final byte[] ntzTable = {8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
 
 
-  /** Returns number of trailing zeros in the 64 bit long value. */
+  /** Returns number of trailing zeros in a 64 bit long value. */
   public static int ntz(long val) {
     // A full binary search to determine the low byte was slower than
     // a linear search for nextSetBit().  This is most likely because
@@ -728,6 +728,23 @@
     }
   }
 
+  /** Returns number of trailing zeros in a 32 bit int value. */
+  public static int ntz(int val) {
+    // This implementation does a single binary search at the top level only.
+    // In addition, the case of a non-zero first byte is checked for first
+    // because it is the most common in dense bit arrays.
+
+    int lowByte = val & 0xff;
+    if (lowByte != 0) return ntzTable[lowByte];
+    lowByte = (val>>>8) & 0xff;
+    if (lowByte != 0) return ntzTable[lowByte] + 8;
+    lowByte = (val>>>16) & 0xff;
+    if (lowByte != 0) return ntzTable[lowByte] + 16;
+    // no need to mask off low byte for the last byte.
+    // no need to check for zero on the last byte either.
+    return ntzTable[val>>>24] + 24;
+  }
+
   /** returns 0 based index of first set bit
    * (only works for x!=0)
    * <br/> This is an alternate implementation of ntz()

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java?rev=712908&r1=712907&r2=712908&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSet.java Mon Nov 10 17:54:49
2008
@@ -332,6 +332,43 @@
    * @param startIndex lower index
    * @param endIndex one-past the last bit to clear
    */
+  public void clear(int startIndex, int endIndex) {
+    if (endIndex <= startIndex) return;
+
+    int startWord = (startIndex>>6);
+    if (startWord >= wlen) return;
+
+    // since endIndex is one past the end, this is index of the last
+    // word to be changed.
+    int endWord   = ((endIndex-1)>>6);
+
+    long startmask = -1L << startIndex;
+    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as
-endIndex due to wrap
+
+    // invert masks since we are clearing
+    startmask = ~startmask;
+    endmask = ~endmask;
+
+    if (startWord == endWord) {
+      bits[startWord] &= (startmask | endmask);
+      return;
+    }
+
+    bits[startWord] &= startmask;
+
+    int middle = Math.min(wlen, endWord);
+    Arrays.fill(bits, startWord+1, middle, 0L);
+    if (endWord < wlen) {
+      bits[endWord] &= endmask;
+    }
+  }
+
+
+  /** Clears a range of bits.  Clearing past the end does not change the size of the set.
+   *
+   * @param startIndex lower index
+   * @param endIndex one-past the last bit to clear
+   */
   public void clear(long startIndex, long endIndex) {
     if (endIndex <= startIndex) return;
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetDISI.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetDISI.java?rev=712908&r1=712907&r2=712908&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetDISI.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetDISI.java Mon Nov 10 17:54:49
2008
@@ -1,101 +1,96 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import org.apache.lucene.search.DocIdSetIterator;
- 
-public class OpenBitSetDISI extends OpenBitSet {
-
-  /** Construct an OpenBitSetDISI with its bits set
-   * from the doc ids of the given DocIdSetIterator.
-   * Also give a maximum size one larger than the largest doc id for which a
-   * bit may ever be set on this OpenBitSetDISI.
-   */
-  public OpenBitSetDISI(DocIdSetIterator disi, int maxSize) throws IOException {
-    super(maxSize);
-    inPlaceOr(disi);
-  }
-
-  /** Construct an OpenBitSetDISI with no bits set, and a given maximum size
-   * one larger than the largest doc id for which a bit may ever be set
-   * on this OpenBitSetDISI.
-   */
-  public OpenBitSetDISI(int maxSize) {
-    super(maxSize);
-  }
-
-  /**
-   * Perform an inplace OR with the doc ids from a given DocIdSetIterator,
-   * setting the bit for each such doc id.
-   * These doc ids should be smaller than the maximum size passed to the
-   * constructor.
-   */   
-  public void inPlaceOr(DocIdSetIterator disi) throws IOException {
-    while (disi.next() && (disi.doc() < size())) {
-      fastSet(disi.doc());
-    }
-  }
-
-  /**
-   * Perform an inplace AND with the doc ids from a given DocIdSetIterator,
-   * leaving only the bits set for which the doc ids are in common.
-   * These doc ids should be smaller than the maximum size passed to the
-   * constructor.
-   */   
-  public void inPlaceAnd(DocIdSetIterator disi) throws IOException {
-    int index = nextSetBit(0);
-    int lastNotCleared = -1;
-    while ((index != -1) && disi.skipTo(index)) {
-      while ((index != -1) && (index < disi.doc())) {
-        fastClear(index);
-        index = nextSetBit(index + 1);
-      }
-      if (index == disi.doc()) {
-        lastNotCleared = index;
-        index++;
-      }
-      assert (index == -1) || (index > disi.doc());
-    }
-    clear(lastNotCleared+1, size());
-  }
-
-  /**
-   * Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
-   * clearing all the bits for each such doc id.
-   * These doc ids should be smaller than the maximum size passed to the
-   * constructor.
-   */   
-  public void inPlaceNot(DocIdSetIterator disi) throws IOException {
-    while (disi.next() && (disi.doc() < size())) {
-      fastClear(disi.doc());
-    }
-  }
-
-  /**
-   * Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
-   * flipping all the bits for each such doc id.
-   * These doc ids should be smaller than the maximum size passed to the
-   * constructor.
-   */   
-  public void inPlaceXor(DocIdSetIterator disi) throws IOException {
-    while (disi.next() && (disi.doc() < size())) {
-      fastFlip(disi.doc());
-    }
-  }
-}
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.search.DocIdSetIterator;
+ 
+public class OpenBitSetDISI extends OpenBitSet {
+
+  /** Construct an OpenBitSetDISI with its bits set
+   * from the doc ids of the given DocIdSetIterator.
+   * Also give a maximum size one larger than the largest doc id for which a
+   * bit may ever be set on this OpenBitSetDISI.
+   */
+  public OpenBitSetDISI(DocIdSetIterator disi, int maxSize) throws IOException {
+    super(maxSize);
+    inPlaceOr(disi);
+  }
+
+  /** Construct an OpenBitSetDISI with no bits set, and a given maximum size
+   * one larger than the largest doc id for which a bit may ever be set
+   * on this OpenBitSetDISI.
+   */
+  public OpenBitSetDISI(int maxSize) {
+    super(maxSize);
+  }
+
+  /**
+   * Perform an inplace OR with the doc ids from a given DocIdSetIterator,
+   * setting the bit for each such doc id.
+   * These doc ids should be smaller than the maximum size passed to the
+   * constructor.
+   */   
+  public void inPlaceOr(DocIdSetIterator disi) throws IOException {
+    while (disi.next() && (disi.doc() < size())) {
+      fastSet(disi.doc());
+    }
+  }
+
+  /**
+   * Perform an inplace AND with the doc ids from a given DocIdSetIterator,
+   * leaving only the bits set for which the doc ids are in common.
+   * These doc ids should be smaller than the maximum size passed to the
+   * constructor.
+   */   
+  public void inPlaceAnd(DocIdSetIterator disi) throws IOException {
+    int bitSetDoc = nextSetBit(0);
+    while ((bitSetDoc != -1) && disi.skipTo(bitSetDoc)) {
+      int disiDoc = disi.doc();
+      clear(bitSetDoc, disiDoc);
+      bitSetDoc = nextSetBit(disiDoc + 1);
+    }
+    if (bitSetDoc != -1) {
+      clear(bitSetDoc, size());
+    }
+  }
+
+  /**
+   * Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
+   * clearing all the bits for each such doc id.
+   * These doc ids should be smaller than the maximum size passed to the
+   * constructor.
+   */   
+  public void inPlaceNot(DocIdSetIterator disi) throws IOException {
+    while (disi.next() && (disi.doc() < size())) {
+      fastClear(disi.doc());
+    }
+  }
+
+  /**
+   * Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
+   * flipping all the bits for each such doc id.
+   * These doc ids should be smaller than the maximum size passed to the
+   * constructor.
+   */   
+  public void inPlaceXor(DocIdSetIterator disi) throws IOException {
+    while (disi.next() && (disi.doc() < size())) {
+      fastFlip(disi.doc());
+    }
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetDISI.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message