lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpou...@apache.org
Subject svn commit: r1636913 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/util/ core/src/java/org/apache/lucene/util/packed/ core/src/test/org/apache/lucene/util/
Date Wed, 05 Nov 2014 16:32:45 GMT
Author: jpountz
Date: Wed Nov  5 16:32:44 2014
New Revision: 1636913

URL: http://svn.apache.org/r1636913
Log:
LUCENE-6040: Speedup broadword bit selection.

Added:
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java
Removed:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BroadWord.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBroadWord.java
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1636913&r1=1636912&r2=1636913&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Nov  5 16:32:44 2014
@@ -264,6 +264,9 @@ Optimizations
 * LUCENE-6030: Add norms patched compression for a small number of common values
   (Ryan Ernst)
 
+* LUCENE-6040: Speed up EliasFanoDocIdSet through broadword bit selection.
+  (Paul Elschot)
+
 Build
 
 * LUCENE-5909: Smoke tester now has better command line parsing and

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java?rev=1636913&r1=1636912&r2=1636913&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java Wed Nov  5 16:32:44
2014
@@ -212,4 +212,64 @@ public final class BitUtil {
      return ((l >>> 1) ^ -(l & 1));
    }
 
+
+  /** Select a 1-bit from a long. See also LUCENE-6040.
+   * @return The index of the r-th 1 bit in x. This bit must exist.
+   */
+  public static int select(long x, int r) {
+    long s = x - ((x & 0xAAAAAAAAAAAAAAAAL) >>> 1); // pairwise bitsums
+    s = (s & 0x3333333333333333L) + ((s >>> 2) & 0x3333333333333333L); //
nibblewise bitsums
+    s = ((s + (s >>> 4)) & 0x0F0F0F0F0F0F0F0FL) * L8_L; // bytewise bitsums,
cumulative
+
+    int b = (Long.numberOfTrailingZeros((s + psOverflow[r-1]) & (L8_L << 7)) >>
3) << 3; // bit position of byte with r-th 1 bit.
+    long l = r - (((s << 8) >>> b) & 0xFFL); // bit rank in byte at b
+
+    // Select bit l from byte (x >>> b):
+    int selectIndex = (int) (((x >>> b) & 0xFFL) | ((l-1) << 8));
+    int res = b + select256[selectIndex];
+    return res;
+  }
+
+  private final static long L8_L = 0x0101010101010101L;
+
+  private static final long[] psOverflow = new long[64];
+  static {
+    for (int s = 1; s <= 64; s++) {
+      psOverflow[s-1] = (128-s) * L8_L;
+    }
+  }
+
+  private static final byte[] select256 = new byte[8 * 256];
+  static {
+    for (int b = 0; b <= 0xFF; b++) {
+      for (int s = 1; s <= 8; s++) {
+        int byteIndex = b | ((s-1) << 8);
+        int bitIndex = selectNaive(b, s);
+        if (bitIndex < 0) {
+          bitIndex = 127; // positive as byte
+        }
+        assert bitIndex >= 0;
+        assert ((byte) bitIndex) >= 0; // non negative as byte, no need to mask the sign
+        select256[byteIndex] = (byte) bitIndex;
+      }
+    }
+  }
+
+  /**
+   * Naive implementation of {@link #select(long,int)}, using {@link Long#numberOfTrailingZeros}
repetitively.
+   * Works relatively fast for low ranks.
+   * @return The index of the r-th 1 bit in x, or -1 if no such bit exists.
+   */
+  public static int selectNaive(long x, int r) {
+    assert r >= 1;
+    int s = -1;
+    while ((x != 0L) && (r > 0)) {
+      int ntz = Long.numberOfTrailingZeros(x);
+      x >>>= (ntz + 1);
+      s += (ntz + 1);
+      r -= 1;
+    }
+    int res = (r > 0) ? -1 : s;
+    return res;
+  }
 }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java?rev=1636913&r1=1636912&r2=1636913&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java
(original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java
Wed Nov  5 16:32:44 2014
@@ -17,7 +17,7 @@
 
 package org.apache.lucene.util.packed;
 
-import org.apache.lucene.util.BroadWord; // bit selection in long
+import org.apache.lucene.util.BitUtil; // bit selection in long
 
 
 /** A decoder for an {@link EliasFanoEncoder}.
@@ -312,9 +312,10 @@ public class EliasFanoDecoder {
     if (rank >= 1) {
       long invCurHighLong = ~curHighLong;
       int clearBitForValue = (rank <= 8)
-                              ? BroadWord.selectNaive(invCurHighLong, rank)
-                              : BroadWord.select(invCurHighLong, rank);
-      assert clearBitForValue <= (Long.SIZE-1);
+                              ? BitUtil.selectNaive(invCurHighLong, rank)
+                              : BitUtil.select(invCurHighLong, rank);
+      assert clearBitForValue >= 0;
+      assert clearBitForValue <= Long.SIZE-1;
       setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex
is zero
       int oneBitsBeforeClearBit = clearBitForValue - rank + 1;
       efIndex += oneBitsBeforeClearBit; // the high bit at setBitForIndex and belongs to
the unary code for efIndex

Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java?rev=1636913&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java Wed Nov
 5 16:32:44 2014
@@ -0,0 +1,87 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class TestBitUtil extends LuceneTestCase {
+
+  private void tstSelect(long x, int r, int exp) {
+    if ((0 <= exp) && (exp <= 63)) {
+      assertEquals("selectNaive(" + x + "," + r + ")", exp, BitUtil.selectNaive(x, r));
+      assertEquals("select(" + x + "," + r + ")", exp, BitUtil.select(x, r));
+    } else {
+      int act = BitUtil.selectNaive(x, r);
+      assertTrue("selectNaive(" + x + "," + r + ")", act < 0 || act > 63);
+      act = BitUtil.select(x, r);
+      assertTrue("select(" + x + "," + r + ")", act < 0 || act > 63);
+    }
+  }
+
+  public void testSelectFromZero() {
+    tstSelect(0L,1,72);
+  }
+  public void testSelectSingleBit() {
+    for (int i = 0; i < 64; i++) {
+      tstSelect((1L << i),1,i);
+    }
+  }
+  public void testSelectTwoBits() {
+    for (int i = 0; i < 64; i++) {
+      for (int j = i+1; j < 64; j++) {
+        long x = (1L << i) | (1L << j);
+        //System.out.println(getName() + " i: " + i + " j: " + j);
+        tstSelect(x,1,i);
+        tstSelect(x,2,j);
+        tstSelect(x,3,72);
+      }
+    }
+  }
+  public void testSelectThreeBits() {
+    for (int i = 0; i < 64; i++) {
+      for (int j = i+1; j < 64; j++) {
+        for (int k = j+1; k < 64; k++) {
+          long x = (1L << i) | (1L << j) | (1L << k);
+          tstSelect(x,1,i);
+          tstSelect(x,2,j);
+          tstSelect(x,3,k);
+          tstSelect(x,4,72);
+        }
+      }
+    }
+  }
+  public void testSelectAllBits() {
+    for (int i = 0; i < 64; i++) {
+      tstSelect(0xFFFFFFFFFFFFFFFFL,i+1,i);
+    }
+  }
+  public void testPerfSelectAllBits() {
+    for (int j = 0; j < 100000; j++) { // 1000000 for real perf test
+      for (int i = 0; i < 64; i++) {
+        assertEquals(i, BitUtil.select(0xFFFFFFFFFFFFFFFFL, i+1));
+      }
+    }
+  }
+  public void testPerfSelectAllBitsNaive() {
+    for (int j = 0; j < 10000; j++) { // real perftest: 1000000
+      for (int i = 0; i < 64; i++) {
+        assertEquals(i, BitUtil.selectNaive(0xFFFFFFFFFFFFFFFFL, i+1));
+      }
+    }
+  }
+}
+



Mime
View raw message