Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 2B94217D7C for ; Wed, 5 Nov 2014 16:34:07 +0000 (UTC) Received: (qmail 60028 invoked by uid 500); 5 Nov 2014 16:34:07 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 60019 invoked by uid 99); 5 Nov 2014 16:34:07 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 05 Nov 2014 16:34:07 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 05 Nov 2014 16:34:05 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 3B3A523888A6; Wed, 5 Nov 2014 16:32:45 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1636913 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/util/ core/src/java/org/apache/lucene/util/packed/ core/src/test/org/apache/lucene/util/ Date: Wed, 05 Nov 2014 16:32:45 -0000 To: commits@lucene.apache.org From: jpountz@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20141105163245.3B3A523888A6@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: jpountz Date: Wed Nov 5 16:32:44 2014 New Revision: 1636913 URL: http://svn.apache.org/r1636913 Log: LUCENE-6040: Speedup broadword bit selection. Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java Removed: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BroadWord.java lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBroadWord.java Modified: lucene/dev/trunk/lucene/CHANGES.txt lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java Modified: lucene/dev/trunk/lucene/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1636913&r1=1636912&r2=1636913&view=diff ============================================================================== --- lucene/dev/trunk/lucene/CHANGES.txt (original) +++ lucene/dev/trunk/lucene/CHANGES.txt Wed Nov 5 16:32:44 2014 @@ -264,6 +264,9 @@ Optimizations * LUCENE-6030: Add norms patched compression for a small number of common values (Ryan Ernst) +* LUCENE-6040: Speed up EliasFanoDocIdSet through broadword bit selection. + (Paul Elschot) + Build * LUCENE-5909: Smoke tester now has better command line parsing and Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java?rev=1636913&r1=1636912&r2=1636913&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java Wed Nov 5 16:32:44 2014 @@ -212,4 +212,64 @@ public final class BitUtil { return ((l >>> 1) ^ -(l & 1)); } + + /** Select a 1-bit from a long. See also LUCENE-6040. + * @return The index of the r-th 1 bit in x. This bit must exist. + */ + public static int select(long x, int r) { + long s = x - ((x & 0xAAAAAAAAAAAAAAAAL) >>> 1); // pairwise bitsums + s = (s & 0x3333333333333333L) + ((s >>> 2) & 0x3333333333333333L); // nibblewise bitsums + s = ((s + (s >>> 4)) & 0x0F0F0F0F0F0F0F0FL) * L8_L; // bytewise bitsums, cumulative + + int b = (Long.numberOfTrailingZeros((s + psOverflow[r-1]) & (L8_L << 7)) >> 3) << 3; // bit position of byte with r-th 1 bit. + long l = r - (((s << 8) >>> b) & 0xFFL); // bit rank in byte at b + + // Select bit l from byte (x >>> b): + int selectIndex = (int) (((x >>> b) & 0xFFL) | ((l-1) << 8)); + int res = b + select256[selectIndex]; + return res; + } + + private final static long L8_L = 0x0101010101010101L; + + private static final long[] psOverflow = new long[64]; + static { + for (int s = 1; s <= 64; s++) { + psOverflow[s-1] = (128-s) * L8_L; + } + } + + private static final byte[] select256 = new byte[8 * 256]; + static { + for (int b = 0; b <= 0xFF; b++) { + for (int s = 1; s <= 8; s++) { + int byteIndex = b | ((s-1) << 8); + int bitIndex = selectNaive(b, s); + if (bitIndex < 0) { + bitIndex = 127; // positive as byte + } + assert bitIndex >= 0; + assert ((byte) bitIndex) >= 0; // non negative as byte, no need to mask the sign + select256[byteIndex] = (byte) bitIndex; + } + } + } + + /** + * Naive implementation of {@link #select(long,int)}, using {@link Long#numberOfTrailingZeros} repetitively. + * Works relatively fast for low ranks. + * @return The index of the r-th 1 bit in x, or -1 if no such bit exists. + */ + public static int selectNaive(long x, int r) { + assert r >= 1; + int s = -1; + while ((x != 0L) && (r > 0)) { + int ntz = Long.numberOfTrailingZeros(x); + x >>>= (ntz + 1); + s += (ntz + 1); + r -= 1; + } + int res = (r > 0) ? -1 : s; + return res; + } } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java?rev=1636913&r1=1636912&r2=1636913&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java Wed Nov 5 16:32:44 2014 @@ -17,7 +17,7 @@ package org.apache.lucene.util.packed; -import org.apache.lucene.util.BroadWord; // bit selection in long +import org.apache.lucene.util.BitUtil; // bit selection in long /** A decoder for an {@link EliasFanoEncoder}. @@ -312,9 +312,10 @@ public class EliasFanoDecoder { if (rank >= 1) { long invCurHighLong = ~curHighLong; int clearBitForValue = (rank <= 8) - ? BroadWord.selectNaive(invCurHighLong, rank) - : BroadWord.select(invCurHighLong, rank); - assert clearBitForValue <= (Long.SIZE-1); + ? BitUtil.selectNaive(invCurHighLong, rank) + : BitUtil.select(invCurHighLong, rank); + assert clearBitForValue >= 0; + assert clearBitForValue <= Long.SIZE-1; setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex is zero int oneBitsBeforeClearBit = clearBitForValue - rank + 1; efIndex += oneBitsBeforeClearBit; // the high bit at setBitForIndex and belongs to the unary code for efIndex Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java?rev=1636913&view=auto ============================================================================== --- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java (added) +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java Wed Nov 5 16:32:44 2014 @@ -0,0 +1,87 @@ +package org.apache.lucene.util; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +public class TestBitUtil extends LuceneTestCase { + + private void tstSelect(long x, int r, int exp) { + if ((0 <= exp) && (exp <= 63)) { + assertEquals("selectNaive(" + x + "," + r + ")", exp, BitUtil.selectNaive(x, r)); + assertEquals("select(" + x + "," + r + ")", exp, BitUtil.select(x, r)); + } else { + int act = BitUtil.selectNaive(x, r); + assertTrue("selectNaive(" + x + "," + r + ")", act < 0 || act > 63); + act = BitUtil.select(x, r); + assertTrue("select(" + x + "," + r + ")", act < 0 || act > 63); + } + } + + public void testSelectFromZero() { + tstSelect(0L,1,72); + } + public void testSelectSingleBit() { + for (int i = 0; i < 64; i++) { + tstSelect((1L << i),1,i); + } + } + public void testSelectTwoBits() { + for (int i = 0; i < 64; i++) { + for (int j = i+1; j < 64; j++) { + long x = (1L << i) | (1L << j); + //System.out.println(getName() + " i: " + i + " j: " + j); + tstSelect(x,1,i); + tstSelect(x,2,j); + tstSelect(x,3,72); + } + } + } + public void testSelectThreeBits() { + for (int i = 0; i < 64; i++) { + for (int j = i+1; j < 64; j++) { + for (int k = j+1; k < 64; k++) { + long x = (1L << i) | (1L << j) | (1L << k); + tstSelect(x,1,i); + tstSelect(x,2,j); + tstSelect(x,3,k); + tstSelect(x,4,72); + } + } + } + } + public void testSelectAllBits() { + for (int i = 0; i < 64; i++) { + tstSelect(0xFFFFFFFFFFFFFFFFL,i+1,i); + } + } + public void testPerfSelectAllBits() { + for (int j = 0; j < 100000; j++) { // 1000000 for real perf test + for (int i = 0; i < 64; i++) { + assertEquals(i, BitUtil.select(0xFFFFFFFFFFFFFFFFL, i+1)); + } + } + } + public void testPerfSelectAllBitsNaive() { + for (int j = 0; j < 10000; j++) { // real perftest: 1000000 + for (int i = 0; i < 64; i++) { + assertEquals(i, BitUtil.selectNaive(0xFFFFFFFFFFFFFFFFL, i+1)); + } + } + } +} +