lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Jean-Francois Halleux" <halleux...@skynet.be>
Subject Small optimization in IndexSearcher
Date Wed, 14 Jan 2004 17:03:29 GMT
Hello,

	the method search(Query, Filter, int) in IndexSearcher makes use of a
Priority Queue to temporarily store hits before transfering them into an
array. This results in n X (put + pop) or a complexity of about 2 n log (n).

	Using a treeset to store hits and an iterator to transfer them to an array
allows for an n (log (n) + 1) complexity. Moreover, there is no need for the
HitQueue class anymore, the parameter nDoc is no more required and possibly
a very efficient Sorted Set implementation can be plugged in later on (or is
TreeSet already fast enough?). Because search is often called, I thought
that this  would be worthwile.

	All required modifications follow.

KR,

Jeff Halleux

----

Index: HitQueue.java
===================================================================
RCS file: HitQueue.java
diff -N HitQueue.java
--- HitQueue.java	18 Sep 2001 16:29:56 -0000	1.1.1.1
+++ /dev/null	1 Jan 1970 00:00:00 -0000
@@ -1,72 +0,0 @@
-package org.apache.lucene.search;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation.  All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- *    if any, must include the following acknowledgment:
- *       "This product includes software developed by the
- *        Apache Software Foundation (http://www.apache.org/)."
- *    Alternately, this acknowledgment may appear in the software itself,
- *    if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- *    "Apache Lucene" must not be used to endorse or promote products
- *    derived from this software without prior written permission. For
- *    written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- *    "Apache Lucene", nor may "Apache" appear in their name, without
- *    prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation.  For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import org.apache.lucene.util.PriorityQueue;
-
-final class HitQueue extends PriorityQueue {
-  HitQueue(int size) {
-    initialize(size);
-  }
-
-  protected final boolean lessThan(Object a, Object b) {
-    ScoreDoc hitA = (ScoreDoc)a;
-    ScoreDoc hitB = (ScoreDoc)b;
-    if (hitA.score == hitB.score)
-      return hitA.doc > hitB.doc;
-    else
-      return hitA.score < hitB.score;
-  }
-}
Index: IndexSearcher.java
===================================================================
RCS file:
/home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/IndexSearch
er.java,v
retrieving revision 1.11
diff -u -r1.11 IndexSearcher.java
--- IndexSearcher.java	16 Sep 2003 20:06:32 -0000	1.11
+++ IndexSearcher.java	14 Jan 2004 16:33:49 -0000
@@ -56,11 +56,14 @@

 import java.io.IOException;
 import java.util.BitSet;
+import java.util.Iterator;
+import java.util.SortedSet;
+import java.util.TreeSet;

-import org.apache.lucene.store.Directory;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;

 /** Implements search over a single IndexReader.
  *
@@ -130,23 +133,27 @@
       return new TopDocs(0, new ScoreDoc[0]);

     final BitSet bits = filter != null ? filter.bits(reader) : null;
-    final HitQueue hq = new HitQueue(nDocs);
-    final int[] totalHits = new int[1];
+    final SortedSet ss = new TreeSet();
+
     scorer.score(new HitCollector() {
 	public final void collect(int doc, float score) {
 	  if (score > 0.0f &&			  // ignore zeroed buckets
 	      (bits==null || bits.get(doc))) {	  // skip docs not in bits
-	    totalHits[0]++;
-            hq.insert(new ScoreDoc(doc, score));
+            ss.add(new ScoreDoc(doc, score));
 	  }
 	}
       }, reader.maxDoc());

-    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
-    for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
-      scoreDocs[i] = (ScoreDoc)hq.pop();
-
-    return new TopDocs(totalHits[0], scoreDocs);
+    ScoreDoc[] scoreDocs = new ScoreDoc[ss.size()];
+
+    Iterator it=ss.iterator();
+
+    int i=0;
+    while (it.hasNext()) {
+    	scoreDocs[i++]=(ScoreDoc)it.next();
+    }
+
+    return new TopDocs(ss.size(), scoreDocs);
   }

   /** Lower-level search API.
Index: ScoreDoc.java
===================================================================
RCS file:
/home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/ScoreDoc.ja
va,v
retrieving revision 1.3
diff -u -r1.3 ScoreDoc.java
--- ScoreDoc.java	17 Jul 2002 21:54:38 -0000	1.3
+++ ScoreDoc.java	14 Jan 2004 16:33:51 -0000
@@ -56,18 +56,28 @@

 /** Expert: Returned by low-level search implementations.
  * @see TopDocs */
-public class ScoreDoc implements java.io.Serializable {
-  /** Expert: The score of this document for the query. */
-  public float score;
+public class ScoreDoc implements java.io.Serializable, Comparable {
+	/** Expert: The score of this document for the query. */
+	public float score;

-  /** Expert: A hit document's number.
-   * @see Searcher#doc(int)
-   */
-  public int doc;
+	/** Expert: A hit document's number.
+	 * @see Searcher#doc(int)
+	 */
+	public int doc;
+
+	/** Expert: Constructs a ScoreDoc. */
+	public ScoreDoc(int doc, float score) {
+		this.doc = doc;
+		this.score = score;
+	}
+
+	public int compareTo(Object o) {
+		ScoreDoc other = (ScoreDoc) o;
+
+		if (score < other.score) return 1;
+		if (score > other.score) return -1;
+		if (doc > other.doc) return 1;
+		return -1;
+	}

-  /** Expert: Constructs a ScoreDoc. */
-  public ScoreDoc(int doc, float score) {
-    this.doc = doc;
-    this.score = score;
-  }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message