lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r365447 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/search/IndexSearcher.java src/java/org/apache/lucene/search/TopDocCollector.java src/java/org/apache/lucene/search/TopFieldDocCollector.java
Date Mon, 02 Jan 2006 22:00:09 GMT
Author: cutting
Date: Mon Jan  2 14:00:07 2006
New Revision: 365447

URL: http://svn.apache.org/viewcvs?rev=365447&view=rev
Log:
Add TopDocCollector and TopFieldDocCollector.  These simplify the implementation of hit collectors
that collect top-scoring or -sorting documents.

Added:
    lucene/java/trunk/src/java/org/apache/lucene/search/TopDocCollector.java
    lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocCollector.java
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=365447&r1=365446&r2=365447&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Jan  2 14:00:07 2006
@@ -215,6 +215,10 @@
 32. StopFilter can now ignore case when checking for stop words.
     (Grant Ingersoll via Yonik, LUCENE-248)
 
+33. Add TopDocCollector and TopFieldDocCollector.  These simplify the
+    implementation of hit collectors that collect only the
+    top-scoring or top-sorting hits.
+
 API Changes
 
  1. Several methods and fields have been deprecated. The API documentation

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java?rev=365447&r1=365446&r2=365447&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java Mon Jan  2 14:00:07 2006
@@ -95,63 +95,20 @@
     if (nDocs <= 0)  // null might be returned from hq.top() below.
       throw new IllegalArgumentException("nDocs must be > 0");
 
-    Scorer scorer = weight.scorer(reader);
-    if (scorer == null)
-      return new TopDocs(0, new ScoreDoc[0], Float.NEGATIVE_INFINITY);
-
-    final BitSet bits = filter != null ? filter.bits(reader) : null;
-    final HitQueue hq = new HitQueue(nDocs);
-    final int[] totalHits = new int[1];
-    scorer.score(new HitCollector() {
-        private float minScore = 0.0f;
-        public final void collect(int doc, float score) {
-          if (score > 0.0f &&                     // ignore zeroed buckets
-              (bits==null || bits.get(doc))) {    // skip docs not in bits
-            totalHits[0]++;
-            if (hq.size() < nDocs || score >= minScore) {
-              hq.insert(new ScoreDoc(doc, score));
-              minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
-            }
-          }
-        }
-      });
-
-    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
-    for (int i = hq.size()-1; i >= 0; i--)        // put docs in array
-      scoreDocs[i] = (ScoreDoc)hq.pop();
-
-    float maxScore = (totalHits[0]==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
-    
-    return new TopDocs(totalHits[0], scoreDocs, maxScore);
+    TopDocCollector collector = new TopDocCollector(nDocs);
+    search(weight, filter, collector);
+    return collector.topDocs();
   }
 
   // inherit javadoc
   public TopFieldDocs search(Weight weight, Filter filter, final int nDocs,
                              Sort sort)
       throws IOException {
-    Scorer scorer = weight.scorer(reader);
-    if (scorer == null)
-      return new TopFieldDocs(0, new ScoreDoc[0], sort.fields, Float.NEGATIVE_INFINITY);
-
-    final BitSet bits = filter != null ? filter.bits(reader) : null;
-    final FieldSortedHitQueue hq =
-      new FieldSortedHitQueue(reader, sort.fields, nDocs);
-    final int[] totalHits = new int[1];
-    scorer.score(new HitCollector() {
-        public final void collect(int doc, float score) {
-          if (score > 0.0f &&			  // ignore zeroed buckets
-              (bits==null || bits.get(doc))) {	  // skip docs not in bits
-            totalHits[0]++;
-            hq.insert(new FieldDoc(doc, score));
-          }
-        }
-      });
-
-    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
-    for (int i = hq.size()-1; i >= 0; i--)        // put docs in array
-      scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
 
-    return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields(), hq.getMaxScore());
+    TopFieldDocCollector collector =
+      new TopFieldDocCollector(reader, sort, nDocs);
+    search(weight, filter, collector);
+    return (TopFieldDocs)collector.topDocs();
   }
 
   // inherit javadoc

Added: lucene/java/trunk/src/java/org/apache/lucene/search/TopDocCollector.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/TopDocCollector.java?rev=365447&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/TopDocCollector.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/TopDocCollector.java Mon Jan  2 14:00:07 2006
@@ -0,0 +1,81 @@
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.BitSet;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.PriorityQueue;
+
+/** A {@link HitCollector} implementation that collects the top-scoring
+ * documents, returning them as a {@link TopDocs}.  This is used by {@link
+ * IndexSearcher} to implement {@link TopDocs}-based search.
+ *
+ * <p>This may be extended, overriding the collect method to, e.g.,
+ * conditionally invoke <code>super.collect()</code> in order to filter which
+ * documents are collected.
+ **/
+public class TopDocCollector extends HitCollector {
+  private int numHits;
+  private float minScore = 0.0f;
+
+  int totalHits;
+  PriorityQueue hq;
+    
+  /** Construct to collect a given number of hits.
+   * @param numHits the maximum number of hits to collect
+   */
+  public TopDocCollector(int numHits) {
+    this(numHits, new HitQueue(numHits));
+  }
+
+  TopDocCollector(int numHits, PriorityQueue hq) {
+    this.numHits = numHits;
+    this.hq = hq;
+  }
+
+  // javadoc inherited
+  public void collect(int doc, float score) {
+    if (score > 0.0f) {
+      totalHits++;
+      if (hq.size() < numHits || score >= minScore) {
+        hq.insert(new ScoreDoc(doc, score));
+        minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
+      }
+    }
+  }
+
+  /** The total number of documents that matched this query. */
+  public int getTotalHits() { return totalHits; }
+
+  /** The top-scoring hits. */
+  public TopDocs topDocs() {
+    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
+    for (int i = hq.size()-1; i >= 0; i--)      // put docs in array
+      scoreDocs[i] = (ScoreDoc)hq.pop();
+      
+    float maxScore = (totalHits==0)
+      ? Float.NEGATIVE_INFINITY
+      : scoreDocs[0].score;
+    
+    return new TopDocs(totalHits, scoreDocs, maxScore);
+  }
+}

Added: lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocCollector.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocCollector.java?rev=365447&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocCollector.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocCollector.java Mon Jan  2 14:00:07 2006
@@ -0,0 +1,65 @@
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.BitSet;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
+/** A {@link HitCollector} implementation that collects the top-sorting
+ * documents, returning them as a {@link TopFieldDocs}.  This is used by {@link
+ * IndexSearcher} to implement {@link TopFieldDocs}-based search.
+ *
+ * <p>This may be extended, overriding the collect method to, e.g.,
+ * conditionally invoke <code>super.collect()</code> in order to filter which
+ * documents are collected.
+ **/
+public class TopFieldDocCollector extends TopDocCollector {
+
+  /** Construct to collect a given number of hits.
+   * @param reader the index to be searched
+   * @param sort the sort criteria
+   * @param numHits the maximum number of hits to collect
+   */
+  public TopFieldDocCollector(IndexReader reader, Sort sort, int numHits)
+    throws IOException {
+    super(numHits, new FieldSortedHitQueue(reader, sort.fields, numHits));
+  }
+
+  // javadoc inherited
+  public void collect(int doc, float score) {
+    if (score > 0.0f) {
+      totalHits++;
+      hq.insert(new FieldDoc(doc, score));
+    }
+  }
+
+  // javadoc inherited
+  public TopDocs topDocs() {
+    FieldSortedHitQueue fshq = (FieldSortedHitQueue)hq;
+    ScoreDoc[] scoreDocs = new ScoreDoc[fshq.size()];
+    for (int i = fshq.size()-1; i >= 0; i--)      // put docs in array
+      scoreDocs[i] = fshq.fillFields ((FieldDoc) fshq.pop());
+
+    return new TopFieldDocs(totalHits, scoreDocs,
+                            fshq.getFields(), fshq.getMaxScore());
+  }
+}



Mime
View raw message