lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r889567 - in /lucene/java/branches/lucene_3_0: CHANGES.txt src/java/org/apache/lucene/search/FuzzyQuery.java src/java/org/apache/lucene/search/FuzzyTermEnum.java src/test/org/apache/lucene/search/TestFuzzyQuery.java
Date Fri, 11 Dec 2009 10:48:45 GMT
Author: uschindler
Date: Fri Dec 11 10:48:44 2009
New Revision: 889567

URL: http://svn.apache.org/viewvc?rev=889567&view=rev
Log:
LUCENE-2123: Fix 3.0 branch, also add scoring test

Modified:
    lucene/java/branches/lucene_3_0/CHANGES.txt
    lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyQuery.java
    lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyTermEnum.java
    lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/search/TestFuzzyQuery.java

Modified: lucene/java/branches/lucene_3_0/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/CHANGES.txt?rev=889567&r1=889566&r2=889567&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/CHANGES.txt (original)
+++ lucene/java/branches/lucene_3_0/CHANGES.txt Fri Dec 11 10:48:44 2009
@@ -3,6 +3,13 @@
 
 ======================= 3.0 branch (not yet released) =======================
 
+Changes in backwards compatibility policy
+
+* LUCENE-2123: Removed the protected inner class ScoreTerm from
+  FuzzyQuery. The change was needed because the comparator of this class
+  had to be changed in an incompatible way. The class was never intended
+  to be public.  (Uwe Schindler, Mike McCandless)
+  
 Bug fixes
 
  * LUCENE-2092: BooleanQuery was ignoring disableCoord in its hashCode
@@ -22,6 +29,9 @@
  * LUCENE-2086: When resolving deleted terms, do so in term sort order
    for better performance (Bogdan Ghidireac via Mike McCandless)
 
+ * LUCENE-2123 (partly): Fixes a slowdown / memory issue added by
+   LUCENE-504.  (Uwe Schindler, Robert Muir, Mike McCandless)
+
 ======================= Release 3.0.0 2009-11-25 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyQuery.java?rev=889567&r1=889566&r2=889567&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyQuery.java (original)
+++ lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyQuery.java Fri Dec 11 10:48:44 2009
@@ -133,39 +133,30 @@
     }
 
     int maxSize = BooleanQuery.getMaxClauseCount();
-    PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>(1024);
+    PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
     FilteredTermEnum enumerator = getEnum(reader);
     try {
-      ScoreTerm bottomSt = null;
+      ScoreTerm st = new ScoreTerm();
       do {
         final Term t = enumerator.term();
         if (t == null) break;
-        ScoreTerm st = new ScoreTerm(t, enumerator.difference());
-        if (stQueue.size() < maxSize) {
-          // record the current bottom item
-          if (bottomSt == null || st.compareTo(bottomSt) > 0) {
-            bottomSt = st;
-          }
-          // add to PQ, as it is not yet filled up
-          stQueue.offer(st);
-        } else {
-          assert bottomSt != null;
-          // only add to PQ, if the ScoreTerm is greater than the current bottom,
-          // as all entries will be enqueued after the current bottom and will never be visible
-          if (st.compareTo(bottomSt) < 0) {
-            stQueue.offer(st);
-          }
-        }
-        //System.out.println("current: "+st.term+"("+st.score+"), bottom: "+bottomSt.term+"("+bottomSt.score+")");
+        final float score = enumerator.difference();
+        // ignore uncompetetive hits
+        if (stQueue.size() >= maxSize && score <= stQueue.peek().score)
+          continue;
+        // add new entry in PQ
+        st.term = t;
+        st.score = score;
+        stQueue.offer(st);
+        // possibly drop entries from queue
+        st = (stQueue.size() > maxSize) ? stQueue.poll() : new ScoreTerm();
       } while (enumerator.next());
     } finally {
       enumerator.close();
     }
     
     BooleanQuery query = new BooleanQuery(true);
-    int size = Math.min(stQueue.size(), maxSize);
-    for(int i = 0; i < size; i++){
-      ScoreTerm st = stQueue.poll();
+    for (final ScoreTerm st : stQueue) {
       TermQuery tq = new TermQuery(st.term);      // found a match
       tq.setBoost(getBoost() * st.score); // set the boost
       query.add(tq, BooleanClause.Occur.SHOULD);          // add to query
@@ -174,21 +165,15 @@
     return query;
   }
   
-  protected static class ScoreTerm implements Comparable<ScoreTerm> {
+  private static final class ScoreTerm implements Comparable<ScoreTerm> {
     public Term term;
     public float score;
     
-    public ScoreTerm(Term term, float score){
-      this.term = term;
-      this.score = score;
-    }
-    
     public int compareTo(ScoreTerm other) {
       if (this.score == other.score)
-        return this.term.compareTo(other.term);
+        return other.term.compareTo(this.term);
       else
-        // inverse ordering!!!
-        return Float.compare(other.score, this.score);
+        return Float.compare(this.score, other.score);
     }
   }
     

Modified: lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyTermEnum.java?rev=889567&r1=889566&r2=889567&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyTermEnum.java (original)
+++ lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/FuzzyTermEnum.java Fri Dec 11 10:48:44 2009
@@ -157,7 +157,7 @@
    * based on how similar the Term is compared to a target term.  It returns
    * exactly 0.0f when
    * <pre>
-   *    editDistance &lt; maximumEditDistance</pre>
+   *    editDistance &gt; maximumEditDistance</pre>
    * Otherwise it returns:
    * <pre>
    *    1 - (editDistance / length)</pre>

Modified: lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/search/TestFuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/search/TestFuzzyQuery.java?rev=889567&r1=889566&r2=889567&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/search/TestFuzzyQuery.java (original)
+++ lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/search/TestFuzzyQuery.java Fri Dec 11 10:48:44 2009
@@ -18,6 +18,7 @@
  */
 
 import java.util.Set;
+import java.util.List;
 import java.util.HashSet;
 import java.util.Arrays;
 import java.io.IOException;
@@ -80,6 +81,17 @@
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
     
+    // test scoring
+    query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.defaultMinSimilarity, 0);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("3 documents should match", 3, hits.length);
+    List<String> order = Arrays.asList("bbbbb","abbbb","aabbb");
+    for (int i = 0; i < hits.length; i++) {
+      final String term = searcher.doc(hits[i].doc).get("field");
+      //System.out.println(hits[i].score);
+      assertEquals(order.get(i), term);
+    }
+
     // test BooleanQuery.maxClauseCount
     int savedClauseCount = BooleanQuery.getMaxClauseCount();
     try {



Mime
View raw message