lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sim...@apache.org
Subject svn commit: r1439450 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/queries/ lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java
Date Mon, 28 Jan 2013 15:36:32 GMT
Author: simonw
Date: Mon Jan 28 15:36:31 2013
New Revision: 1439450

URL: http://svn.apache.org/viewvc?rev=1439450&view=rev
Log:
LUCENE-4727: use float as minShouldMatch on CommonTermsQuery

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
    lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java

Modified: lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java?rev=1439450&r1=1439449&r2=1439450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
Mon Jan 28 15:36:31 2013
@@ -74,7 +74,7 @@ public class CommonTermsQuery extends Qu
   protected final Occur highFreqOccur;
   protected float lowFreqBoost = 1.0f;
   protected float highFreqBoost = 1.0f;
-  protected int minNrShouldMatch = 0;
+  protected float minNrShouldMatch = 0;
   
   /**
    * Creates a new {@link CommonTermsQuery}
@@ -84,7 +84,7 @@ public class CommonTermsQuery extends Qu
    * @param lowFreqOccur
    *          {@link Occur} used for low frequency terms
    * @param maxTermFrequency
-   *          a value in [0..1] (or absolute number >=1) representing the
+   *          a value in [0..1) (or absolute number >=1) representing the
    *          maximum threshold of a terms document frequency to be considered a
    *          low frequency term.
    * @throws IllegalArgumentException
@@ -104,7 +104,7 @@ public class CommonTermsQuery extends Qu
    * @param lowFreqOccur
    *          {@link Occur} used for low frequency terms
    * @param maxTermFrequency
-   *          a value in [0..1] (or absolute number >=1) representing the
+   *          a value in [0..1) (or absolute number >=1) representing the
    *          maximum threshold of a terms document frequency to be considered a
    *          low frequency term.
    * @param disableCoord
@@ -160,15 +160,19 @@ public class CommonTermsQuery extends Qu
     return buildQuery(maxDoc, contextArray, queryTerms);
   }
   
+  protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) {
+      if (minNrShouldMatch >= 1.0f || minNrShouldMatch == 0.0f) {
+          return (int) minNrShouldMatch;
+      }
+      return (int) (Math.round(minNrShouldMatch * numOptional));
+  }
+  
   protected Query buildQuery(final int maxDoc,
       final TermContext[] contextArray, final Term[] queryTerms) {
     BooleanQuery lowFreq = new BooleanQuery(disableCoord);
     BooleanQuery highFreq = new BooleanQuery(disableCoord);
     highFreq.setBoost(highFreqBoost);
     lowFreq.setBoost(lowFreqBoost);
-    if (lowFreqOccur == Occur.SHOULD) {
-      lowFreq.setMinimumNumberShouldMatch(minNrShouldMatch);
-    }
     BooleanQuery query = new BooleanQuery(true);
     for (int i = 0; i < queryTerms.length; i++) {
       TermContext termContext = contextArray[i];
@@ -186,6 +190,11 @@ public class CommonTermsQuery extends Qu
       }
       
     }
+    final int numLowFreqClauses = lowFreq.clauses().size(); 
+    if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
+      int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
+      lowFreq.setMinimumNumberShouldMatch(minMustMatch);
+    }
     if (lowFreq.clauses().isEmpty()) {
       /*
        * if lowFreq is empty we rewrite the high freq terms in a conjunction to
@@ -265,7 +274,9 @@ public class CommonTermsQuery extends Qu
   /**
    * Specifies a minimum number of the optional BooleanClauses which must be
    * satisfied in order to produce a match on the low frequency terms query
-   * part.
+   * part. This method accepts a float value in the range [0..1) as a fraction
+   * of the actual query terms in the low frequent clause or a number
+   * <tt>&gt;=1</tt> as an absolut number of clauses that need to match.
    * 
    * <p>
    * By default no optional clauses are necessary for a match (unless there are
@@ -276,7 +287,7 @@ public class CommonTermsQuery extends Qu
    * @param min
    *          the number of optional clauses that must match
    */
-  public void setMinimumNumberShouldMatch(int min) {
+  public void setMinimumNumberShouldMatch(float min) {
     this.minNrShouldMatch = min;
   }
   
@@ -284,7 +295,7 @@ public class CommonTermsQuery extends Qu
    * Gets the minimum number of the optional BooleanClauses which must be
    * satisfied.
    */
-  public int getMinimumNumberShouldMatch() {
+  public float getMinimumNumberShouldMatch() {
     return minNrShouldMatch;
   }
   
@@ -332,7 +343,7 @@ public class CommonTermsQuery extends Qu
     result = prime * result
         + ((lowFreqOccur == null) ? 0 : lowFreqOccur.hashCode());
     result = prime * result + Float.floatToIntBits(maxTermFrequency);
-    result = prime * result + minNrShouldMatch;
+    result = prime * result + Float.floatToIntBits(minNrShouldMatch);
     result = prime * result + ((terms == null) ? 0 : terms.hashCode());
     return result;
   }

Modified: lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java?rev=1439450&r1=1439449&r2=1439450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java
Mon Jan 28 15:36:31 2013
@@ -175,6 +175,90 @@ public class CommonTermsQueryTest extend
     }
   }
   
+  public void testMinShouldMatch() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    String[] docs = new String[] {"this is the end of the world right",
+        "is this it or maybe not",
+        "this is the end of the universe as we know it",
+        "there is the famous restaurant at the end of the universe",};
+    for (int i = 0; i < docs.length; i++) {
+      Document doc = new Document();
+      doc.add(newStringField("id", "" + i, Field.Store.YES));
+      doc.add(newTextField("field", docs[i], Field.Store.NO));
+      w.addDocument(doc);
+    }
+    
+    IndexReader r = w.getReader();
+    IndexSearcher s = newSearcher(r);
+    {
+      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
+          random().nextBoolean() ? 2.0f : 0.5f);
+      query.add(new Term("field", "is"));
+      query.add(new Term("field", "this"));
+      query.add(new Term("field", "end"));
+      query.add(new Term("field", "world"));
+      query.add(new Term("field", "universe"));
+      query.add(new Term("field", "right"));
+      query.setMinimumNumberShouldMatch(0.5f);
+      TopDocs search = s.search(query, 10);
+      assertEquals(search.totalHits, 1);
+      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
+    }
+    {
+      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
+          random().nextBoolean() ? 2.0f : 0.5f);
+      query.add(new Term("field", "is"));
+      query.add(new Term("field", "this"));
+      query.add(new Term("field", "end"));
+      query.add(new Term("field", "world"));
+      query.add(new Term("field", "universe"));
+      query.add(new Term("field", "right"));
+      query.setMinimumNumberShouldMatch(2.0f);
+      TopDocs search = s.search(query, 10);
+      assertEquals(search.totalHits, 1);
+      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
+    }
+    
+    {
+      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
+          random().nextBoolean() ? 2.0f : 0.5f);
+      query.add(new Term("field", "is"));
+      query.add(new Term("field", "this"));
+      query.add(new Term("field", "end"));
+      query.add(new Term("field", "world"));
+      query.add(new Term("field", "universe"));
+      query.add(new Term("field", "right"));
+      query.setMinimumNumberShouldMatch(0.49f);
+      TopDocs search = s.search(query, 10);
+      assertEquals(search.totalHits, 3);
+      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
+      assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
+      assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
+    }
+    
+    {
+      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
+          random().nextBoolean() ? 2.0f : 0.5f);
+      query.add(new Term("field", "is"));
+      query.add(new Term("field", "this"));
+      query.add(new Term("field", "end"));
+      query.add(new Term("field", "world"));
+      query.add(new Term("field", "universe"));
+      query.add(new Term("field", "right"));
+      query.setMinimumNumberShouldMatch(1.0f);
+      TopDocs search = s.search(query, 10);
+      assertEquals(search.totalHits, 3);
+      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
+      assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
+      assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
+    }
+   
+    r.close();
+    w.close();
+    dir.close();
+  }
+  
   public void testIllegalOccur() {
     Random random = random();
     



Mime
View raw message