lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r823324 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/search/SingleTermEnum.java src/java/org/apache/lucene/search/WildcardQuery.java src/test/org/apache/lucene/search/TestWildcard.java
Date Thu, 08 Oct 2009 20:59:32 GMT
Author: mikemccand
Date: Thu Oct  8 20:59:31 2009
New Revision: 823324

URL: http://svn.apache.org/viewvc?rev=823324&view=rev
Log:
LUCENE-1951: fix WildcardQuery to correctly rewrite single term query and prefix query

Added:
    lucene/java/trunk/src/java/org/apache/lucene/search/SingleTermEnum.java   (with props)
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/WildcardQuery.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=823324&r1=823323&r2=823324&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Oct  8 20:59:31 2009
@@ -41,6 +41,12 @@
 
 Bug fixes
 
+* LUCENE-1951: When the text provided to WildcardQuery has no wildcard
+  characters (i.e. matches a single term), don't lose the boost and
+  rewrite method settings.  Also, rewrite to PrefixQuery if the
+  wildcard is of the form "foo*", for slightly faster performance. (Robert
+  Muir via Mike McCandless)
+
 New features
 
 * LUCENE-1933: Provide a convenience AttributeFactory that creates a

Added: lucene/java/trunk/src/java/org/apache/lucene/search/SingleTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/SingleTermEnum.java?rev=823324&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/SingleTermEnum.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/SingleTermEnum.java Thu Oct  8 20:59:31 2009
@@ -0,0 +1,67 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
+/**
+ * Subclass of FilteredTermEnum for enumerating a single term.
+ * <p>
+ * This can be used by {@link MultiTermQuery}s that need only visit one term,
+ * but want to preserve MultiTermQuery semantics such as
+ * {@link MultiTermQuery#rewriteMethod}.
+ */
+public class SingleTermEnum extends FilteredTermEnum {
+  private Term singleTerm;
+  private boolean endEnum = false;
+  
+  /**
+   * Creates a new <code>SingleTermEnum</code>.
+   * <p>
+   * After calling the constructor the enumeration is already pointing to the term,
+   * if it exists.
+   */
+  public SingleTermEnum(IndexReader reader, Term singleTerm) throws IOException {
+    super();
+    this.singleTerm = singleTerm;
+    setEnum(reader.terms(singleTerm));
+  }
+
+  @Override
+  public float difference() {
+    return 1.0F;
+  }
+
+  @Override
+  protected boolean endEnum() {
+    return endEnum;
+  }
+
+  @Override
+  protected boolean termCompare(Term term) {
+    if (term.equals(singleTerm)) {
+      return true;
+    } else {
+      endEnum = true;
+      return false;
+    }
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/SingleTermEnum.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/WildcardQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/WildcardQuery.java?rev=823324&r1=823323&r2=823324&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/WildcardQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/WildcardQuery.java Thu Oct  8 20:59:31 2009
@@ -37,16 +37,25 @@
  * @see WildcardTermEnum */
 public class WildcardQuery extends MultiTermQuery {
   private boolean termContainsWildcard;
+  private boolean termIsPrefix;
   protected Term term;
     
   public WildcardQuery(Term term) {
     super(term); //will be removed in 3.0
     this.term = term;
-    this.termContainsWildcard = (term.text().indexOf('*') != -1) || (term.text().indexOf('?') != -1);
+    String text = term.text();
+    this.termContainsWildcard = (text.indexOf('*') != -1)
+        || (text.indexOf('?') != -1);
+    this.termIsPrefix = termContainsWildcard 
+        && (text.indexOf('?') == -1) 
+        && (text.indexOf('*') == text.length() - 1);
   }
 
   protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
-    return new WildcardTermEnum(reader, getTerm());
+    if (termContainsWildcard)
+      return new WildcardTermEnum(reader, getTerm());
+    else
+      return new SingleTermEnum(reader, getTerm());
   }
   
   /**
@@ -57,10 +66,15 @@
   }
 
   public Query rewrite(IndexReader reader) throws IOException {
-    if (!termContainsWildcard)
-      return new TermQuery(getTerm());
-    else
+    if (termIsPrefix) {
+      MultiTermQuery rewritten = new PrefixQuery(term.createTerm(term.text()
+          .substring(0, term.text().indexOf('*'))));
+      rewritten.setBoost(getBoost());
+      rewritten.setRewriteMethod(getRewriteMethod());
+      return rewritten;
+    } else {
       return super.rewrite(reader);
+    }
   }
   
   /** Prints a user-readable version of this query. */

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java?rev=823324&r1=823323&r2=823324&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java Thu Oct  8 20:59:31 2009
@@ -58,17 +58,91 @@
   
   /**
    * Tests if a WildcardQuery that has no wildcard in the term is rewritten to a single
-   * TermQuery.
+   * TermQuery. The boost should be preserved, and the rewrite should return
+   * a ConstantScoreQuery if the WildcardQuery had a ConstantScore rewriteMethod.
    */
   public void testTermWithoutWildcard() throws IOException {
       RAMDirectory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"});
       IndexSearcher searcher = new IndexSearcher(indexStore, true);
 
-      Query wq = new WildcardQuery(new Term("field", "nowildcard"));
+      MultiTermQuery wq = new WildcardQuery(new Term("field", "nowildcard"));
       assertMatches(searcher, wq, 1);
 
-      wq = searcher.rewrite(wq);
-      assertTrue(wq instanceof TermQuery);
+      wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+      wq.setBoost(0.1F);
+      Query q = searcher.rewrite(wq);
+      assertTrue(q instanceof TermQuery);
+      assertEquals(q.getBoost(), wq.getBoost());
+      
+      wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+      wq.setBoost(0.2F);
+      q = searcher.rewrite(wq);
+      assertTrue(q instanceof ConstantScoreQuery);
+      assertEquals(q.getBoost(), wq.getBoost());
+      
+      wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+      wq.setBoost(0.3F);
+      q = searcher.rewrite(wq);
+      assertTrue(q instanceof ConstantScoreQuery);
+      assertEquals(q.getBoost(), wq.getBoost());
+      
+      wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+      wq.setBoost(0.4F);
+      q = searcher.rewrite(wq);
+      assertTrue(q instanceof ConstantScoreQuery);
+      assertEquals(q.getBoost(), wq.getBoost());
+  }
+  
+  /**
+   * Tests if a WildcardQuery with an empty term is rewritten to an empty BooleanQuery
+   */
+  public void testEmptyTerm() throws IOException {
+    RAMDirectory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"});
+    IndexSearcher searcher = new IndexSearcher(indexStore, true);
+
+    MultiTermQuery wq = new WildcardQuery(new Term("field", ""));
+    wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    assertMatches(searcher, wq, 0);
+    BooleanQuery expected = new BooleanQuery();
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(wq));
+  }
+  
+  /**
+   * Tests if a WildcardQuery that has only a trailing * in the term is
+   * rewritten to a single PrefixQuery. The boost and rewriteMethod should be
+   * preserved.
+   */
+  public void testPrefixTerm() throws IOException {
+    RAMDirectory indexStore = getIndexStore("field", new String[]{"prefix", "prefixx"});
+    IndexSearcher searcher = new IndexSearcher(indexStore, true);
+
+    MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
+    assertMatches(searcher, wq, 2);
+    
+    MultiTermQuery expected = new PrefixQuery(new Term("field", "prefix"));
+    wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    wq.setBoost(0.1F);
+    expected.setRewriteMethod(wq.getRewriteMethod());
+    expected.setBoost(wq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(wq));
+    
+    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    wq.setBoost(0.2F);
+    expected.setRewriteMethod(wq.getRewriteMethod());
+    expected.setBoost(wq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(wq));
+    
+    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    wq.setBoost(0.3F);
+    expected.setRewriteMethod(wq.getRewriteMethod());
+    expected.setBoost(wq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(wq));
+    
+    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    wq.setBoost(0.4F);
+    expected.setRewriteMethod(wq.getRewriteMethod());
+    expected.setBoost(wq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(wq));
   }
 
   /**



Mime
View raw message