lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1002214 - in /lucene/dev/trunk/lucene: ./ contrib/highlighter/src/test/org/apache/lucene/search/highlight/ contrib/queryparser/src/java/org/apache/lucene/queryParser/core/messages/ contrib/queryparser/src/java/org/apache/lucene/queryParser...
Date Tue, 28 Sep 2010 15:11:13 GMT
Author: rmuir
Date: Tue Sep 28 15:11:12 2010
New Revision: 1002214

URL: http://svn.apache.org/viewvc?rev=1002214&view=rev
Log:
LUCENE-2667: Improve defaults for FuzzyQuery so it has good performance

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/messages/QueryParserMessages.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/resources/org/apache/lucene/queryParser/core/messages/QueryParserMessages.properties
    lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/analyzing/TestAnalyzingQueryParser.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/CharStream.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/ParseException.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/Token.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Sep 28 15:11:12 2010
@@ -113,6 +113,14 @@ Changes in backwards compatibility polic
   If you index empty fields and uses positions/offsets information on that
   fields, reindex is recommended. (David Smiley, Koji Sekiguchi)
 
+* LUCENE-2667: FuzzyQuery's defaults have changed for better performance:
+  the default minimum similarity is now an edit distance of 2 from the word,
+  and the default priority queue size is now 50. To support this, FuzzyQuery
+  now allows specifying unscaled edit distances (foobar~2). If your application
+  depends upon the old defaults of 0.5 (scaled) minimum similarity and
+  Integer.MAX_VALUE priority queue size, you can use
+  FuzzyQuery(Term, float, int, int) to specify those explicitly.
+  
 Changes in Runtime Behavior
 
 * LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit

Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Tue Sep 28 15:11:12 2010
@@ -614,7 +614,7 @@ public class HighlighterTest extends Bas
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("Kinnedy~");
+        doSearching("Kinnedy~0.5");
         doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this, true);
         assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
             numHighlights == 5);

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/messages/QueryParserMessages.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/messages/QueryParserMessages.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/messages/QueryParserMessages.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/messages/QueryParserMessages.java Tue Sep 28 15:11:12 2010
@@ -40,6 +40,7 @@ public class QueryParserMessages extends
   public static String INVALID_SYNTAX;
   public static String INVALID_SYNTAX_CANNOT_PARSE;
   public static String INVALID_SYNTAX_FUZZY_LIMITS;
+  public static String INVALID_SYNTAX_FUZZY_EDITS;
   public static String INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION;
   public static String INVALID_SYNTAX_ESCAPE_CHARACTER;
   public static String INVALID_SYNTAX_ESCAPE_NONE_HEX_UNICODE;

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java Tue Sep 28 15:11:12 2010
@@ -109,4 +109,4 @@ public interface CharStream {
   void Done();
 
 }
-/* JavaCC - OriginalChecksum=7bcd45d10a032f1c9da64691d073cf75 (do not edit this line) */
+/* JavaCC - OriginalChecksum=8cc617b193267dc876ef9699367c8186 (do not edit this line) */

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java Tue Sep 28 15:11:12 2010
@@ -195,4 +195,4 @@ public class ParseException extends Exce
    }
 
 }
-/* JavaCC - OriginalChecksum=4440e368eeef562faffeca98a200334b (do not edit this line) */
+/* JavaCC - OriginalChecksum=15fbbe38a36c8ac9e2740d030624c321 (do not edit this line) */

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java Tue Sep 28 15:11:12 2010
@@ -164,7 +164,7 @@ public class PrecedenceQueryParser imple
 
   /**
    * Set the minimum similarity for fuzzy queries.
-   * Default is 0.5f.
+   * Default is 2f.
    */
   public void setFuzzyMinSim(float fuzzyMinSim) {
       this.fuzzyMinSim = fuzzyMinSim;
@@ -927,8 +927,10 @@ public class PrecedenceQueryParser imple
           try {
             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
           } catch (Exception ignored) { }
-         if(fms < 0.0f || fms > 1.0f){
+         if(fms < 0.0f){
            {if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");}
+         } else if (fms >= 1.0f && fms != (int) fms) {
+           {if (true) throw new ParseException("Fractional edit distances are not allowed!");}
          }
          q = getFuzzyQuery(field, termImage, fms);
        } else {

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj Tue Sep 28 15:11:12 2010
@@ -188,7 +188,7 @@ public class PrecedenceQueryParser {
 
   /**
    * Set the minimum similarity for fuzzy queries.
-   * Default is 0.5f.
+   * Default is 2f.
    */
   public void setFuzzyMinSim(float fuzzyMinSim) {
       this.fuzzyMinSim = fuzzyMinSim;
@@ -905,8 +905,10 @@ Query Term(String field) : {
        	  try {
             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
        	  } catch (Exception ignored) { }
-       	 if(fms < 0.0f || fms > 1.0f){
+       	 if(fms < 0.0f){
        	   throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
+       	 } else if (fms >= 1.0f && fms != (int) fms) {
+       	   throw new ParseException("Fractional edit distances are not allowed!");
        	 }
          q = getFuzzyQuery(field, termImage, fms);
        } else {

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java Tue Sep 28 15:11:12 2010
@@ -121,4 +121,4 @@ public class Token {
   }
 
 }
-/* JavaCC - OriginalChecksum=bc9495ddfa3189061fb4f1bf3c4f64e2 (do not edit this line) */
+/* JavaCC - OriginalChecksum=0dc5808f2ab8aac8775ea9175fa2cb51 (do not edit this line) */

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java Tue Sep 28 15:11:12 2010
@@ -138,4 +138,4 @@ public class TokenMgrError extends Error
       this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
    }
 }
-/* JavaCC - OriginalChecksum=e01667f2eb6d0b2f1fbb6958df0ca751 (do not edit this line) */
+/* JavaCC - OriginalChecksum=257b82f2650841e86289a309cb3dae76 (do not edit this line) */

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java Tue Sep 28 15:11:12 2010
@@ -613,4 +613,4 @@ public class JavaCharStream
   }
 
 }
-/* JavaCC - OriginalChecksum=31519f95b41182c6740c2afd8dfbf344 (do not edit this line) */
+/* JavaCC - OriginalChecksum=f19c73b8f7faf94cc4a581e7b2933cc6 (do not edit this line) */

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java Tue Sep 28 15:11:12 2010
@@ -193,4 +193,4 @@ public class ParseException extends Quer
    }
 
 }
-/* JavaCC - OriginalChecksum=d0caeac083e9874065f9d1e298b5ccd9 (do not edit this line) */
+/* JavaCC - OriginalChecksum=38bce846fe6c8482993969f741c0323e (do not edit this line) */

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java Tue Sep 28 15:11:12 2010
@@ -433,8 +433,10 @@ public class StandardSyntaxParser implem
           try {
             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
           } catch (Exception ignored) { }
-         if(fms < 0.0f || fms > 1.0f){
+         if(fms < 0.0f){
            {if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));}
+         } else if (fms >= 1.0f && fms != (int) fms) {
+           {if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_EDITS));}
          }
          q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
        } else if (regexp) {

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj Tue Sep 28 15:11:12 2010
@@ -396,8 +396,10 @@ QueryNode Term(CharSequence field) : {
        	  try {
             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
        	  } catch (Exception ignored) { }
-       	 if(fms < 0.0f || fms > 1.0f){
+       	 if(fms < 0.0f){
        	   throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
+       	 } else if (fms >= 1.0f && fms != (int) fms) {
+       	   throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_EDITS));
        	 }
        	 q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
        } else if (regexp) {

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java Tue Sep 28 15:11:12 2010
@@ -121,4 +121,4 @@ public class Token {
   }
 
 }
-/* JavaCC - OriginalChecksum=cecb6022e0f2e2fca751015375f6d319 (do not edit this line) */
+/* JavaCC - OriginalChecksum=0aac6816ecd328eda2f38b9d09739ab6 (do not edit this line) */

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java Tue Sep 28 15:11:12 2010
@@ -138,4 +138,4 @@ public class TokenMgrError extends Error
       this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
    }
 }
-/* JavaCC - OriginalChecksum=0e9c5fad06efef4f41f97b851ac7b0ce (do not edit this line) */
+/* JavaCC - OriginalChecksum=a75b5b61664a73631a032a6e44f4b38a (do not edit this line) */

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/resources/org/apache/lucene/queryParser/core/messages/QueryParserMessages.properties
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/resources/org/apache/lucene/queryParser/core/messages/QueryParserMessages.properties?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/resources/org/apache/lucene/queryParser/core/messages/QueryParserMessages.properties (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/resources/org/apache/lucene/queryParser/core/messages/QueryParserMessages.properties Tue Sep 28 15:11:12 2010
@@ -13,6 +13,9 @@ INVALID_SYNTAX_CANNOT_PARSE = Syntax Err
 INVALID_SYNTAX_FUZZY_LIMITS = The similarity value for a fuzzy search must be between 0.0 and 1.0.
 
 #<CREATEDBY>Apache Lucene Community</CREATEDBY>
+INVALID_SYNTAX_FUZZY_EDITS = Fractional edit distances are not allowed.
+
+#<CREATEDBY>Apache Lucene Community</CREATEDBY>
 INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION = Truncated unicode escape sequence.
 
 #<CREATEDBY>Apache Lucene Community</CREATEDBY>

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/analyzing/TestAnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/analyzing/TestAnalyzingQueryParser.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/analyzing/TestAnalyzingQueryParser.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/analyzing/TestAnalyzingQueryParser.java Tue Sep 28 15:11:12 2010
@@ -64,7 +64,7 @@ public class TestAnalyzingQueryParser ex
         "Mötley Crüe Mötley~0.75 Crüe~0.5",
         "Renée Zellweger Renée~0.9 Zellweger~" };
     fuzzyExpected = new String[] { "ubersetzung ubersetzung~0.9",
-        "motley crue motley~0.75 crue~0.5", "renee zellweger renee~0.9 zellweger~0.5" };
+        "motley crue motley~0.75 crue~0.5", "renee zellweger renee~0.9 zellweger~2.0" };
 
     a = new ASCIIAnalyzer();
   }

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java Tue Sep 28 15:11:12 2010
@@ -543,10 +543,10 @@ public class TestPrecedenceQueryParser e
 
     assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
 
-    assertQueryEquals("a:b\\-c~", a, "a:b-c~0.5");
-    assertQueryEquals("a:b\\+c~", a, "a:b+c~0.5");
-    assertQueryEquals("a:b\\:c~", a, "a:b:c~0.5");
-    assertQueryEquals("a:b\\\\c~", a, "a:b\\c~0.5");
+    assertQueryEquals("a:b\\-c~", a, "a:b-c~2.0");
+    assertQueryEquals("a:b\\+c~", a, "a:b+c~2.0");
+    assertQueryEquals("a:b\\:c~", a, "a:b:c~2.0");
+    assertQueryEquals("a:b\\\\c~", a, "a:b\\c~2.0");
 
     assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
     assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/CharStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/CharStream.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/CharStream.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/CharStream.java Tue Sep 28 15:11:12 2010
@@ -109,4 +109,4 @@ public interface CharStream {
   void Done();
 
 }
-/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */
+/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/ParseException.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/ParseException.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/ParseException.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/ParseException.java Tue Sep 28 15:11:12 2010
@@ -195,4 +195,4 @@ public class ParseException extends Exce
    }
 
 }
-/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */
+/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java Tue Sep 28 15:11:12 2010
@@ -269,7 +269,7 @@ public class QueryParser implements Quer
 
   /**
    * Set the minimum similarity for fuzzy queries.
-   * Default is 0.5f.
+   * Default is 2f.
    */
   public void setFuzzyMinSim(float fuzzyMinSim) {
       this.fuzzyMinSim = fuzzyMinSim;
@@ -1446,8 +1446,10 @@ public class QueryParser implements Quer
           try {
             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
           } catch (Exception ignored) { }
-         if(fms < 0.0f || fms > 1.0f){
+         if(fms < 0.0f){
            {if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");}
+         } else if (fms >= 1.0f && fms != (int) fms) {
+           {if (true) throw new ParseException("Fractional edit distances are not allowed!");}
          }
          q = getFuzzyQuery(field, termImage,fms);
        } else {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj Tue Sep 28 15:11:12 2010
@@ -293,7 +293,7 @@ public class QueryParser {
 
   /**
    * Set the minimum similarity for fuzzy queries.
-   * Default is 0.5f.
+   * Default is 2f.
    */
   public void setFuzzyMinSim(float fuzzyMinSim) {
       this.fuzzyMinSim = fuzzyMinSim;
@@ -1412,8 +1412,10 @@ Query Term(String field) : {
        	  try {
             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
        	  } catch (Exception ignored) { }
-       	 if(fms < 0.0f || fms > 1.0f){
+       	 if(fms < 0.0f){
        	   throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
+       	 } else if (fms >= 1.0f && fms != (int) fms) {
+       	   throw new ParseException("Fractional edit distances are not allowed!");
        	 }
        	 q = getFuzzyQuery(field, termImage,fms);
        } else {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/Token.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/Token.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/Token.java Tue Sep 28 15:11:12 2010
@@ -121,4 +121,4 @@ public class Token {
   }
 
 }
-/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */
+/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java Tue Sep 28 15:11:12 2010
@@ -138,4 +138,4 @@ public class TokenMgrError extends Error
       this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
    }
 }
-/* JavaCC - OriginalChecksum=334e679cf1a88b3070bb8e3d80ee3f5e (do not edit this line) */
+/* JavaCC - OriginalChecksum=1c94e13236c7e0121e49427992341ee3 (do not edit this line) */

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java Tue Sep 28 15:11:12 2010
@@ -21,16 +21,13 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.automaton.LevenshteinAutomata;
 
 import java.io.IOException;
 
 /** Implements the fuzzy search query. The similarity measurement
  * is based on the Levenshtein (edit distance) algorithm.
  * 
- * <p><em>Warning:</em> this query is not very scalable with its default prefix
- * length of 0 - in this case, *every* term will be enumerated and
- * cause an edit score calculation.
- * 
  * <p>This query uses {@link MultiTermQuery.TopTermsScoringBooleanQueryRewrite}
  * as default. So terms will be collected and scored according to their
  * edit distance. Only the top terms are used for building the {@link BooleanQuery}.
@@ -38,9 +35,9 @@ import java.io.IOException;
  */
 public class FuzzyQuery extends MultiTermQuery {
   
-  public final static float defaultMinSimilarity = 0.5f;
+  public final static float defaultMinSimilarity = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
   public final static int defaultPrefixLength = 0;
-  public final static int defaultMaxExpansions = Integer.MAX_VALUE;
+  public final static int defaultMaxExpansions = 50;
   
   private float minimumSimilarity;
   private int prefixLength;
@@ -60,6 +57,12 @@ public class FuzzyQuery extends MultiTer
    *  <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
    *  as the query term is considered similar to the query term if the edit distance
    *  between both terms is less than <code>length(term)*0.5</code>
+   *  <p>
+   *  Alternatively, if <code>minimumSimilarity</code> is &gt;= 1f, it is interpreted 
+   *  as a pure Levenshtein edit distance. For example, a value of <code>2f</code>
+   *  will match all terms within an edit distance of <code>2</code> from the 
+   *  query term. Edit distances specified in this way may not be fractional.
+   *  
    * @param prefixLength length of common (non-fuzzy) prefix
    * @param maxExpansions the maximum number of terms to match. If this number is
    *  greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, 
@@ -72,9 +75,9 @@ public class FuzzyQuery extends MultiTer
     super(term.field());
     this.term = term;
     
-    if (minimumSimilarity >= 1.0f)
-      throw new IllegalArgumentException("minimumSimilarity >= 1");
-    else if (minimumSimilarity < 0.0f)
+    if (minimumSimilarity >= 1.0f && minimumSimilarity != (int)minimumSimilarity)
+      throw new IllegalArgumentException("fractional edit distances are not allowed");
+    if (minimumSimilarity < 0.0f)
       throw new IllegalArgumentException("minimumSimilarity < 0");
     if (prefixLength < 0)
       throw new IllegalArgumentException("prefixLength < 0");
@@ -84,7 +87,8 @@ public class FuzzyQuery extends MultiTer
     setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
     
     String text = term.text();
-    if (text.codePointCount(0, text.length()) > 1.0f / (1.0f - minimumSimilarity)) {
+    int len = text.codePointCount(0, text.length());
+    if (len > 0 && (minimumSimilarity >= 1f || len > 1.0f / (1.0f - minimumSimilarity))) {
       this.termLongEnough = true;
     }
     
@@ -93,21 +97,21 @@ public class FuzzyQuery extends MultiTer
   }
   
   /**
-   * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, prefixLength, Integer.MAX_VALUE)}.
+   * Calls {@link #FuzzyQuery(Term, float, int, int) FuzzyQuery(term, minimumSimilarity, prefixLength, defaultMaxExpansions)}.
    */
   public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
     this(term, minimumSimilarity, prefixLength, defaultMaxExpansions);
   }
   
   /**
-   * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0, Integer.MAX_VALUE)}.
+   * Calls {@link #FuzzyQuery(Term, float, int, int) FuzzyQuery(term, minimumSimilarity, 0, defaultMaxExpansions)}.
    */
   public FuzzyQuery(Term term, float minimumSimilarity) {
     this(term, minimumSimilarity, defaultPrefixLength, defaultMaxExpansions);
   }
 
   /**
-   * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0, Integer.MAX_VALUE)}.
+   * Calls {@link #FuzzyQuery(Term, float, int, int) FuzzyQuery(term, defaultMinSimilarity, 0, defaultMaxExpansions)}.
    */
   public FuzzyQuery(Term term) {
     this(term, defaultMinSimilarity, defaultPrefixLength, defaultMaxExpansions);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Tue Sep 28 15:11:12 2010
@@ -59,6 +59,7 @@ public final class FuzzyTermsEnum extend
   private final int termLength;
   
   private int maxEdits;
+  private final boolean raw;
 
   private List<ByteRunAutomaton> runAutomata;
   
@@ -77,15 +78,15 @@ public final class FuzzyTermsEnum extend
    * 
    * @param reader Delivers terms.
    * @param term Pattern term.
-   * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f.
+   * @param minSimilarity Minimum required similarity for terms from the reader.
    * @param prefixLength Length of required common prefix. Default value is 0.
    * @throws IOException
    */
   public FuzzyTermsEnum(IndexReader reader, Term term, 
       final float minSimilarity, final int prefixLength) throws IOException {
-    if (minSimilarity >= 1.0f)
-      throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1");
-    else if (minSimilarity < 0.0f)
+    if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity)
+      throw new IllegalArgumentException("fractional edit distances are not allowed");
+    if (minSimilarity < 0.0f)
       throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
     if(prefixLength < 0)
       throw new IllegalArgumentException("prefixLength cannot be less than 0");
@@ -102,12 +103,19 @@ public final class FuzzyTermsEnum extend
     //The prefix could be longer than the word.
     //It's kind of silly though.  It means we must match the entire word.
     this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
-    this.minSimilarity = minSimilarity;
-    this.scale_factor = 1.0f / (1.0f - minSimilarity);
-    
-    // calculate the maximum k edits for this similarity
-    maxEdits = initialMaxDistance(minSimilarity, termLength);
-  
+    // if minSimilarity >= 1, we treat it as number of edits
+    if (minSimilarity >= 1f) {
+      this.minSimilarity = 1 - (minSimilarity+1) / this.termLength;
+      maxEdits = (int) minSimilarity;
+      raw = true;
+    } else {
+      this.minSimilarity = minSimilarity;
+      // calculate the maximum k edits for this similarity
+      maxEdits = initialMaxDistance(this.minSimilarity, termLength);
+      raw = false;
+    }
+    this.scale_factor = 1.0f / (1.0f - this.minSimilarity);
+
     TermsEnum subEnum = getAutomatonEnum(maxEdits, null);
     setEnum(subEnum != null ? subEnum : 
       new LinearFuzzyTermsEnum());
@@ -176,15 +184,11 @@ public final class FuzzyTermsEnum extend
         setEnum(newEnum);
       }
     }
-    // TODO, besides changing linear -> automaton, and swapping in a smaller
-    // automaton, we can also use this information to optimize the linear case
-    // itself: re-init maxDistances so the fast-fail happens for more terms due
-    // to the now stricter constraints.
   }
 
   // for some raw min similarity and input term length, the maximum # of edits
   private int initialMaxDistance(float minimumSimilarity, int termLen) {
-    return (int) ((1-minimumSimilarity) * termLen);
+    return (int) ((1D-minimumSimilarity) * termLen);
   }
   
   // for some number of edits, the maximum possible scaled boost
@@ -442,7 +446,7 @@ public final class FuzzyTermsEnum extend
         //which is 8-3 or more precisely Math.abs(3-8).
         //if our maximum edit distance is 4, then we can discard this word
         //without looking at it.
-        return 0.0f;
+        return Float.NEGATIVE_INFINITY;
       }
       
       // init matrix d
@@ -473,7 +477,7 @@ public final class FuzzyTermsEnum extend
         if (j > maxDistance && bestPossibleEditDistance > maxDistance) {  //equal is okay, but not greater
           //the closest the target can be to the text is just too far away.
           //this target is leaving the party early.
-          return 0.0f;
+          return Float.NEGATIVE_INFINITY;
         }
 
         // copy current distance counts to 'previous row' distance counts: swap p and d
@@ -501,7 +505,8 @@ public final class FuzzyTermsEnum extend
      * @return the maximum levenshtein distance that we care about
      */
     private int calculateMaxDistance(int m) {
-      return (int) ((1-minSimilarity) * (Math.min(text.length, m) + realPrefixLength));
+      return raw ? maxEdits : Math.min(maxEdits, 
+          (int)((1-minSimilarity) * (Math.min(text.length, m) + realPrefixLength)));
     }
   }
 }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java Tue Sep 28 15:11:12 2010
@@ -87,7 +87,7 @@ public class TestMultiFieldQueryParser e
     assertEquals("((b:one t:one)^2.0) (b:two t:two)", q.toString());
 
     q = mfqp.parse("one~ two");
-    assertEquals("(b:one~0.5 t:one~0.5) (b:two t:two)", q.toString());
+    assertEquals("(b:one~2.0 t:one~2.0) (b:two t:two)", q.toString());
 
     q = mfqp.parse("one~0.8 two^2");
     assertEquals("(b:one~0.8 t:one~0.8) ((b:two t:two)^2.0)", q.toString());
@@ -274,7 +274,7 @@ public class TestMultiFieldQueryParser e
     q = parser.parse("bla*");
     assertEquals("f1:bla* f2:bla* f3:bla*", q.toString());
     q = parser.parse("bla~");
-    assertEquals("f1:bla~0.5 f2:bla~0.5 f3:bla~0.5", q.toString());
+    assertEquals("f1:bla~2.0 f2:bla~2.0 f3:bla~2.0", q.toString());
     q = parser.parse("[a TO c]");
     assertEquals("f1:[a TO c] f2:[a TO c] f3:[a TO c]", q.toString());
   }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java Tue Sep 28 15:11:12 2010
@@ -431,10 +431,10 @@ public class TestQueryParser extends Luc
   public void testWildcard() throws Exception {
     assertQueryEquals("term*", null, "term*");
     assertQueryEquals("term*^2", null, "term*^2.0");
-    assertQueryEquals("term~", null, "term~0.5");
+    assertQueryEquals("term~", null, "term~2.0");
     assertQueryEquals("term~0.7", null, "term~0.7");
-    assertQueryEquals("term~^2", null, "term~0.5^2.0");
-    assertQueryEquals("term^2~", null, "term~0.5^2.0");
+    assertQueryEquals("term~^3", null, "term~2.0^3.0");
+    assertQueryEquals("term^3~", null, "term~2.0^3.0");
     assertQueryEquals("term*germ", null, "term*germ");
     assertQueryEquals("term*germ^3", null, "term*germ^3.0");
 
@@ -446,7 +446,7 @@ public class TestQueryParser extends Luc
     assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
     assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
     fq = (FuzzyQuery)getQuery("term~", null);
-    assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
+    assertEquals(2.0f, fq.getMinSimilarity(), 0.1f);
     assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
     
     assertParseException("term~1.1"); // value > 1, throws exception
@@ -481,9 +481,9 @@ public class TestQueryParser extends Luc
     assertWildcardQueryEquals("TE?M", false, "TE?M");
     assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM");
 //  Fuzzy queries:
-    assertWildcardQueryEquals("Term~", "term~0.5");
-    assertWildcardQueryEquals("Term~", true, "term~0.5");
-    assertWildcardQueryEquals("Term~", false, "Term~0.5");
+    assertWildcardQueryEquals("Term~", "term~2.0");
+    assertWildcardQueryEquals("Term~", true, "term~2.0");
+    assertWildcardQueryEquals("Term~", false, "Term~2.0");
 //  Range queries:
     assertWildcardQueryEquals("[A TO C]", "[a TO c]");
     assertWildcardQueryEquals("[A TO C]", true, "[a TO c]");
@@ -761,10 +761,10 @@ public class TestQueryParser extends Luc
 
     assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
 
-    assertQueryEquals("a:b\\-c~", a, "a:b-c~0.5");
-    assertQueryEquals("a:b\\+c~", a, "a:b+c~0.5");
-    assertQueryEquals("a:b\\:c~", a, "a:b:c~0.5");
-    assertQueryEquals("a:b\\\\c~", a, "a:b\\c~0.5");
+    assertQueryEquals("a:b\\-c~", a, "a:b-c~2.0");
+    assertQueryEquals("a:b\\+c~", a, "a:b+c~2.0");
+    assertQueryEquals("a:b\\:c~", a, "a:b:c~2.0");
+    assertQueryEquals("a:b\\\\c~", a, "a:b\\c~2.0");
 
     assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
     assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java?rev=1002214&r1=1002213&r2=1002214&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java Tue Sep 28 15:11:12 2010
@@ -202,58 +202,58 @@ public class TestFuzzyQuery extends Luce
 
     FuzzyQuery query;
     // not similar enough:
-    query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);   
+    query = new FuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0);   
     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
     // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
     // in testDefaultFuzziness so a bigger difference is allowed:
-    query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 0);   
+    query = new FuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
     assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
     
     // now with prefix
-    query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 1);   
+    query = new FuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
     assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
-    query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 4);   
+    query = new FuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
     assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
-    query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 5);   
+    query = new FuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
 
     // no match, more than half of the characters are wrong:
-    query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 0);   
+    query = new FuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
     
     // now with prefix
-    query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 2);   
+    query = new FuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
 
     // "student" and "stellent" are indeed similar to "segment" by default:
-    query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 0);   
+    query = new FuzzyQuery(new Term("field", "student"), 0.5f, 0);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
-    query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 0);   
+    query = new FuzzyQuery(new Term("field", "stellent"), 0.5f, 0);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
     
     // now with prefix
-    query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 1);   
+    query = new FuzzyQuery(new Term("field", "student"), 0.5f, 1);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
-    query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 1);   
+    query = new FuzzyQuery(new Term("field", "stellent"), 0.5f, 1);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
-    query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 2);   
+    query = new FuzzyQuery(new Term("field", "student"), 0.5f, 2);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
-    query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 2);   
+    query = new FuzzyQuery(new Term("field", "stellent"), 0.5f, 2);   
     hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
     
@@ -328,7 +328,7 @@ public class TestFuzzyQuery extends Luce
     IndexSearcher searcher = new IndexSearcher(reader);
     writer.close();
     
-    FuzzyQuery query = new FuzzyQuery(new Term("field", "Lucene"));
+    FuzzyQuery query = new FuzzyQuery(new Term("field", "lucene"));
     query.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite());
     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
     assertEquals(3, hits.length);
@@ -378,6 +378,54 @@ public class TestFuzzyQuery extends Luce
     r.close();
     index.close();
   }
+  
+  public void testDistanceAsEditsParsing() throws Exception {
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer());
+    FuzzyQuery q = (FuzzyQuery) qp.parse("foobar~2");
+    assertEquals(2f, q.getMinSimilarity(), 0.0001f);
+  }
+  
+  public void testDistanceAsEditsSearching() throws Exception {
+    Directory index = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random, index);
+    addDoc("foobar", w);
+    addDoc("test", w);
+    addDoc("working", w);
+    IndexReader reader = w.getReader();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    w.close();
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer());
+    
+    FuzzyQuery q = (FuzzyQuery) qp.parse("fouba~2");
+    ScoreDoc[] hits = searcher.search(q, 10).scoreDocs;
+    assertEquals(1, hits.length);
+    assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
+    
+    q = (FuzzyQuery) qp.parse("foubara~2");
+    hits = searcher.search(q, 10).scoreDocs;
+    assertEquals(1, hits.length);
+    assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
+    
+    q = (FuzzyQuery) qp.parse("t~3");
+    hits = searcher.search(q, 10).scoreDocs;
+    assertEquals(1, hits.length);
+    assertEquals("test", searcher.doc(hits[0].doc).get("field"));
+    
+    q = new FuzzyQuery(new Term("field", "a"), 4f, 0, 50);
+    hits = searcher.search(q, 10).scoreDocs;
+    assertEquals(1, hits.length);
+    assertEquals("test", searcher.doc(hits[0].doc).get("field"));
+    
+    q = new FuzzyQuery(new Term("field", "a"), 6f, 0, 50);
+    hits = searcher.search(q, 10).scoreDocs;
+    assertEquals(2, hits.length);
+    assertEquals("test", searcher.doc(hits[0].doc).get("field"));
+    assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
+    
+    searcher.close();
+    reader.close();
+    index.close();
+  }
 
   private void addDoc(String text, RandomIndexWriter writer) throws IOException {
     Document doc = new Document();



Mime
View raw message