lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r696056 [1/2] - in /lucene/java/trunk: ./ src/java/org/apache/lucene/queryParser/ src/java/org/apache/lucene/search/ src/test/org/apache/lucene/queryParser/ src/test/org/apache/lucene/search/
Date Tue, 16 Sep 2008 21:03:21 GMT
Author: gsingers
Date: Tue Sep 16 14:03:21 2008
New Revision: 696056

URL: http://svn.apache.org/viewvc?rev=696056&view=rev
Log:
LUCENE-1279: Add support for Collator to RangeFilter, etc.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserConstants.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/Token.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java
    lucene/java/trunk/src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java
    lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
    lucene/java/trunk/src/test/org/apache/lucene/search/BaseTestRangeFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Sep 16 14:03:21 2008
@@ -283,6 +283,8 @@
 
 19. LUCENE-1354: Provide programmatic access to CheckIndex (Grant Ingersoll, Mike McCandless)
 
+20. LUCENE-1279: Add support for Collators to RangeFilter/Query and Query Parser.  (Steve Rowe via Grant Ingersoll) 
+
 Optimizations
 
  1. LUCENE-705: When building a compound file, use

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/CharStream.java Tue Sep 16 14:03:21 2008
@@ -1,4 +1,5 @@
-/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.0 */
+/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
+/* JavaCCOptions:STATIC=false */
 package org.apache.lucene.queryParser;
 
 /**
@@ -27,14 +28,14 @@
 
   /**
    * Returns the column position of the character last read.
-   * @deprecated 
+   * @deprecated
    * @see #getEndColumn
    */
   int getColumn();
 
   /**
    * Returns the line number of the character last read.
-   * @deprecated 
+   * @deprecated
    * @see #getEndLine
    */
   int getLine();
@@ -79,7 +80,7 @@
   char BeginToken() throws java.io.IOException;
 
   /**
-   * Returns a string made up of characters from the marked token beginning 
+   * Returns a string made up of characters from the marked token beginning
    * to the current buffer position. Implementations have the choice of returning
    * anything that they want to. For example, for efficiency, one might decide
    * to just return null, which is a valid implementation.
@@ -108,3 +109,4 @@
   void Done();
 
 }
+/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/ParseException.java Tue Sep 16 14:03:21 2008
@@ -1,4 +1,5 @@
-/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */
+/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */
+/* JavaCCOptions:KEEP_LINE_COL=null */
 package org.apache.lucene.queryParser;
 
 /**
@@ -51,6 +52,7 @@
     specialConstructor = false;
   }
 
+  /** Constructor with message. */
   public ParseException(String message) {
     super(message);
     specialConstructor = false;
@@ -105,7 +107,7 @@
         maxSize = expectedTokenSequences[i].length;
       }
       for (int j = 0; j < expectedTokenSequences[i].length; j++) {
-        expected.append(tokenImage[expectedTokenSequences[i][j]]).append(" ");
+        expected.append(tokenImage[expectedTokenSequences[i][j]]).append(' ');
       }
       if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
         expected.append("...");
@@ -120,8 +122,11 @@
         retval += tokenImage[0];
         break;
       }
+      retval += " " + tokenImage[tok.kind];
+      retval += " \"";
       retval += add_escapes(tok.image);
-      tok = tok.next; 
+      retval += " \"";
+      tok = tok.next;
     }
     retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn;
     retval += "." + eol;
@@ -138,7 +143,7 @@
    * The end of line string for this machine.
    */
   protected String eol = System.getProperty("line.separator", "\n");
- 
+
   /**
    * Used to convert raw characters to their escaped version
    * when these raw version cannot be used as part of an ASCII
@@ -190,3 +195,4 @@
    }
 
 }
+/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java Tue Sep 16 14:03:21 2008
@@ -4,6 +4,7 @@
 import java.io.IOException;
 import java.io.StringReader;
 import java.text.DateFormat;
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
@@ -132,6 +133,10 @@
   // maps field names to date resolutions
   Map fieldToDateResolution = null;
 
+  // The collator to use when determining range inclusion,
+  // for use when constructing RangeQuerys and ConstantScoreRangeQuerys.
+  Collator rangeCollator = null;
+
   /** The default operator for parsing queries. 
    * Use {@link QueryParser#setDefaultOperator} to change it.
    */
@@ -408,6 +413,35 @@
     return resolution;
   }
 
+  /** 
+   * Sets the collator used to determine index term inclusion in ranges
+   * specified either for ConstantScoreRangeQuerys or RangeQuerys (if
+   * {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
+   * value).
+   * <p/>
+   * <strong>WARNING:</strong> Setting the rangeCollator to a non-null
+   * collator using this method will cause every single index Term in the
+   * Field referenced by lowerTerm and/or upperTerm to be examined.
+   * Depending on the number of index Terms in this Field, the operation could
+   * be very slow.
+   *
+   *  @param rc  the collator to use when constructing RangeQuerys
+   *             and ConstantScoreRangeQuerys
+   */
+  public void setRangeCollator(Collator rc) {
+    rangeCollator = rc;
+  }
+
+  /**
+   * @return the collator used to determine index term inclusion in ranges
+   *  specified either for ConstantScoreRangeQuerys or RangeQuerys (if
+   *  {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
+   *  value).
+   */
+  public Collator getRangeCollator() {
+    return rangeCollator;
+  }
+
   /**
    * @deprecated use {@link #addClause(List, int, int, Query)} instead.
    */
@@ -711,11 +745,12 @@
     {
       return new RangeQuery(new Term(field, part1),
                             new Term(field, part2),
-                            inclusive);
+                            inclusive, rangeCollator);
     }
     else
     {
-      return new ConstantScoreRangeQuery(field,part1,part2,inclusive,inclusive);
+      return new ConstantScoreRangeQuery
+        (field, part1, part2, inclusive, inclusive, rangeCollator);
     }
   }
 
@@ -1448,26 +1483,26 @@
     throw new Error("Missing return statement in function");
   }
 
-  final private boolean jj_2_1(int xla) {
+  private boolean jj_2_1(int xla) {
     jj_la = xla; jj_lastpos = jj_scanpos = token;
     try { return !jj_3_1(); }
     catch(LookaheadSuccess ls) { return true; }
     finally { jj_save(0, xla); }
   }
 
-  final private boolean jj_3R_3() {
+  private boolean jj_3R_3() {
     if (jj_scan_token(STAR)) return true;
     if (jj_scan_token(COLON)) return true;
     return false;
   }
 
-  final private boolean jj_3R_2() {
+  private boolean jj_3R_2() {
     if (jj_scan_token(TERM)) return true;
     if (jj_scan_token(COLON)) return true;
     return false;
   }
 
-  final private boolean jj_3_1() {
+  private boolean jj_3_1() {
     Token xsp;
     xsp = jj_scanpos;
     if (jj_3R_2()) {
@@ -1477,31 +1512,34 @@
     return false;
   }
 
+  /** Generated Token Manager. */
   public QueryParserTokenManager token_source;
-  public Token token, jj_nt;
+  /** Current token. */
+  public Token token;
+  /** Next token. */
+  public Token jj_nt;
   private int jj_ntk;
   private Token jj_scanpos, jj_lastpos;
   private int jj_la;
-  public boolean lookingAhead = false;
-  private boolean jj_semLA;
   private int jj_gen;
   final private int[] jj_la1 = new int[23];
   static private int[] jj_la1_0;
   static private int[] jj_la1_1;
   static {
-      jj_la1_0();
-      jj_la1_1();
+      jj_la1_init_0();
+      jj_la1_init_1();
    }
-   private static void jj_la1_0() {
+   private static void jj_la1_init_0() {
       jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x3ed3f00,0x90000,0x20000,0x3ed2000,0x2690000,0x100000,0x100000,0x20000,0x30000000,0x4000000,0x30000000,0x20000,0x0,0x40000000,0x0,0x20000,0x100000,0x20000,0x3ed0000,};
    }
-   private static void jj_la1_1() {
+   private static void jj_la1_init_1() {
       jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x0,0x3,0x0,0x0,0x0,0x0,};
    }
   final private JJCalls[] jj_2_rtns = new JJCalls[1];
   private boolean jj_rescan = false;
   private int jj_gc = 0;
 
+  /** Constructor with user supplied CharStream. */
   public QueryParser(CharStream stream) {
     token_source = new QueryParserTokenManager(stream);
     token = new Token();
@@ -1511,6 +1549,7 @@
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
+  /** Reinitialise. */
   public void ReInit(CharStream stream) {
     token_source.ReInit(stream);
     token = new Token();
@@ -1520,6 +1559,7 @@
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
+  /** Constructor with generated Token Manager. */
   public QueryParser(QueryParserTokenManager tm) {
     token_source = tm;
     token = new Token();
@@ -1529,6 +1569,7 @@
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
+  /** Reinitialise. */
   public void ReInit(QueryParserTokenManager tm) {
     token_source = tm;
     token = new Token();
@@ -1538,7 +1579,7 @@
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
-  final private Token jj_consume_token(int kind) throws ParseException {
+  private Token jj_consume_token(int kind) throws ParseException {
     Token oldToken;
     if ((oldToken = token).next != null) token = token.next;
     else token = token.next = token_source.getNextToken();
@@ -1564,7 +1605,7 @@
 
   static private final class LookaheadSuccess extends java.lang.Error { }
   final private LookaheadSuccess jj_ls = new LookaheadSuccess();
-  final private boolean jj_scan_token(int kind) {
+  private boolean jj_scan_token(int kind) {
     if (jj_scanpos == jj_lastpos) {
       jj_la--;
       if (jj_scanpos.next == null) {
@@ -1585,6 +1626,8 @@
     return false;
   }
 
+
+/** Get the next Token. */
   final public Token getNextToken() {
     if (token.next != null) token = token.next;
     else token = token.next = token_source.getNextToken();
@@ -1593,8 +1636,9 @@
     return token;
   }
 
+/** Get the specific Token. */
   final public Token getToken(int index) {
-    Token t = lookingAhead ? jj_scanpos : token;
+    Token t = token;
     for (int i = 0; i < index; i++) {
       if (t.next != null) t = t.next;
       else t = t.next = token_source.getNextToken();
@@ -1602,14 +1646,14 @@
     return t;
   }
 
-  final private int jj_ntk() {
+  private int jj_ntk() {
     if ((jj_nt=token.next) == null)
       return (jj_ntk = (token.next=token_source.getNextToken()).kind);
     else
       return (jj_ntk = jj_nt.kind);
   }
 
-  private java.util.Vector jj_expentries = new java.util.Vector();
+  private java.util.List jj_expentries = new java.util.ArrayList();
   private int[] jj_expentry;
   private int jj_kind = -1;
   private int[] jj_lasttokens = new int[100];
@@ -1624,31 +1668,26 @@
       for (int i = 0; i < jj_endpos; i++) {
         jj_expentry[i] = jj_lasttokens[i];
       }
-      boolean exists = false;
-      for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) {
-        int[] oldentry = (int[])(e.nextElement());
+      jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) {
+        int[] oldentry = (int[])(it.next());
         if (oldentry.length == jj_expentry.length) {
-          exists = true;
           for (int i = 0; i < jj_expentry.length; i++) {
             if (oldentry[i] != jj_expentry[i]) {
-              exists = false;
-              break;
+              continue jj_entries_loop;
             }
           }
-          if (exists) break;
+          jj_expentries.add(jj_expentry);
+          break jj_entries_loop;
         }
       }
-      if (!exists) jj_expentries.addElement(jj_expentry);
       if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
     }
   }
 
+  /** Generate ParseException. */
   public ParseException generateParseException() {
-    jj_expentries.removeAllElements();
+    jj_expentries.clear();
     boolean[] la1tokens = new boolean[34];
-    for (int i = 0; i < 34; i++) {
-      la1tokens[i] = false;
-    }
     if (jj_kind >= 0) {
       la1tokens[jj_kind] = true;
       jj_kind = -1;
@@ -1669,7 +1708,7 @@
       if (la1tokens[i]) {
         jj_expentry = new int[1];
         jj_expentry[0] = i;
-        jj_expentries.addElement(jj_expentry);
+        jj_expentries.add(jj_expentry);
       }
     }
     jj_endpos = 0;
@@ -1677,18 +1716,20 @@
     jj_add_error_token(0, 0);
     int[][] exptokseq = new int[jj_expentries.size()][];
     for (int i = 0; i < jj_expentries.size(); i++) {
-      exptokseq[i] = (int[])jj_expentries.elementAt(i);
+      exptokseq[i] = (int[])jj_expentries.get(i);
     }
     return new ParseException(token, exptokseq, tokenImage);
   }
 
+  /** Enable tracing. */
   final public void enable_tracing() {
   }
 
+  /** Disable tracing. */
   final public void disable_tracing() {
   }
 
-  final private void jj_rescan_token() {
+  private void jj_rescan_token() {
     jj_rescan = true;
     for (int i = 0; i < 1; i++) {
     try {
@@ -1707,7 +1748,7 @@
     jj_rescan = false;
   }
 
-  final private void jj_save(int index, int xla) {
+  private void jj_save(int index, int xla) {
     JJCalls p = jj_2_rtns[index];
     while (p.gen > jj_gen) {
       if (p.next == null) { p = p.next = new JJCalls(); break; }

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj Tue Sep 16 14:03:21 2008
@@ -28,6 +28,7 @@
 import java.io.IOException;
 import java.io.StringReader;
 import java.text.DateFormat;
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
@@ -159,6 +160,10 @@
   // maps field names to date resolutions
   Map fieldToDateResolution = null;
 
+  // The collator to use when determining range inclusion,
+  // for use when constructing RangeQuerys and ConstantScoreRangeQuerys.
+  Collator rangeCollator = null;
+
   /** The default operator for parsing queries. 
    * Use {@link QueryParser#setDefaultOperator} to change it.
    */
@@ -434,6 +439,35 @@
 
     return resolution;
   }
+  
+  /** 
+   * Sets the collator used to determine index term inclusion in ranges
+   * specified either for ConstantScoreRangeQuerys or RangeQuerys (if
+   * {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
+   * value).
+   * <p/>
+   * <strong>WARNING:</strong> Setting the rangeCollator to a non-null
+   * collator using this method will cause every single index Term in the
+   * Field referenced by lowerTerm and/or upperTerm to be examined.
+   * Depending on the number of index Terms in this Field, the operation could
+   * be very slow.
+   *
+   *  @param rc  the collator to use when constructing RangeQuerys
+   *             and ConstantScoreRangeQuerys
+   */
+  public void setRangeCollator(Collator rc) {
+    rangeCollator = rc;
+  }
+  
+  /**
+   * @return the collator used to determine index term inclusion in ranges
+   *  specified either for ConstantScoreRangeQuerys or RangeQuerys (if
+   *  {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
+   *  value).
+   */
+  public Collator getRangeCollator() {
+    return rangeCollator;
+  }
 
   /**
    * @deprecated use {@link #addClause(List, int, int, Query)} instead.
@@ -738,11 +772,12 @@
     {
       return new RangeQuery(new Term(field, part1),
                             new Term(field, part2),
-			    inclusive);
+                            inclusive, rangeCollator);
     }
     else
     {
-      return new ConstantScoreRangeQuery(field,part1,part2,inclusive,inclusive);
+      return new ConstantScoreRangeQuery
+        (field, part1, part2, inclusive, inclusive, rangeCollator);
     }
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserConstants.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserConstants.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserConstants.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserConstants.java Tue Sep 16 14:03:21 2008
@@ -1,47 +1,90 @@
 /* Generated By:JavaCC: Do not edit this line. QueryParserConstants.java */
 package org.apache.lucene.queryParser;
 
+
+/**
+ * Token literal values and constants.
+ * Generated by org.javacc.parser.OtherFilesGen#start()
+ */
 public interface QueryParserConstants {
 
+  /** End of File. */
   int EOF = 0;
+  /** RegularExpression Id. */
   int _NUM_CHAR = 1;
+  /** RegularExpression Id. */
   int _ESCAPED_CHAR = 2;
+  /** RegularExpression Id. */
   int _TERM_START_CHAR = 3;
+  /** RegularExpression Id. */
   int _TERM_CHAR = 4;
+  /** RegularExpression Id. */
   int _WHITESPACE = 5;
+  /** RegularExpression Id. */
   int _QUOTED_CHAR = 6;
+  /** RegularExpression Id. */
   int AND = 8;
+  /** RegularExpression Id. */
   int OR = 9;
+  /** RegularExpression Id. */
   int NOT = 10;
+  /** RegularExpression Id. */
   int PLUS = 11;
+  /** RegularExpression Id. */
   int MINUS = 12;
+  /** RegularExpression Id. */
   int LPAREN = 13;
+  /** RegularExpression Id. */
   int RPAREN = 14;
+  /** RegularExpression Id. */
   int COLON = 15;
+  /** RegularExpression Id. */
   int STAR = 16;
+  /** RegularExpression Id. */
   int CARAT = 17;
+  /** RegularExpression Id. */
   int QUOTED = 18;
+  /** RegularExpression Id. */
   int TERM = 19;
+  /** RegularExpression Id. */
   int FUZZY_SLOP = 20;
+  /** RegularExpression Id. */
   int PREFIXTERM = 21;
+  /** RegularExpression Id. */
   int WILDTERM = 22;
+  /** RegularExpression Id. */
   int RANGEIN_START = 23;
+  /** RegularExpression Id. */
   int RANGEEX_START = 24;
+  /** RegularExpression Id. */
   int NUMBER = 25;
+  /** RegularExpression Id. */
   int RANGEIN_TO = 26;
+  /** RegularExpression Id. */
   int RANGEIN_END = 27;
+  /** RegularExpression Id. */
   int RANGEIN_QUOTED = 28;
+  /** RegularExpression Id. */
   int RANGEIN_GOOP = 29;
+  /** RegularExpression Id. */
   int RANGEEX_TO = 30;
+  /** RegularExpression Id. */
   int RANGEEX_END = 31;
+  /** RegularExpression Id. */
   int RANGEEX_QUOTED = 32;
+  /** RegularExpression Id. */
   int RANGEEX_GOOP = 33;
 
+  /** Lexical state. */
   int Boost = 0;
+  /** Lexical state. */
   int RangeEx = 1;
+  /** Lexical state. */
   int RangeIn = 2;
+  /** Lexical state. */
   int DEFAULT = 3;
 
+  /** Literal token values. */
   String[] tokenImage = {
     "<EOF>",
     "<_NUM_CHAR>",

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java Tue Sep 16 14:03:21 2008
@@ -3,6 +3,7 @@
 import java.io.IOException;
 import java.io.StringReader;
 import java.text.DateFormat;
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
@@ -30,9 +31,13 @@
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.Parameter;
 
+/** Token Manager. */
 public class QueryParserTokenManager implements QueryParserConstants
 {
+
+  /** Debug output. */
   public  java.io.PrintStream debugStream = System.out;
+  /** Set debug output. */
   public  void setDebugStream(java.io.PrintStream ds) { debugStream = ds; }
 private final int jjStopStringLiteralDfa_3(int pos, long active0)
 {
@@ -46,21 +51,13 @@
 {
    return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1);
 }
-private final int jjStopAtPos(int pos, int kind)
+private int jjStopAtPos(int pos, int kind)
 {
    jjmatchedKind = kind;
    jjmatchedPos = pos;
    return pos + 1;
 }
-private final int jjStartNfaWithStates_3(int pos, int kind, int state)
-{
-   jjmatchedKind = kind;
-   jjmatchedPos = pos;
-   try { curChar = input_stream.readChar(); }
-   catch(java.io.IOException e) { return pos + 1; }
-   return jjMoveNfa_3(state, pos + 1);
-}
-private final int jjMoveStringLiteralDfa0_3()
+private int jjMoveStringLiteralDfa0_3()
 {
    switch(curChar)
    {
@@ -86,35 +83,13 @@
          return jjMoveNfa_3(0, 0);
    }
 }
-private final void jjCheckNAdd(int state)
+private int jjStartNfaWithStates_3(int pos, int kind, int state)
 {
-   if (jjrounds[state] != jjround)
-   {
-      jjstateSet[jjnewStateCnt++] = state;
-      jjrounds[state] = jjround;
-   }
-}
-private final void jjAddStates(int start, int end)
-{
-   do {
-      jjstateSet[jjnewStateCnt++] = jjnextStates[start];
-   } while (start++ != end);
-}
-private final void jjCheckNAddTwoStates(int state1, int state2)
-{
-   jjCheckNAdd(state1);
-   jjCheckNAdd(state2);
-}
-private final void jjCheckNAddStates(int start, int end)
-{
-   do {
-      jjCheckNAdd(jjnextStates[start]);
-   } while (start++ != end);
-}
-private final void jjCheckNAddStates(int start)
-{
-   jjCheckNAdd(jjnextStates[start]);
-   jjCheckNAdd(jjnextStates[start + 1]);
+   jjmatchedKind = kind;
+   jjmatchedPos = pos;
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) { return pos + 1; }
+   return jjMoveNfa_3(state, pos + 1);
 }
 static final long[] jjbitVec0 = {
    0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
@@ -122,14 +97,13 @@
 static final long[] jjbitVec2 = {
    0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
 };
-private final int jjMoveNfa_3(int startState, int curPos)
+private int jjMoveNfa_3(int startState, int curPos)
 {
-   int[] nextStates;
    int startsAt = 0;
    jjnewStateCnt = 36;
    int i = 1;
    jjstateSet[0] = startState;
-   int j, kind = 0x7fffffff;
+   int kind = 0x7fffffff;
    for (;;)
    {
       if (++jjround == 0x7fffffff)
@@ -137,7 +111,7 @@
       if (curChar < 64)
       {
          long l = 1L << curChar;
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -276,7 +250,7 @@
       else if (curChar < 128)
       {
          long l = 1L << (curChar & 077);
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -450,7 +424,7 @@
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
          long l2 = 1L << (curChar & 077);
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -545,15 +519,7 @@
 {
    return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1);
 }
-private final int jjStartNfaWithStates_1(int pos, int kind, int state)
-{
-   jjmatchedKind = kind;
-   jjmatchedPos = pos;
-   try { curChar = input_stream.readChar(); }
-   catch(java.io.IOException e) { return pos + 1; }
-   return jjMoveNfa_1(state, pos + 1);
-}
-private final int jjMoveStringLiteralDfa0_1()
+private int jjMoveStringLiteralDfa0_1()
 {
    switch(curChar)
    {
@@ -565,7 +531,7 @@
          return jjMoveNfa_1(0, 0);
    }
 }
-private final int jjMoveStringLiteralDfa1_1(long active0)
+private int jjMoveStringLiteralDfa1_1(long active0)
 {
    try { curChar = input_stream.readChar(); }
    catch(java.io.IOException e) {
@@ -583,14 +549,21 @@
    }
    return jjStartNfa_1(0, active0);
 }
-private final int jjMoveNfa_1(int startState, int curPos)
+private int jjStartNfaWithStates_1(int pos, int kind, int state)
+{
+   jjmatchedKind = kind;
+   jjmatchedPos = pos;
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) { return pos + 1; }
+   return jjMoveNfa_1(state, pos + 1);
+}
+private int jjMoveNfa_1(int startState, int curPos)
 {
-   int[] nextStates;
    int startsAt = 0;
    jjnewStateCnt = 7;
    int i = 1;
    jjstateSet[0] = startState;
-   int j, kind = 0x7fffffff;
+   int kind = 0x7fffffff;
    for (;;)
    {
       if (++jjround == 0x7fffffff)
@@ -598,7 +571,7 @@
       if (curChar < 64)
       {
          long l = 1L << curChar;
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -647,7 +620,7 @@
       else if (curChar < 128)
       {
          long l = 1L << (curChar & 077);
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -677,7 +650,7 @@
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
          long l2 = 1L << (curChar & 077);
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -710,18 +683,17 @@
       catch(java.io.IOException e) { return curPos; }
    }
 }
-private final int jjMoveStringLiteralDfa0_0()
+private int jjMoveStringLiteralDfa0_0()
 {
    return jjMoveNfa_0(0, 0);
 }
-private final int jjMoveNfa_0(int startState, int curPos)
+private int jjMoveNfa_0(int startState, int curPos)
 {
-   int[] nextStates;
    int startsAt = 0;
    jjnewStateCnt = 3;
    int i = 1;
    jjstateSet[0] = startState;
-   int j, kind = 0x7fffffff;
+   int kind = 0x7fffffff;
    for (;;)
    {
       if (++jjround == 0x7fffffff)
@@ -729,7 +701,7 @@
       if (curChar < 64)
       {
          long l = 1L << curChar;
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -758,7 +730,7 @@
       else if (curChar < 128)
       {
          long l = 1L << (curChar & 077);
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -773,7 +745,7 @@
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
          long l2 = 1L << (curChar & 077);
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -813,15 +785,7 @@
 {
    return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1);
 }
-private final int jjStartNfaWithStates_2(int pos, int kind, int state)
-{
-   jjmatchedKind = kind;
-   jjmatchedPos = pos;
-   try { curChar = input_stream.readChar(); }
-   catch(java.io.IOException e) { return pos + 1; }
-   return jjMoveNfa_2(state, pos + 1);
-}
-private final int jjMoveStringLiteralDfa0_2()
+private int jjMoveStringLiteralDfa0_2()
 {
    switch(curChar)
    {
@@ -833,7 +797,7 @@
          return jjMoveNfa_2(0, 0);
    }
 }
-private final int jjMoveStringLiteralDfa1_2(long active0)
+private int jjMoveStringLiteralDfa1_2(long active0)
 {
    try { curChar = input_stream.readChar(); }
    catch(java.io.IOException e) {
@@ -851,14 +815,21 @@
    }
    return jjStartNfa_2(0, active0);
 }
-private final int jjMoveNfa_2(int startState, int curPos)
+private int jjStartNfaWithStates_2(int pos, int kind, int state)
+{
+   jjmatchedKind = kind;
+   jjmatchedPos = pos;
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) { return pos + 1; }
+   return jjMoveNfa_2(state, pos + 1);
+}
+private int jjMoveNfa_2(int startState, int curPos)
 {
-   int[] nextStates;
    int startsAt = 0;
    jjnewStateCnt = 7;
    int i = 1;
    jjstateSet[0] = startState;
-   int j, kind = 0x7fffffff;
+   int kind = 0x7fffffff;
    for (;;)
    {
       if (++jjround == 0x7fffffff)
@@ -866,7 +837,7 @@
       if (curChar < 64)
       {
          long l = 1L << curChar;
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -915,7 +886,7 @@
       else if (curChar < 128)
       {
          long l = 1L << (curChar & 077);
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -945,7 +916,7 @@
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
          long l2 = 1L << (curChar & 077);
-         MatchLoop: do
+         do
          {
             switch(jjstateSet[--i])
             {
@@ -988,22 +959,28 @@
    {
       case 0:
          return ((jjbitVec2[i2] & l2) != 0L);
-      default : 
+      default :
          if ((jjbitVec0[i1] & l1) != 0L)
             return true;
          return false;
    }
 }
+
+/** Token literal values. */
 public static final String[] jjstrLiteralImages = {
 "", null, null, null, null, null, null, null, null, null, null, "\53", "\55", 
 "\50", "\51", "\72", "\52", "\136", null, null, null, null, null, "\133", "\173", 
 null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, };
+
+/** Lexer state names. */
 public static final String[] lexStateNames = {
-   "Boost", 
-   "RangeEx", 
-   "RangeIn", 
-   "DEFAULT", 
+   "Boost",
+   "RangeEx",
+   "RangeIn",
+   "DEFAULT",
 };
+
+/** Lex State array. */
 public static final int[] jjnewLexState = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 
    3, -1, 3, -1, -1, -1, 3, -1, -1, 
@@ -1018,13 +995,18 @@
 private final int[] jjrounds = new int[36];
 private final int[] jjstateSet = new int[72];
 protected char curChar;
+/** Constructor. */
 public QueryParserTokenManager(CharStream stream){
    input_stream = stream;
 }
+
+/** Constructor. */
 public QueryParserTokenManager(CharStream stream, int lexState){
    this(stream);
    SwitchTo(lexState);
 }
+
+/** Reinitialise the token manager with a new input stream. */
 public void ReInit(CharStream stream)
 {
    jjmatchedPos = jjnewStateCnt = 0;
@@ -1032,18 +1014,22 @@
    input_stream = stream;
    ReInitRounds();
 }
-private final void ReInitRounds()
+private void ReInitRounds()
 {
    int i;
    jjround = 0x80000001;
    for (i = 36; i-- > 0;)
       jjrounds[i] = 0x80000000;
 }
+
+/** Reinitialise the token manager with a new input stream and lex state. */
 public void ReInit(CharStream stream, int lexState)
 {
    ReInit(stream);
    SwitchTo(lexState);
 }
+
+/** Switch to specified lex state. */
 public void SwitchTo(int lexState)
 {
    if (lexState >= 4 || lexState < 0)
@@ -1054,14 +1040,25 @@
 
 protected Token jjFillToken()
 {
-   Token t = Token.newToken(jjmatchedKind);
-   t.kind = jjmatchedKind;
+   final Token t;
+   final String curTokenImage;
+   final int beginLine;
+   final int endLine;
+   final int beginColumn;
+   final int endColumn;
    String im = jjstrLiteralImages[jjmatchedKind];
-   t.image = (im == null) ? input_stream.GetImage() : im;
-   t.beginLine = input_stream.getBeginLine();
-   t.beginColumn = input_stream.getBeginColumn();
-   t.endLine = input_stream.getEndLine();
-   t.endColumn = input_stream.getEndColumn();
+   curTokenImage = (im == null) ? input_stream.GetImage() : im;
+   beginLine = input_stream.getBeginLine();
+   beginColumn = input_stream.getBeginColumn();
+   endLine = input_stream.getEndLine();
+   endColumn = input_stream.getEndColumn();
+   t = Token.newToken(jjmatchedKind, curTokenImage);
+
+   t.beginLine = beginLine;
+   t.endLine = endLine;
+   t.beginColumn = beginColumn;
+   t.endColumn = endColumn;
+
    return t;
 }
 
@@ -1072,22 +1069,21 @@
 int jjmatchedPos;
 int jjmatchedKind;
 
+/** Get the next Token. */
 public Token getNextToken() 
 {
-  int kind;
-  Token specialToken = null;
   Token matchedToken;
   int curPos = 0;
 
   EOFLoop :
   for (;;)
-  {   
-   try   
-   {     
+  {
+   try
+   {
       curChar = input_stream.BeginToken();
-   }     
+   }
    catch(java.io.IOException e)
-   {        
+   {
       jjmatchedKind = 0;
       matchedToken = jjFillToken();
       return matchedToken;
@@ -1157,4 +1153,31 @@
   }
 }
 
+private void jjCheckNAdd(int state)
+{
+   if (jjrounds[state] != jjround)
+   {
+      jjstateSet[jjnewStateCnt++] = state;
+      jjrounds[state] = jjround;
+   }
+}
+private void jjAddStates(int start, int end)
+{
+   do {
+      jjstateSet[jjnewStateCnt++] = jjnextStates[start];
+   } while (start++ != end);
+}
+private void jjCheckNAddTwoStates(int state1, int state2)
+{
+   jjCheckNAdd(state1);
+   jjCheckNAdd(state2);
+}
+
+private void jjCheckNAddStates(int start, int end)
+{
+   do {
+      jjCheckNAdd(jjnextStates[start]);
+   } while (start++ != end);
+}
+
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/Token.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/Token.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/Token.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/Token.java Tue Sep 16 14:03:21 2008
@@ -1,4 +1,5 @@
-/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
+/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */
+/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */
 package org.apache.lucene.queryParser;
 
 /**
@@ -14,12 +15,14 @@
    */
   public int kind;
 
-  /**
-   * beginLine and beginColumn describe the position of the first character
-   * of this token; endLine and endColumn describe the position of the
-   * last character of this token.
-   */
-  public int beginLine, beginColumn, endLine, endColumn;
+  /** The line number of the first character of this Token. */
+  public int beginLine;
+  /** The column number of the first character of this Token. */
+  public int beginColumn;
+  /** The line number of the last character of this Token. */
+  public int endLine;
+  /** The column number of the last character of this Token. */
+  public int endColumn;
 
   /**
    * The string image of the token.
@@ -51,6 +54,40 @@
   public Token specialToken;
 
   /**
+   * An optional attribute value of the Token.
+   * Tokens which are not used as syntactic sugar will often contain
+   * meaningful values that will be used later on by the compiler or
+   * interpreter. This attribute value is often different from the image.
+   * Any subclass of Token that actually wants to return a non-null value can
+   * override this method as appropriate.
+   */
+  public Object getValue() {
+    return null;
+  }
+
+  /**
+   * No-argument constructor
+   */
+  public Token() {}
+
+  /**
+   * Constructs a new token for the specified Kind.
+   */
+  public Token(int kind)
+  {
+     this(kind, null);
+  }
+
+  /**
+   * Constructs a new token for the specified Image and Kind.
+   */
+  public Token(int kind, String image)
+  {
+     this.kind = kind;
+     this.image = image;
+  }
+
+  /**
    * Returns the image.
    */
   public String toString()
@@ -63,19 +100,25 @@
    * can create and return subclass objects based on the value of ofKind.
    * Simply add the cases to the switch for all those special cases.
    * For example, if you have a subclass of Token called IDToken that
-   * you want to create if ofKind is ID, simlpy add something like :
+   * you want to create if ofKind is ID, simply add something like :
    *
-   *    case MyParserConstants.ID : return new IDToken();
+   *    case MyParserConstants.ID : return new IDToken(ofKind, image);
    *
    * to the following switch statement. Then you can cast matchedToken
-   * variable to the appropriate type and use it in your lexical actions.
+   * variable to the appropriate type and use it in your lexical actions.
    */
-  public static final Token newToken(int ofKind)
+  public static Token newToken(int ofKind, String image)
   {
      switch(ofKind)
      {
-       default : return new Token();
+       default : return new Token(ofKind, image);
      }
   }
 
+  public static Token newToken(int ofKind)
+  {
+     return newToken(ofKind, null);
+  }
+
 }
+/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/TokenMgrError.java Tue Sep 16 14:03:21 2008
@@ -1,19 +1,22 @@
-/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */
+/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */
+/* JavaCCOptions: */
 package org.apache.lucene.queryParser;
 
+/** Token Manager Error. */
 public class TokenMgrError extends Error
 {
+
    /*
     * Ordinals for various reasons why an Error of this type can be thrown.
     */
 
    /**
-    * Lexical error occured.
+    * Lexical error occurred.
     */
    static final int LEXICAL_ERROR = 0;
 
    /**
-    * An attempt wass made to create a second instance of a static token manager.
+    * An attempt was made to create a second instance of a static token manager.
     */
    static final int STATIC_LEXER_ERROR = 1;
 
@@ -34,7 +37,7 @@
    int errorCode;
 
    /**
-    * Replaces unprintable characters by their espaced (or unicode escaped)
+    * Replaces unprintable characters by their escaped (or unicode escaped)
     * equivalents in the given string
     */
    protected static final String addEscapes(String str) {
@@ -85,12 +88,12 @@
    /**
     * Returns a detailed message for the Error when it is thrown by the
     * token manager to indicate a lexical error.
-    * Parameters : 
-    *    EOFSeen     : indicates if EOF caused the lexicl error
-    *    curLexState : lexical state in which this error occured
-    *    errorLine   : line number when the error occured
-    *    errorColumn : column number when the error occured
-    *    errorAfter  : prefix that was seen before this error occured
+    * Parameters :
+    *    EOFSeen     : indicates if EOF caused the lexical error
+    *    curLexState : lexical state in which this error occurred
+    *    errorLine   : line number when the error occurred
+    *    errorColumn : column number when the error occurred
+    *    errorAfter  : prefix that was seen before this error occurred
     *    curchar     : the offending character
     * Note: You can customize the lexical error message by modifying this method.
     */
@@ -105,7 +108,7 @@
    /**
     * You can also modify the body of this method to customize your error messages.
     * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
-    * of end-users concern, so you can return something like : 
+    * of end-users concern, so you can return something like :
     *
     *     "Internal Error : Please file a bug report .... "
     *
@@ -119,15 +122,19 @@
     * Constructors of various flavors follow.
     */
 
+   /** No arg constructor. */
    public TokenMgrError() {
    }
 
+   /** Constructor with message and reason. */
    public TokenMgrError(String message, int reason) {
       super(message);
       errorCode = reason;
    }
 
+   /** Full Constructor. */
    public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
       this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
    }
 }
+/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java Tue Sep 16 14:03:21 2008
@@ -20,6 +20,7 @@
 import org.apache.lucene.index.IndexReader;
 
 import java.io.IOException;
+import java.text.Collator;
 
 /**
  * A range query that returns a constant score equal to its boost for
@@ -42,6 +43,7 @@
   private final String upperVal;
   private final boolean includeLower;
   private final boolean includeUpper;
+  private Collator collator;
 
 
   public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper)
@@ -65,6 +67,14 @@
     this.includeUpper = includeUpper;
   }
 
+  public ConstantScoreRangeQuery(String fieldName, String lowerVal,
+                                 String upperVal, boolean includeLower,
+                                 boolean includeUpper, Collator collator)
+  {
+    this(fieldName, lowerVal, upperVal, includeLower, includeUpper);
+    this.collator = collator;
+  }
+
   /** Returns the field name for this query */
   public String getField() { return fieldName; }
   /** Returns the value of the lower endpoint of this range query, null if open ended */
@@ -78,9 +88,10 @@
 
   public Query rewrite(IndexReader reader) throws IOException {
     // Map to RangeFilter semantics which are slightly different...
-    RangeFilter rangeFilt = new RangeFilter(fieldName,
-            lowerVal!=null?lowerVal:"",
-            upperVal, lowerVal==""?false:includeLower, upperVal==null?false:includeUpper);
+    RangeFilter rangeFilt = new RangeFilter
+        (fieldName, lowerVal != null?lowerVal:"", upperVal,
+         lowerVal==""?false:includeLower, upperVal==null?false:includeUpper,
+         collator);
     Query q = new ConstantScoreQuery(rangeFilt);
     q.setBoost(getBoost());
     return q;
@@ -117,6 +128,7 @@
         if (this.fieldName != other.fieldName  // interned comparison
             || this.includeLower != other.includeLower
             || this.includeUpper != other.includeUpper
+            || (this.collator != null && ! this.collator.equals(other.collator))
            ) { return false; }
         if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false;
         if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false;
@@ -134,6 +146,7 @@
       h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69);
       h ^= (includeLower ? 0x665599aa : 0)
          ^ (includeUpper ? 0x99aa5566 : 0);
+      h ^= collator != null ? collator.hashCode() : 0;
       return h;
     }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java Tue Sep 16 14:03:21 2008
@@ -25,6 +25,7 @@
 
 import java.io.IOException;
 import java.util.BitSet;
+import java.text.Collator;
 
 /**
  * A Filter that restricts search results to a range of values in a given
@@ -42,8 +43,9 @@
     private String upperTerm;
     private boolean includeLower;
     private boolean includeUpper;
+    private Collator collator;
 
-    /**
+  /**
      * @param fieldName The field this range applies to
      * @param lowerTerm The lower bound on this range
      * @param upperTerm The upper bound on this range
@@ -74,7 +76,31 @@
                 ("The upper bound must be non-null to be inclusive");
         }
     }
-    
+
+    /**
+     * <strong>WARNING:</strong> Using this constructor and supplying a non-null
+     * value in the <code>collator</code> parameter will cause every single 
+     * index Term in the Field named by <code>fieldName</code> to be
+     * examined.  Depending on the number of index Terms in this Field, the 
+     * operation could be very slow.
+     *
+     * @param lowerTerm The lower bound on this range
+     * @param upperTerm The upper bound on this range
+     * @param includeLower Does this range include the lower bound?
+     * @param includeUpper Does this range include the upper bound?
+     * @param collator The collator to use when determining range inclusion; set
+     *  to null to use Unicode code point ordering instead of collation.
+     * @throws IllegalArgumentException if both terms are null or if
+     *  lowerTerm is null and includeLower is true (similar for upperTerm
+     *  and includeUpper)
+     */
+    public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
+                       boolean includeLower, boolean includeUpper,
+                       Collator collator) {
+        this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
+        this.collator = collator;
+    }
+
     /**
      * Constructs a filter for field <code>fieldName</code> matching
      * less than or equal to <code>upperTerm</code>.
@@ -100,7 +126,7 @@
     public BitSet bits(IndexReader reader) throws IOException {
         BitSet bits = new BitSet(reader.maxDoc());
         TermEnum enumerator =
-            (null != lowerTerm
+            (null != lowerTerm && collator == null
              ? reader.terms(new Term(fieldName, lowerTerm))
              : reader.terms(new Term(fieldName)));
         
@@ -110,40 +136,61 @@
                 return bits;
             }
             
-            boolean checkLower = false;
-            if (!includeLower) // make adjustments to set to exclusive
-                checkLower = true;
-        
             TermDocs termDocs = reader.termDocs();
             try {
-                
-                do {
-                    Term term = enumerator.term();
-                    if (term != null && term.field().equals(fieldName)) {
-                        if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
-                            checkLower = false;
-                            if (upperTerm != null) {
-                                int compare = upperTerm.compareTo(term.text());
-                                /* if beyond the upper term, or is exclusive and
-                                 * this is equal to the upper term, break out */
-                                if ((compare < 0) ||
-                                    (!includeUpper && compare==0)) {
-                                    break;
+                if (collator != null) {
+                    do {
+                        Term term = enumerator.term();
+                        if (term != null && term.field().equals(fieldName)) {
+                            if ((lowerTerm == null
+                                 || (includeLower
+                                     ? collator.compare(term.text(), lowerTerm) >= 0
+                                     : collator.compare(term.text(), lowerTerm) > 0))
+                                && (upperTerm == null
+                                    || (includeUpper
+                                        ? collator.compare(term.text(), upperTerm) <= 0
+                                        : collator.compare(term.text(), upperTerm) < 0))) {
+                              /* we have a good term, find the docs */
+                                termDocs.seek(enumerator.term());
+                                while (termDocs.next()) {
+                                    bits.set(termDocs.doc());
                                 }
                             }
-                            /* we have a good term, find the docs */
+                        }
+                    }
+                    while (enumerator.next());
+                } else { // collator is null - use Unicode code point ordering
+                    boolean checkLower = false;
+                    if (!includeLower) // make adjustments to set to exclusive
+                        checkLower = true;
+       
+                    do {
+                        Term term = enumerator.term();
+                        if (term != null && term.field().equals(fieldName)) {
+                            if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
+                                checkLower = false;
+                                if (upperTerm != null) {
+                                    int compare = upperTerm.compareTo(term.text());
+                                    /* if beyond the upper term, or is exclusive and
+                                     * this is equal to the upper term, break out */
+                                    if ((compare < 0) ||
+                                        (!includeUpper && compare==0)) {
+                                        break;
+                                    }
+                                }
+                                /* we have a good term, find the docs */
                             
-                            termDocs.seek(enumerator.term());
-                            while (termDocs.next()) {
-                                bits.set(termDocs.doc());
+                                termDocs.seek(enumerator.term());
+                                while (termDocs.next()) {
+                                    bits.set(termDocs.doc());
+                                }
                             }
+                        } else {
+                            break;
                         }
-                    } else {
-                        break;
                     }
+                    while (enumerator.next());
                 }
-                while (enumerator.next());
-                
             } finally {
                 termDocs.close();
             }
@@ -162,7 +209,7 @@
         OpenBitSet bits = new OpenBitSet(reader.maxDoc());
         
         TermEnum enumerator =
-            (null != lowerTerm
+            (null != lowerTerm && collator == null
              ? reader.terms(new Term(fieldName, lowerTerm))
              : reader.terms(new Term(fieldName)));
         
@@ -171,40 +218,63 @@
             if (enumerator.term() == null) {
                 return bits;
             }
-            
-            boolean checkLower = false;
-            if (!includeLower) // make adjustments to set to exclusive
-                checkLower = true;
-        
+
             TermDocs termDocs = reader.termDocs();
+
             try {
-                
-                do {
-                    Term term = enumerator.term();
-                    if (term != null && term.field().equals(fieldName)) {
-                        if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
-                            checkLower = false;
-                            if (upperTerm != null) {
-                                int compare = upperTerm.compareTo(term.text());
-                                /* if beyond the upper term, or is exclusive and
-                                 * this is equal to the upper term, break out */
-                                if ((compare < 0) ||
-                                    (!includeUpper && compare==0)) {
-                                    break;
+                if (collator != null) {
+                    do {
+                        Term term = enumerator.term();
+                        if (term != null && term.field().equals(fieldName)) {
+                            if ((lowerTerm == null
+                                 || (includeLower
+                                     ? collator.compare(term.text(), lowerTerm) >= 0
+                                     : collator.compare(term.text(), lowerTerm) > 0))
+                                && (upperTerm == null
+                                    || (includeUpper
+                                        ? collator.compare(term.text(), upperTerm) <= 0
+                                        : collator.compare(term.text(), upperTerm) < 0))) {
+                                /* we have a good term, find the docs */
+                                termDocs.seek(enumerator.term());
+                                while (termDocs.next()) {
+                                    bits.set(termDocs.doc());
                                 }
                             }
-                            /* we have a good term, find the docs */
+                        }
+                    }
+                    while (enumerator.next());
+                } else { // collator is null - use Unicode code point ordering
+                    boolean checkLower = false;
+                    if (!includeLower) // make adjustments to set to exclusive
+                        checkLower = true;
+        
+                    do {
+                        Term term = enumerator.term();
+                        if (term != null && term.field().equals(fieldName)) {
+                            if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
+                                checkLower = false;
+                                if (upperTerm != null) {
+                                    int compare = upperTerm.compareTo(term.text());
+                                    /* if beyond the upper term, or is exclusive and
+                                     * this is equal to the upper term, break out */
+                                    if ((compare < 0) ||
+                                        (!includeUpper && compare==0)) {
+                                        break;
+                                    }
+                                }
+                                /* we have a good term, find the docs */
                             
-                            termDocs.seek(enumerator.term());
-                            while (termDocs.next()) {
-                                bits.set(termDocs.doc());
+                                termDocs.seek(enumerator.term());
+                                while (termDocs.next()) {
+                                    bits.set(termDocs.doc());
+                                }
                             }
+                        } else {
+                            break;
                         }
-                    } else {
-                        break;
                     }
+                    while (enumerator.next());
                 }
-                while (enumerator.next());
                 
             } finally {
                 termDocs.close();
@@ -241,6 +311,7 @@
         if (!this.fieldName.equals(other.fieldName)
             || this.includeLower != other.includeLower
             || this.includeUpper != other.includeUpper
+            || (this.collator != null && ! this.collator.equals(other.collator))
            ) { return false; }
         if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
         if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
@@ -255,6 +326,7 @@
       h ^= (upperTerm != null ? (upperTerm.hashCode()) : 0x91BEC2C2);
       h ^= (includeLower ? 0xD484B933 : 0)
          ^ (includeUpper ? 0x6AE423AC : 0);
+      h ^= collator != null ? collator.hashCode() : 0;
       return h;
     }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java Tue Sep 16 14:03:21 2008
@@ -18,6 +18,7 @@
  */
 
 import java.io.IOException;
+import java.text.Collator;
 
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermEnum;
@@ -46,12 +47,18 @@
     private Term lowerTerm;
     private Term upperTerm;
     private boolean inclusive;
+    private Collator collator;
 
     /** Constructs a query selecting all terms greater than
      * <code>lowerTerm</code> but less than <code>upperTerm</code>.
      * There must be at least one term and either term may be null,
      * in which case there is no bound on that side, but if there are
      * two terms, both terms <b>must</b> be for the same field.
+     *
+     * @param lowerTerm The Term at the lower end of the range
+     * @param upperTerm The Term at the upper end of the range
+     * @param inclusive If true, both <code>lowerTerm</code> and
+     *  <code>upperTerm</code> will themselves be included in the range.
      */
     public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive)
     {
@@ -76,48 +83,109 @@
         this.inclusive = inclusive;
     }
 
+    /** Constructs a query selecting all terms greater than
+     * <code>lowerTerm</code> but less than <code>upperTerm</code>.
+     * There must be at least one term and either term may be null,
+     * in which case there is no bound on that side, but if there are
+     * two terms, both terms <b>must</b> be for the same field.
+     * <p>
+     * If <code>collator</code> is not null, it will be used to decide whether
+     * index terms are within the given range, rather than using the Unicode code
+     * point order in which index terms are stored.
+     * <p>
+     * <strong>WARNING:</strong> Using this constructor and supplying a non-null
+     * value in the <code>collator</code> parameter will cause every single 
+     * index Term in the Field referenced by lowerTerm and/or upperTerm to be
+     * examined.  Depending on the number of index Terms in this Field, the 
+     * operation could be very slow.
+     *
+     * @param lowerTerm The Term at the lower end of the range
+     * @param upperTerm The Term at the upper end of the range
+     * @param inclusive If true, both <code>lowerTerm</code> and
+     *  <code>upperTerm</code> will themselves be included in the range.
+     * @param collator The collator to use to collate index Terms, to determine
+     *  their membership in the range bounded by <code>lowerTerm</code> and
+     *  <code>upperTerm</code>.
+     */
+    public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive,
+                      Collator collator)
+    {
+        this(lowerTerm, upperTerm, inclusive);
+        this.collator = collator;
+    }
+
     public Query rewrite(IndexReader reader) throws IOException {
 
         BooleanQuery query = new BooleanQuery(true);
-        TermEnum enumerator = reader.terms(lowerTerm);
+        String testField = getField();
+        if (collator != null) {
+            TermEnum enumerator = reader.terms(new Term(testField, ""));
+            String lowerTermText = lowerTerm != null ? lowerTerm.text() : null;
+            String upperTermText = upperTerm != null ? upperTerm.text() : null;
+
+            try {
+                do {
+                    Term term = enumerator.term();
+                    if (term == null || term.field() != testField) { // interned comparison
+                        break; // left testField: same early exit as the non-collator loop
+                    }
+                    if ((lowerTermText == null
+                         || (inclusive ? collator.compare(term.text(), lowerTermText) >= 0
+                                       : collator.compare(term.text(), lowerTermText) > 0))
+                        && (upperTermText == null
+                            || (inclusive ? collator.compare(term.text(), upperTermText) <= 0
+                                          : collator.compare(term.text(), upperTermText) < 0))) {
+                        addTermToQuery(term, query); // within the collated range
+                    }
+                } while (enumerator.next());
+            }
+            finally {
+                enumerator.close();
+            }
+        }
+        else { // collator is null
+            TermEnum enumerator = reader.terms(lowerTerm);
 
-        try {
+            try {
 
-            boolean checkLower = false;
-            if (!inclusive) // make adjustments to set to exclusive
-                checkLower = true;
-
-            String testField = getField();
-
-            do {
-                Term term = enumerator.term();
-                if (term != null && term.field() == testField) { // interned comparison
-                    if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) {
-                        checkLower = false;
-                        if (upperTerm != null) {
-                            int compare = upperTerm.text().compareTo(term.text());
-                            /* if beyond the upper term, or is exclusive and
-                             * this is equal to the upper term, break out */
-                            if ((compare < 0) || (!inclusive && compare == 0))
-                                break;
+                boolean checkLower = false;
+                if (!inclusive) // make adjustments to set to exclusive
+                    checkLower = true;
+
+                do {
+                    Term term = enumerator.term();
+                    if (term != null && term.field() == testField) { // interned comparison
+                        if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) {
+                            checkLower = false;
+                            if (upperTerm != null) {
+                                int compare = upperTerm.text().compareTo(term.text());
+                                /* if beyond the upper term, or is exclusive and
+                                 * this is equal to the upper term, break out */
+                                if ((compare < 0) || (!inclusive && compare == 0))
+                                    break;
+                            }
+                            addTermToQuery(term, query); // Found a match
                         }
-                        TermQuery tq = new TermQuery(term); // found a match
-                        tq.setBoost(getBoost()); // set the boost
-                        query.add(tq, BooleanClause.Occur.SHOULD); // add to query
+                    }
+                    else {
+                        break;
                     }
                 }
-                else {
-                    break;
-                }
+                while (enumerator.next());
+            }
+            finally {
+                enumerator.close();
             }
-            while (enumerator.next());
-        }
-        finally {
-            enumerator.close();
         }
         return query;
     }
 
+    private void addTermToQuery(Term term, BooleanQuery query) {
+        TermQuery tq = new TermQuery(term);
+        tq.setBoost(getBoost()); // set the boost
+        query.add(tq, BooleanClause.Occur.SHOULD); // add to query
+    }
+
     /** Returns the field name for this query */
     public String getField() {
       return (lowerTerm != null ? lowerTerm.field() : upperTerm.field());
@@ -132,6 +200,9 @@
     /** Returns <code>true</code> if the range query is inclusive */
     public boolean isInclusive() { return inclusive; }
 
+    /** Returns the collator used to determine range inclusion, if any. */
+    public Collator getCollator() { return collator; }
+
 
     /** Prints a user-readable version of this query. */
     public String toString(String field)
@@ -159,6 +230,9 @@
         final RangeQuery other = (RangeQuery) o;
         if (this.getBoost() != other.getBoost()) return false;
         if (this.inclusive != other.inclusive) return false;
+        if (this.collator != null ? !this.collator.equals(other.collator) : other.collator != null)
+            return false; // symmetric: a null collator never equals a non-null one, in either direction
+
         // one of lowerTerm and upperTerm can be null
         if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
         if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
@@ -174,6 +248,7 @@
       h ^= (h << 25) | (h >>> 8);
       h ^= upperTerm != null ? upperTerm.hashCode() : 0;
       h ^= this.inclusive ? 0x2742E74A : 0;
+      h ^= collator != null ? collator.hashCode() : 0; 
       return h;
     }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java Tue Sep 16 14:03:21 2008
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.io.Reader;
 import java.text.DateFormat;
+import java.text.Collator;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.Locale;
@@ -429,6 +430,51 @@
     assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
     assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
   }
+    
+  public void testFarsiRangeCollating() throws Exception {
+    
+    RAMDirectory ramDir = new RAMDirectory();
+    IndexWriter iw = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true, 
+                                     IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    doc.add(new Field("content","\u0633\u0627\u0628", 
+                      Field.Store.YES, Field.Index.UN_TOKENIZED));
+    iw.addDocument(doc);
+    iw.close();
+    IndexSearcher is = new IndexSearcher(ramDir);
+
+    QueryParser qp = new QueryParser("content", new WhitespaceAnalyzer());
+
+    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
+    // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
+    // characters properly.
+    Collator c = Collator.getInstance(new Locale("ar"));
+    qp.setRangeCollator(c);
+
+    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
+    // orders the U+0698 character before the U+0633 character, so the single
+    // index Term below should NOT be returned by a ConstantScoreRangeQuery
+    // with a Farsi Collator (or an Arabic one for the case when Farsi is not
+    // supported).
+      
+    // Test ConstantScoreRangeQuery
+    qp.setUseOldRangeQuery(false);
+    ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, result.length);
+
+    result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
+    assertEquals("The index Term should be included.", 1, result.length);
+
+    // Test RangeQuery
+    qp.setUseOldRangeQuery(true);
+    result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, result.length);
+
+    result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
+    assertEquals("The index Term should be included.", 1, result.length);
+
+    is.close();
+  }
   
   /** for testing legacy DateField support */
   private String getLegacyDate(String s) throws Exception {

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/BaseTestRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/BaseTestRangeFilter.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/BaseTestRangeFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/BaseTestRangeFilter.java Tue Sep 16 14:03:21 2008
@@ -32,12 +32,30 @@
     public static final boolean F = false;
     public static final boolean T = true;
     
-    RAMDirectory index = new RAMDirectory();
     Random rand = new Random(101); // use a set seed to test is deterministic
-    
-    int maxR = Integer.MIN_VALUE;
-    int minR = Integer.MAX_VALUE;
 
+    /** 
+     * Collation interacts badly with hyphens -- collation produces different
+     * ordering than Unicode code-point ordering -- so two indexes are created:
+     * one which can't have negative random integers, for testing collated 
+     * ranges, and the other which can have negative random integers, for all
+     * other tests. 
+     */
+    class TestIndex { 
+        int maxR;
+        int minR;
+        boolean allowNegativeRandomInts;
+        RAMDirectory index = new RAMDirectory();
+
+        TestIndex(int minR, int maxR, boolean allowNegativeRandomInts) {
+            this.minR = minR;
+            this.maxR = maxR;
+            this.allowNegativeRandomInts = allowNegativeRandomInts;
+        }
+    }
+    TestIndex signedIndex = new TestIndex(Integer.MAX_VALUE, Integer.MIN_VALUE, true);
+    TestIndex unsignedIndex = new TestIndex(Integer.MAX_VALUE, 0, false);
+    
     int minId = 0;
     int maxId = 10000;
 
@@ -65,28 +83,31 @@
 
     public BaseTestRangeFilter(String name) {
 	super(name);
-        build();
+        build(signedIndex);
+        build(unsignedIndex);
     }
     public BaseTestRangeFilter() {
-        build();
+        build(signedIndex);
+        build(unsignedIndex);
     }
     
-    private void build() {
+    private void build(TestIndex index) {
         try {
             
             /* build an index */
-            IndexWriter writer = new IndexWriter(index, new SimpleAnalyzer(), T, 
+            IndexWriter writer = new IndexWriter(index.index, new SimpleAnalyzer(), T, 
                                                  IndexWriter.MaxFieldLength.LIMITED);
 
           for (int d = minId; d <= maxId; d++) {
                 Document doc = new Document();
                 doc.add(new Field("id",pad(d), Field.Store.YES, Field.Index.NOT_ANALYZED));
-                int r= rand.nextInt();
-                if (maxR < r) {
-                    maxR = r;
+                int r= index.allowNegativeRandomInts 
+                       ? rand.nextInt() : rand.nextInt(Integer.MAX_VALUE);
+                if (index.maxR < r) {
+                    index.maxR = r;
                 }
-                if (r < minR) {
-                    minR = r;
+                  if (r < index.minR) {
+                    index.minR = r;
                 }
                 doc.add(new Field("rand",pad(r), Field.Store.YES, Field.Index.NOT_ANALYZED));
                 doc.add(new Field("body","body", Field.Store.YES, Field.Index.NOT_ANALYZED));

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java Tue Sep 16 14:03:21 2008
@@ -18,6 +18,7 @@
  */
 
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -27,6 +28,8 @@
 import org.apache.lucene.store.RAMDirectory;
 
 import java.io.IOException;
+import java.text.Collator;
+import java.util.Locale;
 
 import junit.framework.Assert;
 
@@ -92,12 +95,25 @@
         return new ConstantScoreRangeQuery(f,l,h,il,ih);
     }
 
+    /** macro for readability */
+    public static Query csrq(String f, String l, String h,
+                             boolean il, boolean ih, Collator c) {
+        return new ConstantScoreRangeQuery(f,l,h,il,ih,c);
+    }
+
     public void testBasics() throws IOException {
       QueryUtils.check(csrq("data","1","6",T,T));
       QueryUtils.check(csrq("data","A","Z",T,T));
       QueryUtils.checkUnequal(csrq("data","1","6",T,T), csrq("data","A","Z",T,T));
     }
 
+    public void testBasicsCollating() throws IOException {
+      Collator c = Collator.getInstance(Locale.ENGLISH);
+      QueryUtils.check(csrq("data","1","6",T,T,c));
+      QueryUtils.check(csrq("data","A","Z",T,T,c));
+      QueryUtils.checkUnequal(csrq("data","1","6",T,T,c), csrq("data","A","Z",T,T,c));
+    }
+
     public void testEqualScores() throws IOException {
         // NOTE: uses index build in *this* setUp
         
@@ -205,7 +221,7 @@
     public void testRangeQueryId() throws IOException {
         // NOTE: uses index build in *super* setUp
 
-        IndexReader reader = IndexReader.open(index);
+        IndexReader reader = IndexReader.open(signedIndex.index);
 	IndexSearcher search = new IndexSearcher(reader);
 
         int medId = ((maxId - minId) / 2);
@@ -284,21 +300,105 @@
         
     }
 
+  
+  public void testRangeQueryIdCollating() throws IOException {
+    // NOTE: uses index build in *super* setUp
+
+    IndexReader reader = IndexReader.open(signedIndex.index);
+    IndexSearcher search = new IndexSearcher(reader);
+
+    int medId = ((maxId - minId) / 2);
+        
+    String minIP = pad(minId);
+    String maxIP = pad(maxId);
+    String medIP = pad(medId);
+    
+    int numDocs = reader.numDocs();
+        
+    assertEquals("num of docs", numDocs, 1+ maxId - minId);
+        
+    ScoreDoc[] result;
+        
+    Collator c = Collator.getInstance(Locale.ENGLISH);
+
+    // test id, bounded on both ends
+        
+    result = search.search(csrq("id",minIP,maxIP,T,T,c), null, numDocs).scoreDocs;
+    assertEquals("find all", numDocs, result.length);
+
+    result = search.search(csrq("id",minIP,maxIP,T,F,c), null, numDocs).scoreDocs;
+    assertEquals("all but last", numDocs-1, result.length);
+
+    result = search.search(csrq("id",minIP,maxIP,F,T,c), null, numDocs).scoreDocs;
+    assertEquals("all but first", numDocs-1, result.length);
+        
+    result = search.search(csrq("id",minIP,maxIP,F,F,c), null, numDocs).scoreDocs;
+    assertEquals("all but ends", numDocs-2, result.length);
+    
+    result = search.search(csrq("id",medIP,maxIP,T,T,c), null, numDocs).scoreDocs;
+    assertEquals("med and up", 1+ maxId-medId, result.length);
+        
+    result = search.search(csrq("id",minIP,medIP,T,T,c), null, numDocs).scoreDocs;
+    assertEquals("up to med", 1+ medId-minId, result.length);
+
+    // unbounded id
+
+    result = search.search(csrq("id",minIP,null,T,F,c), null, numDocs).scoreDocs;
+    assertEquals("min and up", numDocs, result.length);
+
+    result = search.search(csrq("id",null,maxIP,F,T,c), null, numDocs).scoreDocs;
+    assertEquals("max and down", numDocs, result.length);
+
+    result = search.search(csrq("id",minIP,null,F,F,c), null, numDocs).scoreDocs;
+    assertEquals("not min, but up", numDocs-1, result.length);
+        
+    result = search.search(csrq("id",null,maxIP,F,F,c), null, numDocs).scoreDocs;
+    assertEquals("not max, but down", numDocs-1, result.length);
+        
+    result = search.search(csrq("id",medIP,maxIP,T,F,c), null, numDocs).scoreDocs;
+    assertEquals("med and up, not max", maxId-medId, result.length);
+        
+    result = search.search(csrq("id",minIP,medIP,F,T,c), null, numDocs).scoreDocs;
+    assertEquals("not min, up to med", medId-minId, result.length);
+
+    // very small sets
+
+    result = search.search(csrq("id",minIP,minIP,F,F,c), null, numDocs).scoreDocs;
+    assertEquals("min,min,F,F,c", 0, result.length);
+    result = search.search(csrq("id",medIP,medIP,F,F,c), null, numDocs).scoreDocs;
+    assertEquals("med,med,F,F,c", 0, result.length);
+    result = search.search(csrq("id",maxIP,maxIP,F,F,c), null, numDocs).scoreDocs;
+    assertEquals("max,max,F,F,c", 0, result.length);
+                     
+    result = search.search(csrq("id",minIP,minIP,T,T,c), null, numDocs).scoreDocs;
+    assertEquals("min,min,T,T,c", 1, result.length);
+    result = search.search(csrq("id",null,minIP,F,T,c), null, numDocs).scoreDocs;
+    assertEquals("nul,min,F,T,c", 1, result.length);
+
+    result = search.search(csrq("id",maxIP,maxIP,T,T,c), null, numDocs).scoreDocs;
+    assertEquals("max,max,T,T,c", 1, result.length);
+    result = search.search(csrq("id",maxIP,null,T,F,c), null, numDocs).scoreDocs;
+    assertEquals("max,nul,T,T,c", 1, result.length);
+
+    result = search.search(csrq("id",medIP,medIP,T,T,c), null, numDocs).scoreDocs;
+    assertEquals("med,med,T,T,c", 1, result.length);
+  }
+    
+  
     public void testRangeQueryRand() throws IOException {
         // NOTE: uses index build in *super* setUp
 
-        IndexReader reader = IndexReader.open(index);
+        IndexReader reader = IndexReader.open(signedIndex.index);
 	IndexSearcher search = new IndexSearcher(reader);
 
-        String minRP = pad(minR);
-        String maxRP = pad(maxR);
+        String minRP = pad(signedIndex.minR);
+        String maxRP = pad(signedIndex.maxR);
     
         int numDocs = reader.numDocs();
         
         assertEquals("num of docs", numDocs, 1+ maxId - minId);
         
   ScoreDoc[] result;
-        Query q = new TermQuery(new Term("body","body"));
 
         // test extremes, bounded on both ends
         
@@ -347,4 +447,104 @@
         
     }
 
+    public void testRangeQueryRandCollating() throws IOException {
+        // NOTE: uses index build in *super* setUp
+
+        // using the unsigned index because collation seems to ignore hyphens
+        IndexReader reader = IndexReader.open(unsignedIndex.index);
+        IndexSearcher search = new IndexSearcher(reader);
+
+        String minRP = pad(unsignedIndex.minR);
+        String maxRP = pad(unsignedIndex.maxR);
+    
+        int numDocs = reader.numDocs();
+        
+        assertEquals("num of docs", numDocs, 1+ maxId - minId);
+        
+        ScoreDoc[] result;
+        
+        Collator c = Collator.getInstance(Locale.ENGLISH);
+
+        // test extremes, bounded on both ends
+        
+        result = search.search(csrq("rand",minRP,maxRP,T,T,c), null, numDocs).scoreDocs;
+        assertEquals("find all", numDocs, result.length);
+
+        result = search.search(csrq("rand",minRP,maxRP,T,F,c), null, numDocs).scoreDocs;
+        assertEquals("all but biggest", numDocs-1, result.length);
+
+        result = search.search(csrq("rand",minRP,maxRP,F,T,c), null, numDocs).scoreDocs;
+        assertEquals("all but smallest", numDocs-1, result.length);
+        
+        result = search.search(csrq("rand",minRP,maxRP,F,F,c), null, numDocs).scoreDocs;
+        assertEquals("all but extremes", numDocs-2, result.length);
+    
+        // unbounded
+
+        result = search.search(csrq("rand",minRP,null,T,F,c), null, numDocs).scoreDocs;
+        assertEquals("smallest and up", numDocs, result.length);
+
+        result = search.search(csrq("rand",null,maxRP,F,T,c), null, numDocs).scoreDocs;
+        assertEquals("biggest and down", numDocs, result.length);
+
+        result = search.search(csrq("rand",minRP,null,F,F,c), null, numDocs).scoreDocs;
+        assertEquals("not smallest, but up", numDocs-1, result.length);
+        
+        result = search.search(csrq("rand",null,maxRP,F,F,c), null, numDocs).scoreDocs;
+        assertEquals("not biggest, but down", numDocs-1, result.length);
+        
+        // very small sets
+
+        result = search.search(csrq("rand",minRP,minRP,F,F,c), null, numDocs).scoreDocs;
+        assertEquals("min,min,F,F,c", 0, result.length);
+        result = search.search(csrq("rand",maxRP,maxRP,F,F,c), null, numDocs).scoreDocs;
+        assertEquals("max,max,F,F,c", 0, result.length);
+                     
+        result = search.search(csrq("rand",minRP,minRP,T,T,c), null, numDocs).scoreDocs;
+        assertEquals("min,min,T,T,c", 1, result.length);
+        result = search.search(csrq("rand",null,minRP,F,T,c), null, numDocs).scoreDocs;
+        assertEquals("nul,min,F,T,c", 1, result.length);
+
+        result = search.search(csrq("rand",maxRP,maxRP,T,T,c), null, numDocs).scoreDocs;
+        assertEquals("max,max,T,T,c", 1, result.length);
+        result = search.search(csrq("rand",maxRP,null,T,F,c), null, numDocs).scoreDocs;
+        assertEquals("max,nul,T,T,c", 1, result.length);
+    }
+    
+    public void testFarsi() throws Exception {
+            
+        /* build an index */
+        RAMDirectory farsiIndex = new RAMDirectory();
+        IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, 
+                                             IndexWriter.MaxFieldLength.LIMITED);
+        Document doc = new Document();
+        doc.add(new Field("content","\u0633\u0627\u0628", 
+                          Field.Store.YES, Field.Index.NOT_ANALYZED));
+        doc.add(new Field("body", "body",
+                          Field.Store.YES, Field.Index.NOT_ANALYZED));
+        writer.addDocument(doc);
+            
+        writer.optimize();
+        writer.close();
+
+        IndexReader reader = IndexReader.open(farsiIndex);
+        IndexSearcher search = new IndexSearcher(reader);
+
+        // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
+        // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
+        // characters properly.
+        Collator c = Collator.getInstance(new Locale("ar"));
+        
+        // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
+        // orders the U+0698 character before the U+0633 character, so the single
+        // index Term below should NOT be returned by a ConstantScoreRangeQuery
+        // with a Farsi Collator (or an Arabic one for the case when Farsi is 
+        // not supported).
+        ScoreDoc[] result = search.search(csrq("content","\u062F", "\u0698", T, T, c), null, 1000).scoreDocs;
+        assertEquals("The index Term should not be included.", 0, result.length);
+
+        result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, 1000).scoreDocs;
+        assertEquals("The index Term should be included.", 1, result.length);
+        search.close();
+    }
 }



Mime
View raw message