lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r793823 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/document/ src/java/org/apache/lucene/search/ src/java/org/apache/lucene/util/ src/test/org/apache/lucene/analysis/ src/test/org/apache/lucen...
Date Tue, 14 Jul 2009 09:17:46 GMT
Author: uschindler
Date: Tue Jul 14 09:17:44 2009
New Revision: 793823

URL: http://svn.apache.org/viewvc?rev=793823&view=rev
Log:
LUCENE-1712: Set default precisionStep for NumericField and NumericRange*

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java
    lucene/java/trunk/src/java/org/apache/lucene/document/NumericField.java
    lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java
    lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=793823&r1=793822&r2=793823&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Jul 14 09:17:44 2009
@@ -516,7 +516,7 @@
     See the Javadocs for NGramDistance.java for a reference paper on why
     this is helpful (Tom Morton via Grant Ingersoll)
 
-27. LUCENE-1470, LUCENE-1582, LUCENE-1602, LUCENE-1673, LUCENE-1701:
+27. LUCENE-1470, LUCENE-1582, LUCENE-1602, LUCENE-1673, LUCENE-1701, LUCENE-1712:
     Added NumericRangeQuery and NumericRangeFilter, a fast alternative to
     RangeQuery/RangeFilter for numeric searches. They depend on a specific
     structure of terms in the index that can be created by indexing

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=793823&r1=793822&r2=793823&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java Tue Jul 14 09:17:44 2009
@@ -77,7 +77,10 @@
  *
  * <p>Values indexed by this stream can be loaded into the {@link FieldCache}
  * and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
- * type; {@link SortField#AUTO} does not work with this type of field)
+ * type; {@link SortField#AUTO} does not work with this type of field).
+ * Values solely used for sorting can be indexed using a <code>precisionStep</code>
+ * of {@link Integer#MAX_VALUE} (at least &ge;64), because this step only produces
+ * one value token with highest precision.
  *
  * <p><font color="red"><b>NOTE:</b> This API is experimental and
  * might change in incompatible ways in the next release.</font>
@@ -86,24 +89,30 @@
  */
 public final class NumericTokenStream extends TokenStream {
 
-  /** The full precision 64 bit token gets this token type assigned. */
-  public static final String TOKEN_TYPE_FULL_PREC_64  = "fullPrecNumeric64";
+  /** The full precision token gets this token type assigned. */
+  public static final String TOKEN_TYPE_FULL_PREC  = "fullPrecNumeric";
 
-  /** The lower precision 64 bit tokens gets this token type assigned. */
-  public static final String TOKEN_TYPE_LOWER_PREC_64 = "lowerPrecNumeric64";
-
-  /** The full precision 32 bit token gets this token type assigned. */
-  public static final String TOKEN_TYPE_FULL_PREC_32  = "fullPrecNumeric32";
-
-  /** The lower precision 32 bit tokens gets this token type assigned. */
-  public static final String TOKEN_TYPE_LOWER_PREC_32 = "lowerPrecNumeric32";
+  /** The lower precision tokens gets this token type assigned. */
+  public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
 
   /**
-   * Creates a token stream for numeric values. The stream is not yet initialized,
+   * Creates a token stream for numeric values using the default <code>precisionStep</code>
+   * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized,
+   * before using set a value using the various set<em>???</em>Value() methods.
+   */
+  public NumericTokenStream() {
+    this(NumericUtils.PRECISION_STEP_DEFAULT);
+  }
+  
+  /**
+   * Creates a token stream for numeric values with the specified
+   * <code>precisionStep</code>. The stream is not yet initialized,
    * before using set a value using the various set<em>???</em>Value() methods.
    */
   public NumericTokenStream(final int precisionStep) {
     this.precisionStep = precisionStep;
+    if (precisionStep < 1)
+      throw new IllegalArgumentException("precisionStep must be >=1");
     termAtt = (TermAttribute) addAttribute(TermAttribute.class);
     typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
     posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
@@ -165,8 +174,6 @@
   public void reset() {
     if (valSize == 0)
       throw new IllegalStateException("call set???Value() before usage");
-    if (precisionStep < 1 || precisionStep > valSize)
-      throw new IllegalArgumentException("precisionStep may only be 1.."+valSize);
     shift = 0;
   }
 
@@ -180,15 +187,13 @@
     final char[] buffer;
     switch (valSize) {
       case 64:
-        buffer = termAtt.resizeTermBuffer(NumericUtils.LONG_BUF_SIZE);
+        buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
         termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
-        typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_64 : TOKEN_TYPE_LOWER_PREC_64);
         break;
       
       case 32:
-        buffer = termAtt.resizeTermBuffer(NumericUtils.INT_BUF_SIZE);
+        buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_INT);
         termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
-        typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_32 : TOKEN_TYPE_LOWER_PREC_32);
         break;
       
       default:
@@ -196,6 +201,7 @@
         throw new IllegalArgumentException("valSize must be 32 or 64");
     }
     
+    typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
     posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
     shift += precisionStep;
     return true;
@@ -215,15 +221,13 @@
     final char[] buffer;
     switch (valSize) {
       case 64:
-        buffer = reusableToken.resizeTermBuffer(NumericUtils.LONG_BUF_SIZE);
+        buffer = reusableToken.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
         reusableToken.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
-        reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_64 : TOKEN_TYPE_LOWER_PREC_64);
         break;
       
       case 32:
-        buffer = reusableToken.resizeTermBuffer(NumericUtils.INT_BUF_SIZE);
+        buffer = reusableToken.resizeTermBuffer(NumericUtils.BUF_SIZE_INT);
         reusableToken.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
-        reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_32 : TOKEN_TYPE_LOWER_PREC_32);
         break;
       
       default:
@@ -231,6 +235,7 @@
         throw new IllegalArgumentException("valSize must be 32 or 64");
     }
 
+    reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
     reusableToken.setPositionIncrement((shift == 0) ? 1 : 0);
     shift += precisionStep;
     return reusableToken;

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/NumericField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/NumericField.java?rev=793823&r1=793822&r2=793823&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/NumericField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/NumericField.java Tue Jul 14 09:17:44 2009
@@ -21,6 +21,7 @@
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.NumericTokenStream;
+import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.search.NumericRangeQuery; // javadocs
 import org.apache.lucene.search.NumericRangeFilter; // javadocs
 import org.apache.lucene.search.SortField; // javadocs
@@ -72,7 +73,10 @@
  *
  * <p>Values indexed by this field can be loaded into the {@link FieldCache}
  * and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
- * type; {@link SortField#AUTO} does not work with this type of field)
+ * type; {@link SortField#AUTO} does not work with this type of field).
+ * Values solely used for sorting can be indexed using a <code>precisionStep</code>
+ * of {@link Integer#MAX_VALUE} (at least &ge;64), because this step only produces
+ * one value token with highest precision.
  *
  * <p><font color="red"><b>NOTE:</b> This API is experimental and
  * might change in incompatible ways in the next release.</font>
@@ -84,7 +88,34 @@
   private final NumericTokenStream tokenStream;
 
   /**
-   * Creates a field for numeric values. The instance is not yet initialized with
+   * Creates a field for numeric values using the default <code>precisionStep</code>
+   * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
+   * a numeric value, before indexing a document containing this field,
+   * set a value using the various set<em>???</em>Value() methods.
+   * This constructor creates an indexed, but not stored field.
+   * @param name the field name
+   */
+  public NumericField(String name) {
+    this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true);
+  }
+  
+  /**
+   * Creates a field for numeric values using the default <code>precisionStep</code>
+   * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
+   * a numeric value, before indexing a document containing this field,
+   * set a value using the various set<em>???</em>Value() methods.
+   * @param name the field name
+   * @param store if the field should be stored in plain text form
+   *  (according to <code>toString(value)</code> of the used data type)
+   * @param index if the field should be indexed using {@link NumericTokenStream}
+   */
+  public NumericField(String name, Field.Store store, boolean index) {
+    this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index);
+  }
+  
+  /**
+   * Creates a field for numeric values with the specified
+   * <code>precisionStep</code>. The instance is not yet initialized with
    * a numeric value, before indexing a document containing this field,
    * set a value using the various set<em>???</em>Value() methods.
    * This constrcutor creates an indexed, but not stored field.
@@ -96,7 +127,8 @@
   }
 
   /**
-   * Creates a field for numeric values. The instance is not yet initialized with
+   * Creates a field for numeric values with the specified
+   * <code>precisionStep</code>. The instance is not yet initialized with
    * a numeric value, before indexing a document containing this field,
    * set a value using the various set<em>???</em>Value() methods.
    * @param name the field name

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java?rev=793823&r1=793822&r2=793823&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java Tue Jul 14 09:17:44 2009
@@ -19,6 +19,7 @@
 
 import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
 import org.apache.lucene.document.NumericField; // for javadocs
+import org.apache.lucene.util.NumericUtils; // for javadocs
 
 /**
  * Implementation of a {@link Filter} that implements <em>trie-based</em> range filtering
@@ -65,6 +66,21 @@
   }
   
   /**
+   * Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>long</code>
+   * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeFilter newLongRange(final String field,
+    Long min, Long max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeFilter(
+      NumericRangeQuery.newLongRange(field, min, max, minInclusive, maxInclusive)
+    );
+  }
+  
+  /**
    * Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>int</code>
    * range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
    * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
@@ -80,6 +96,21 @@
   }
   
   /**
+   * Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>int</code>
+   * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeFilter newIntRange(final String field,
+    Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeFilter(
+      NumericRangeQuery.newIntRange(field, min, max, minInclusive, maxInclusive)
+    );
+  }
+  
+  /**
    * Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>double</code>
    * range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
    * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
@@ -95,6 +126,21 @@
   }
   
   /**
+   * Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>double</code>
+   * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeFilter newDoubleRange(final String field,
+    Double min, Double max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeFilter(
+      NumericRangeQuery.newDoubleRange(field, min, max, minInclusive, maxInclusive)
+    );
+  }
+  
+  /**
    * Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>float</code>
    * range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
    * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
@@ -109,6 +155,21 @@
     );
   }
 
+  /**
+   * Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>float</code>
+   * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeFilter newFloatRange(final String field,
+    Float min, Float max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeFilter(
+      NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive)
+    );
+  }
+  
   /** Returns the field name for this filter */
   public String getField() { return ((NumericRangeQuery)query).getField(); }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java?rev=793823&r1=793822&r2=793823&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java Tue Jul 14 09:17:44 2009
@@ -38,8 +38,9 @@
  * An important setting is the <a href="#precisionStepDesc"><code>precisionStep</code></a>, which specifies,
  * how many different precisions per numeric value are indexed to speed up range queries.
  * Lower values create more terms but speed up search, higher values create less terms, but
- * slow down search. Suitable values are 2, 4, or 8. A good starting point to test is 4.
- * For code examples see {@link NumericField}.
+ * slow down search. Suitable values are between <b>1</b> and <b>8</b>. A good starting point to test is <b>4</b>,
+ * which is the default value for all <code>Numeric*</code> classes. For a discussion about ideal
+ * values, see below. Indexing code examples can be found in {@link NumericField}.
  *
  * <h4>Searching</h4>
  * <p>This class has no constructor, you can create queries depending on the data type
@@ -51,6 +52,8 @@
  *                                           new Float(0.3f), new Float(0.10f),
  *                                           true, true);
  * </pre>
+ * The used <a href="#precisionStepDesc"><code>precisionStep</code></a> must be compatible
+ * to the one used during indexing (see below). The default is also <b>4</b>.
  *
  * <h3>How it works</h3>
  *
@@ -101,18 +104,31 @@
  * be found out by testing. <b>Important:</b> You can index with a lower precision step value and test search speed
  * using a multiple of the original step value.</p>
  *
+ * <p>Good values for <code>precisionStep</code> are depending on usage and data type:
+ * <ul>
+ *  <li>The default for all data types is <b>4</b>, which is used, when no <code>precisionStep</code> is given.
+ *  <li>Ideal value in most cases for <em>64 bit</em> data types <em>(long, double)</em> is <b>6</b> or <b>8</b>.
+ *  <li>Ideal value in most cases for <em>32 bit</em> data types <em>(int, float)</em> is <b>4</b>.
+ *  <li>Steps <b>&ge;64</b> for <em>long/double</em> and <b>&ge;32</b> for <em>int/float</em> produces one token
+ *  per value in the index and querying is as slow as a conventional {@link TermRangeQuery}. But it can be used
+ *  to produce fields, that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as
+ *  <code>precisionStep</code>). Using {@link NumericField NumericFields} for sorting
+ *  is ideal, because building the field cache is much faster than with text-only numbers.
+ *  Sorting is also possible with range query optimized fields using one of the above <code>precisionSteps</code>.
+ * </ul>
+ *
  * <p>This dramatically improves the performance of Apache Lucene with range queries, which
  * are no longer dependent on the index size and the number of distinct values because there is
  * an upper limit unrelated to either of these properties.</p>
  *
  * <p>Comparisions of the different types of RangeQueries on an index with about 500,000 docs showed
- * that the old {@link RangeQuery} (with raised {@link BooleanQuery} clause count) took about 30-40
- * secs to complete, {@link ConstantScoreRangeQuery} took 5 secs and executing
- * this class took &lt;100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
- * This query type was developed for a geographic portal, where the performance for
+ * that {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count)
+ * took about 30-40 secs to complete, {@link TermRangeQuery} in constant score rewrite mode took 5 secs
+ * and executing this class took &lt;100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit
+ * precision step). This query type was developed for a geographic portal, where the performance for
  * e.g. bounding boxes or exact date/time stamps is important.</p>
  *
- * <p>The query is in {@linkplain #setConstantScoreRewrite constant score mode} per default.
+ * <p>The query defaults to {@linkplain #setConstantScoreRewrite constant score rewrite mode}.
  * With precision steps of &le;4, this query can be run in conventional {@link BooleanQuery}
  * rewrite mode without changing the max clause count.
  *
@@ -127,8 +143,8 @@
     Number min, Number max, final boolean minInclusive, final boolean maxInclusive
   ) {
     assert (valSize == 32 || valSize == 64);
-    if (precisionStep < 1 || precisionStep > valSize)
-      throw new IllegalArgumentException("precisionStep may only be 1.."+valSize);
+    if (precisionStep < 1)
+      throw new IllegalArgumentException("precisionStep must be >=1");
     this.field = field.intern();
     this.precisionStep = precisionStep;
     this.valSize = valSize;
@@ -153,6 +169,19 @@
   }
   
   /**
+   * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>long</code>
+   * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeQuery newLongRange(final String field,
+    Long min, Long max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive);
+  }
+  
+  /**
    * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>int</code>
    * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
    * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
@@ -166,6 +195,19 @@
   }
   
   /**
+   * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>int</code>
+   * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeQuery newIntRange(final String field,
+    Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
+  }
+  
+  /**
    * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>double</code>
    * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
    * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
@@ -179,6 +221,19 @@
   }
   
   /**
+   * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>double</code>
+   * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeQuery newDoubleRange(final String field,
+    Double min, Double max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive);
+  }
+  
+  /**
    * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>float</code>
    * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
    * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
@@ -191,6 +246,19 @@
     return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive);
   }
   
+  /**
+   * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>float</code>
+   * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeQuery newFloatRange(final String field,
+    Float min, Float max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
+  }
+  
   //@Override
   protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
     return new NumericRangeTermEnum(reader);

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java?rev=793823&r1=793822&r2=793823&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java Tue Jul 14 09:17:44 2009
@@ -18,6 +18,7 @@
  */
 
 import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+import org.apache.lucene.document.NumericField; // for javadocs
 import org.apache.lucene.search.NumericRangeQuery; // for javadocs
 import org.apache.lucene.search.NumericRangeFilter; // for javadocs
 
@@ -62,9 +63,15 @@
 public final class NumericUtils {
 
   private NumericUtils() {} // no instance!
-
+  
+  /**
+   * The default precision step used by {@link NumericField}, {@link NumericTokenStream},
+   * {@link NumericRangeQuery}, and {@link NumericRangeFilter} as default
+   */
+  public static final int PRECISION_STEP_DEFAULT = 4;
+  
   /**
-   * Longs are stored at lower precision by shifting off lower bits. The shift count is
+   * Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is
    * stored as <code>SHIFT_START_LONG+shift</code> in the first character
    */
   public static final char SHIFT_START_LONG = (char)0x20;
@@ -74,10 +81,10 @@
    * for encoding <code>long</code> values.
    * @see #longToPrefixCoded(long,int,char[])
    */
-  public static final int LONG_BUF_SIZE = 63/7 + 2;
+  public static final int BUF_SIZE_LONG = 63/7 + 2;
 
   /**
-   * Integers are stored at lower precision by shifting off lower bits. The shift count is
+   * Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is
    * stored as <code>SHIFT_START_INT+shift</code> in the first character
    */
   public static final char SHIFT_START_INT  = (char)0x60;
@@ -87,14 +94,14 @@
    * for encoding <code>int</code> values.
    * @see #intToPrefixCoded(int,int,char[])
    */
-  public static final int INT_BUF_SIZE = 31/7 + 2;
+  public static final int BUF_SIZE_INT = 31/7 + 2;
 
   /**
    * Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
    * This is method is used by {@link NumericTokenStream}.
    * @param val the numeric value
    * @param shift how many bits to strip from the right
-   * @param buffer that will contain the encoded chars, must be at least of {@link #LONG_BUF_SIZE}
+   * @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG}
    * length
    * @return number of chars written to buffer
    */
@@ -122,7 +129,7 @@
    * @param shift how many bits to strip from the right
    */
   public static String longToPrefixCoded(final long val, final int shift) {
-    final char[] buffer = new char[LONG_BUF_SIZE];
+    final char[] buffer = new char[BUF_SIZE_LONG];
     final int len = longToPrefixCoded(val, shift, buffer);
     return new String(buffer, 0, len);
   }
@@ -142,7 +149,7 @@
    * This is method is used by {@link NumericTokenStream}.
    * @param val the numeric value
    * @param shift how many bits to strip from the right
-   * @param buffer that will contain the encoded chars, must be at least of {@link #INT_BUF_SIZE}
+   * @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT}
    * length
    * @return number of chars written to buffer
    */
@@ -170,7 +177,7 @@
    * @param shift how many bits to strip from the right
    */
   public static String intToPrefixCoded(final int val, final int shift) {
-    final char[] buffer = new char[INT_BUF_SIZE];
+    final char[] buffer = new char[BUF_SIZE_INT];
     final int len = intToPrefixCoded(val, shift, buffer);
     return new String(buffer, 0, len);
   }
@@ -294,8 +301,6 @@
   public static void splitLongRange(final LongRangeBuilder builder,
     final int precisionStep,  final long minBound, final long maxBound
   ) {
-    if (precisionStep<1 || precisionStep>64)
-      throw new IllegalArgumentException("precisionStep may only be 1..64");
     splitRange(builder, 64, precisionStep, minBound, maxBound);
   }
   
@@ -310,8 +315,6 @@
   public static void splitIntRange(final IntRangeBuilder builder,
     final int precisionStep,  final int minBound, final int maxBound
   ) {
-    if (precisionStep<1 || precisionStep>32)
-      throw new IllegalArgumentException("precisionStep may only be 1..32");
     splitRange(builder, 32, precisionStep, (long)minBound, (long)maxBound);
   }
   
@@ -320,6 +323,8 @@
     final Object builder, final int valSize,
     final int precisionStep, long minBound, long maxBound
   ) {
+    if (precisionStep < 1)
+      throw new IllegalArgumentException("precisionStep must be >=1");
     if (minBound > maxBound) return;
     for (int shift=0; ; shift += precisionStep) {
       // calculate new bounds for inner precision

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java?rev=793823&r1=793822&r2=793823&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java Tue Jul 14 09:17:44 2009
@@ -20,61 +20,67 @@
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 public class TestNumericTokenStream extends LuceneTestCase {
 
-  static final int precisionStep = 8;
   static final long lvalue = 4573245871874382L;
   static final int ivalue = 123456;
 
   public void testLongStreamNewAPI() throws Exception {
-    final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue);
+    final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
     stream.setUseNewAPI(true);
     // use getAttribute to test if attributes really exist, if not an IAE will be throwed
     final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
-    for (int shift=0; shift<64; shift+=precisionStep) {
+    final TypeAttribute typeAtt = (TypeAttribute) stream.getAttribute(TypeAttribute.class);
+    for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertTrue("New token is available", stream.incrementToken());
       assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.term());
+      assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
     }
     assertFalse("No more tokens available", stream.incrementToken());
   }
   
   public void testLongStreamOldAPI() throws Exception {
-    final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue);
+    final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
     stream.setUseNewAPI(false);
     Token tok=new Token();
-    for (int shift=0; shift<64; shift+=precisionStep) {
+    for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertNotNull("New token is available", tok=stream.next(tok));
       assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), tok.term());
+      assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, tok.type());
     }
     assertNull("No more tokens available", stream.next(tok));
   }
 
   public void testIntStreamNewAPI() throws Exception {
-    final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue);
+    final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
     stream.setUseNewAPI(true);
     // use getAttribute to test if attributes really exist, if not an IAE will be throwed
     final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
-    for (int shift=0; shift<32; shift+=precisionStep) {
+    final TypeAttribute typeAtt = (TypeAttribute) stream.getAttribute(TypeAttribute.class);
+    for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertTrue("New token is available", stream.incrementToken());
       assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.term());
+      assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
     }
     assertFalse("No more tokens available", stream.incrementToken());
   }
   
   public void testIntStreamOldAPI() throws Exception {
-    final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue);
+    final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
     stream.setUseNewAPI(false);
     Token tok=new Token();
-    for (int shift=0; shift<32; shift+=precisionStep) {
+    for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertNotNull("New token is available", tok=stream.next(tok));
       assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), tok.term());
+      assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, tok.type());
     }
     assertNull("No more tokens available", stream.next(tok));
   }
   
   public void testNotInitialized() throws Exception {
-    final NumericTokenStream stream=new NumericTokenStream(precisionStep);
+    final NumericTokenStream stream=new NumericTokenStream();
     
     try {
       stream.reset();

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java?rev=793823&r1=793822&r2=793823&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java Tue Jul 14 09:17:44 2009
@@ -53,13 +53,14 @@
         field8 = new NumericField("field8", 8, Field.Store.YES, true),
         field4 = new NumericField("field4", 4, Field.Store.YES, true),
         field2 = new NumericField("field2", 2, Field.Store.YES, true),
+        fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true),
         ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
         ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
         ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
       
       Document doc = new Document();
       // add fields, that have a distance to test general functionality
-      doc.add(field8); doc.add(field4); doc.add(field2);
+      doc.add(field8); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie);
       // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
       doc.add(ascfield8); doc.add(ascfield4); doc.add(ascfield2);
       
@@ -69,6 +70,7 @@
         field8.setIntValue(val);
         field4.setIntValue(val);
         field2.setIntValue(val);
+        fieldNoTrie.setIntValue(val);
 
         val=l-(noDocs/2);
         ascfield8.setIntValue(val);
@@ -261,9 +263,13 @@
       termCountT += tq.getTotalNumberOfTerms();
       termCountC += cq.getTotalNumberOfTerms();
     }
-    System.out.println("Average number of terms during random search on '" + field + "':");
-    System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
-    System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
+    if (precisionStep == Integer.MAX_VALUE) {
+      assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC);
+    } else {
+      System.out.println("Average number of terms during random search on '" + field + "':");
+      System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
+      System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
+    }
   }
   
   public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
@@ -278,6 +284,10 @@
     testRandomTrieAndClassicRangeQuery(2);
   }
   
+  public void testRandomTrieAndClassicRangeQuery_NoTrie() throws Exception {
+    testRandomTrieAndClassicRangeQuery(Integer.MAX_VALUE);
+  }
+  
   private void testRangeSplit(int precisionStep) throws Exception {
     final Random rnd=newRandom();
     String field="ascfield"+precisionStep;

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java?rev=793823&r1=793822&r2=793823&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java Tue Jul 14 09:17:44 2009
@@ -51,27 +51,33 @@
       
       NumericField
         field8 = new NumericField("field8", 8, Field.Store.YES, true),
+        field6 = new NumericField("field6", 6, Field.Store.YES, true),
         field4 = new NumericField("field4", 4, Field.Store.YES, true),
         field2 = new NumericField("field2", 2, Field.Store.YES, true),
+        fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true),
         ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
+        ascfield6 = new NumericField("ascfield6", 6, Field.Store.NO, true),
         ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
         ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
       
       Document doc = new Document();
       // add fields, that have a distance to test general functionality
-      doc.add(field8); doc.add(field4); doc.add(field2);
+      doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie);
       // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
-      doc.add(ascfield8); doc.add(ascfield4); doc.add(ascfield2);
+      doc.add(ascfield8); doc.add(ascfield6); doc.add(ascfield4); doc.add(ascfield2);
       
       // Add a series of noDocs docs with increasing long values, by updating the fields
       for (int l=0; l<noDocs; l++) {
         long val=distance*l+startOffset;
         field8.setLongValue(val);
+        field6.setLongValue(val);
         field4.setLongValue(val);
         field2.setLongValue(val);
+        fieldNoTrie.setLongValue(val);
 
         val=l-(noDocs/2);
         ascfield8.setLongValue(val);
+        ascfield6.setLongValue(val);
         ascfield4.setLongValue(val);
         ascfield2.setLongValue(val);
         writer.addDocument(doc);
@@ -139,6 +145,10 @@
     testRange(8);
   }
   
+  public void testRange_6bit() throws Exception {
+    testRange(6);
+  }
+  
   public void testRange_4bit() throws Exception {
     testRange(4);
   }
@@ -178,6 +188,10 @@
     testLeftOpenRange(8);
   }
   
+  public void testLeftOpenRange_6bit() throws Exception {
+    testLeftOpenRange(6);
+  }
+  
   public void testLeftOpenRange_4bit() throws Exception {
     testLeftOpenRange(4);
   }
@@ -206,6 +220,10 @@
     testRightOpenRange(8);
   }
   
+  public void testRightOpenRange_6bit() throws Exception {
+    testRightOpenRange(6);
+  }
+  
   public void testRightOpenRange_4bit() throws Exception {
     testRightOpenRange(4);
   }
@@ -261,15 +279,23 @@
       termCountT += tq.getTotalNumberOfTerms();
       termCountC += cq.getTotalNumberOfTerms();
     }
-    System.out.println("Average number of terms during random search on '" + field + "':");
-    System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
-    System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
+    if (precisionStep == Integer.MAX_VALUE) {
+      assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC);
+    } else {
+      System.out.println("Average number of terms during random search on '" + field + "':");
+      System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
+      System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
+    }
   }
   
   public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
     testRandomTrieAndClassicRangeQuery(8);
   }
   
+  public void testRandomTrieAndClassicRangeQuery_6bit() throws Exception {
+    testRandomTrieAndClassicRangeQuery(6);
+  }
+  
   public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
     testRandomTrieAndClassicRangeQuery(4);
   }
@@ -278,6 +304,10 @@
     testRandomTrieAndClassicRangeQuery(2);
   }
   
+  public void testRandomTrieAndClassicRangeQuery_NoTrie() throws Exception {
+    testRandomTrieAndClassicRangeQuery(Integer.MAX_VALUE);
+  }
+  
   private void testRangeSplit(int precisionStep) throws Exception {
     final Random rnd=newRandom();
     String field="ascfield"+precisionStep;
@@ -311,6 +341,10 @@
     testRangeSplit(8);
   }
   
+  public void testRangeSplit_6bit() throws Exception {
+    testRangeSplit(6);
+  }
+  
   public void testRangeSplit_4bit() throws Exception {
     testRangeSplit(4);
   }
@@ -339,6 +373,10 @@
     testDoubleRange(8);
   }
   
+  public void testDoubleRange_6bit() throws Exception {
+    testDoubleRange(6);
+  }
+  
   public void testDoubleRange_4bit() throws Exception {
     testDoubleRange(4);
   }
@@ -376,6 +414,10 @@
     testSorting(8);
   }
   
+  public void testSorting_6bit() throws Exception {
+    testSorting(6);
+  }
+  
   public void testSorting_4bit() throws Exception {
     testSorting(4);
   }



Mime
View raw message