lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r786470 [1/2] - in /lucene/java/trunk: ./ contrib/spatial/src/java/org/apache/lucene/spatial/ src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/document/ src/java/org/apache/lucene/search/ src/java/org/apache/lucene/util/ src/...
Date Fri, 19 Jun 2009 12:09:53 GMT
Author: uschindler
Date: Fri Jun 19 12:09:52 2009
New Revision: 786470

URL: http://svn.apache.org/viewvc?rev=786470&view=rev
Log:
LUCENE-1673: Move TrieRange to core (part 1: addition to core)

Added:
    lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java   (with props)
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java   (with props)
    lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java   (with props)
    lucene/java/trunk/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java   (with props)
    lucene/java/trunk/src/test/org/apache/lucene/util/TestNumericUtils.java   (with props)
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/spatial/src/java/org/apache/lucene/spatial/NumberUtils.java
    lucene/java/trunk/src/java/org/apache/lucene/document/DateField.java
    lucene/java/trunk/src/java/org/apache/lucene/document/DateTools.java
    lucene/java/trunk/src/java/org/apache/lucene/document/NumberTools.java
    lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java
    lucene/java/trunk/src/java/org/apache/lucene/search/package.html

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=786470&r1=786469&r2=786470&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Jun 19 12:09:52 2009
@@ -207,6 +207,10 @@
     ReadOnlySegmentReader.class, along with the src/gcj/*
     specializations for GCJ, are now deprecated, to be removed in
     3.0.  (Earwin Burrfoot via Mike McCandless)
+
+23. LUCENE-1673: Deprecated NumberTools in favour of the new
+    NumericRangeQuery and its new indexing format for numeric or
+    date values.  (Uwe Schindler)
     
 Bug fixes
 
@@ -408,8 +412,15 @@
     via Mike McCandless)
 
 26. LUCENE-1550: Added new n-gram based String distance measure for spell checking.
-    See the Javadocs for NGramDistance.java for a reference paper on why this is helpful (Tom Morton via Grant Ingersoll)
-        
+    See the Javadocs for NGramDistance.java for a reference paper on why
+    this is helpful (Tom Morton via Grant Ingersoll)
+
+27. LUCENE-1470, LUCENE-1582, LUCENE-1602, LUCENE-1673: Added
+    NumericRangeQuery and NumericRangeFilter, a fast alternative to
+    RangeQuery/RangeFilter for numeric searches. They depend on a specific
+    structure of terms in the index that can be created by indexing
+    using the new NumericTokenStream class.  (Uwe Schindler,
+    Yonik Seeley, Mike McCandless)
     
 Optimizations
 

Modified: lucene/java/trunk/contrib/spatial/src/java/org/apache/lucene/spatial/NumberUtils.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spatial/src/java/org/apache/lucene/spatial/NumberUtils.java?rev=786470&r1=786469&r2=786470&view=diff
==============================================================================
--- lucene/java/trunk/contrib/spatial/src/java/org/apache/lucene/spatial/NumberUtils.java (original)
+++ lucene/java/trunk/contrib/spatial/src/java/org/apache/lucene/spatial/NumberUtils.java Fri Jun 19 12:09:52 2009
@@ -17,12 +17,20 @@
 
 package org.apache.lucene.spatial;
 
+import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+import org.apache.lucene.search.NumericRangeQuery; // for javadocs
+import org.apache.lucene.util.NumericUtils; // for javadocs
+
 /**
  * TODO -- when solr moves NumberUtils to lucene, this should be redundant
  * 
  * This is a copy of solr's number utils with only the functions we use...
  * 
- * @deprecated will be replaced with lucene version of solr copy...
+ * @deprecated TODO: This helper class will be removed soon.
+ * For new indexes use {@link NumericUtils} instead, which provides a sortable
+ * binary representation (prefix encoded) of numeric values.
+ * To index and efficiently query numeric values use {@link NumericTokenStream}
+ * and {@link NumericRangeQuery}.
  */
 @Deprecated
 public class NumberUtils {

Added: lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=786470&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java Fri Jun 19 12:09:52 2009
@@ -0,0 +1,244 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.search.NumericRangeQuery; // for javadocs
+import org.apache.lucene.search.NumericRangeFilter; // for javadocs
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+/**
+ * This class provides a {@link TokenStream} for indexing numeric values
+ * that can be used by {@link NumericRangeQuery}/{@link NumericRangeFilter}.
+ * For more information on how to use this class and its configuration properties
+ * (<a href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>)
+ * read the docs of {@link NumericRangeQuery}.
+ *
+ * <p>This stream is not intended to be used in analyzers; it is more for iterating the
+ * different precisions during indexing a specific numeric value.
+ * A numeric value is indexed as multiple string encoded terms, each reduced
+ * by zeroing bits from the right. Each value is also prefixed (in the first char) by the
+ * <code>shift</code> value (number of bits removed) used during encoding.
+ * The number of bits removed from the right for each trie entry is called
+ * <code>precisionStep</code> in this API.
+ *
+ * <p>The usage pattern is (it is recommended to switch off norms and term frequencies
+ * for numeric fields; it does not make sense to have them):
+ * <pre>
+ *  Field field = new Field(name, new NumericTokenStream(precisionStep).set<em>???</em>Value(value));
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  document.add(field);
+ * </pre>
+ * <p>For optimal performance, re-use the TokenStream and Field instance
+ * for more than one document:
+ * <pre>
+ *  <em>// init</em>
+ *  NumericTokenStream stream = new NumericTokenStream(precisionStep);
+ *  Field field = new Field(name, stream);
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  Document document = new Document();
+ *  document.add(field);
+ *  <em>// use this code to index many documents:</em>
+ *  stream.set<em>???</em>Value(value1);
+ *  writer.addDocument(document);
+ *  stream.set<em>???</em>Value(value2);
+ *  writer.addDocument(document);
+ *  ...
+ * </pre>
+ * <p><em>Please note:</em> Token streams are read when the document is added to the index.
+ * If you index more than one numeric field, use a separate instance for each.
+ *
+ * <p>Values indexed by this stream can be sorted on or loaded into the field cache.
+ * For that, factories like {@link NumericUtils#getLongSortField} are available,
+ * as well as parsers for filling the field cache (e.g., {@link NumericUtils#FIELD_CACHE_LONG_PARSER}).
+ *
+ * @since 2.9
+ */
+public final class NumericTokenStream extends TokenStream {
+
+  /** The full precision 64 bit token gets this token type assigned. */
+  public static final String TOKEN_TYPE_FULL_PREC_64  = "fullPrecNumeric64";
+
+  /** The lower precision 64 bit tokens get this token type assigned. */
+  public static final String TOKEN_TYPE_LOWER_PREC_64 = "lowerPrecNumeric64";
+
+  /** The full precision 32 bit token gets this token type assigned. */
+  public static final String TOKEN_TYPE_FULL_PREC_32  = "fullPrecNumeric32";
+
+  /** The lower precision 32 bit tokens get this token type assigned. */
+  public static final String TOKEN_TYPE_LOWER_PREC_32 = "lowerPrecNumeric32";
+
+  /**
+   * Creates a token stream for numeric values. The stream is not yet initialized;
+   * before using it, set a value using one of the set<em>???</em>Value() methods.
+   */
+  public NumericTokenStream(final int precisionStep) {
+    this.precisionStep = precisionStep;
+    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+    typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+    posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+  }
+
+  /**
+   * Initializes the token stream with the supplied <code>long</code> value.
+   * @param value the value, for which this TokenStream should enumerate tokens.
+   * @return this instance, because of this you can use it the following way:
+   * <code>new Field(name, new NumericTokenStream(precisionStep).setLongValue(value))</code>
+   */
+  public NumericTokenStream setLongValue(final long value) {
+    this.value = value;
+    valSize = 64;
+    shift = 0;
+    return this;
+  }
+  
+  /**
+   * Initializes the token stream with the supplied <code>int</code> value.
+   * @param value the value, for which this TokenStream should enumerate tokens.
+   * @return this instance, because of this you can use it the following way:
+   * <code>new Field(name, new NumericTokenStream(precisionStep).setIntValue(value))</code>
+   */
+  public NumericTokenStream setIntValue(final int value) {
+    this.value = (long) value;
+    valSize = 32;
+    shift = 0;
+    return this;
+  }
+  
+  /**
+   * Initializes the token stream with the supplied <code>double</code> value.
+   * @param value the value, for which this TokenStream should enumerate tokens.
+   * @return this instance, because of this you can use it the following way:
+   * <code>new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value))</code>
+   */
+  public NumericTokenStream setDoubleValue(final double value) {
+    this.value = NumericUtils.doubleToSortableLong(value);
+    valSize = 64;
+    shift = 0;
+    return this;
+  }
+  
+  /**
+   * Initializes the token stream with the supplied <code>float</code> value.
+   * @param value the value, for which this TokenStream should enumerate tokens.
+   * @return this instance, because of this you can use it the following way:
+   * <code>new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value))</code>
+   */
+  public NumericTokenStream setFloatValue(final float value) {
+    this.value = (long) NumericUtils.floatToSortableInt(value);
+    valSize = 32;
+    shift = 0;
+    return this;
+  }
+  
+  // @Override
+  public void reset() {
+    if (valSize == 0)
+      throw new IllegalStateException("call set???Value() before usage");
+    if (precisionStep < 1 || precisionStep > valSize)
+      throw new IllegalArgumentException("precisionStep may only be 1.."+valSize);
+    shift = 0;
+  }
+
+  // @Override
+  public boolean incrementToken() {
+    if (valSize == 0)
+      throw new IllegalStateException("call set???Value() before usage");
+    if (shift >= valSize)
+      return false;
+
+    final char[] buffer;
+    switch (valSize) {
+      case 64:
+        buffer = termAtt.resizeTermBuffer(NumericUtils.LONG_BUF_SIZE);
+        termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
+        typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_64 : TOKEN_TYPE_LOWER_PREC_64);
+        break;
+      
+      case 32:
+        buffer = termAtt.resizeTermBuffer(NumericUtils.INT_BUF_SIZE);
+        termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
+        typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_32 : TOKEN_TYPE_LOWER_PREC_32);
+        break;
+      
+      default:
+        // should not happen
+        throw new IllegalArgumentException("valSize must be 32 or 64");
+    }
+    
+    posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
+    shift += precisionStep;
+    return true;
+  }
+
+  // @Override
+  /** @deprecated Will be removed in Lucene 3.0 */
+  public Token next(final Token reusableToken) {
+    assert reusableToken != null;
+    if (valSize == 0)
+      throw new IllegalStateException("call set???Value() before usage");
+    if (shift >= valSize)
+      return null;
+    
+    reusableToken.clear();
+
+    final char[] buffer;
+    switch (valSize) {
+      case 64:
+        buffer = reusableToken.resizeTermBuffer(NumericUtils.LONG_BUF_SIZE);
+        reusableToken.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
+        reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_64 : TOKEN_TYPE_LOWER_PREC_64);
+        break;
+      
+      case 32:
+        buffer = reusableToken.resizeTermBuffer(NumericUtils.INT_BUF_SIZE);
+        reusableToken.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
+        reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_32 : TOKEN_TYPE_LOWER_PREC_32);
+        break;
+      
+      default:
+        // should not happen
+        throw new IllegalArgumentException("valSize must be 32 or 64");
+    }
+
+    reusableToken.setPositionIncrement((shift == 0) ? 1 : 0);
+    shift += precisionStep;
+    return reusableToken;
+  }
+  
+  // @Override
+  public String toString() {
+    final StringBuffer sb = new StringBuffer("(numeric,valSize=").append(valSize);
+    sb.append(",precisionStep=").append(precisionStep).append(')');
+    return sb.toString();
+  }
+
+  // members
+  private final TermAttribute termAtt;
+  private final TypeAttribute typeAtt;
+  private final PositionIncrementAttribute posIncrAtt;
+  
+  private int shift = 0, valSize = 0; // valSize==0 means not initialized
+  private final int precisionStep;
+  
+  private long value = 0L;
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/analysis/NumericTokenStream.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/DateField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/DateField.java?rev=786470&r1=786469&r2=786470&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/DateField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/DateField.java Fri Jun 19 12:09:52 2009
@@ -19,8 +19,14 @@
 
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.RangeQuery;
+import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+import org.apache.lucene.search.NumericRangeQuery; // for javadocs
+import org.apache.lucene.util.NumericUtils; // for javadocs
 
 import java.util.Date;   // for javadoc
+import java.util.Calendar;   // for javadoc
+
+// do not remove in 3.0, needed for reading old indexes!
 
 /**
  * Provides support for converting dates to strings and vice-versa.
@@ -38,7 +44,18 @@
  * indexed when using this class. See {@link DateTools} for an
  * alternative without such a limitation.
  *
- * @deprecated If you build a new index, use {@link DateTools} instead. This class is included for use with existing
+ * <P>
+ * Another approach is {@link NumericUtils}, which provides
+ * a sortable binary representation (prefix encoded) of numeric values, which
+ * date/time are.
+ * For indexing a {@link Date} or {@link Calendar}, just get the unix timestamp as
+ * <code>long</code> using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and
+ * index this as a numeric value with {@link NumericTokenStream}
+ * and use {@link NumericRangeQuery} to query it.
+ *
+ * @deprecated If you build a new index, use {@link DateTools} or 
+ * {@link NumericTokenStream} instead.
+ * This class is included for use with existing
  * indices and will be removed in a future release.
  */
 public class DateField {

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/DateTools.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/DateTools.java?rev=786470&r1=786469&r2=786470&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/DateTools.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/DateTools.java Fri Jun 19 12:09:52 2009
@@ -22,6 +22,9 @@
 import java.util.Calendar;
 import java.util.Date;
 import java.util.TimeZone;
+import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+import org.apache.lucene.search.NumericRangeQuery; // for javadocs
+import org.apache.lucene.util.NumericUtils; // for javadocs
 
 /**
  * Provides support for converting dates to strings and vice-versa.
@@ -36,6 +39,15 @@
  * <P>Compared to {@link DateField} the strings generated by the methods
  * in this class take slightly more space, unless your selected resolution
  * is set to <code>Resolution.DAY</code> or lower.
+ *
+ * <P>
+ * Another approach is {@link NumericUtils}, which provides
+ * a sortable binary representation (prefix encoded) of numeric values, which
+ * date/time are.
+ * For indexing a {@link Date} or {@link Calendar}, just get the unix timestamp as
+ * <code>long</code> using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and
+ * index this as a numeric value with {@link NumericTokenStream}
+ * and use {@link NumericRangeQuery} to query it.
  */
 public class DateTools {
   

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/NumberTools.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/NumberTools.java?rev=786470&r1=786469&r2=786470&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/NumberTools.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/NumberTools.java Fri Jun 19 12:09:52 2009
@@ -17,6 +17,11 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+import org.apache.lucene.search.NumericRangeQuery; // for javadocs
+import org.apache.lucene.util.NumericUtils; // for javadocs
+
+// do not remove this class in 3.0, it may be needed to decode old indexes!
 
 /**
  * Provides support for converting longs to Strings, and back again. The strings
@@ -31,7 +36,13 @@
  * This class handles <b>all</b> long values (unlike
  * {@link org.apache.lucene.document.DateField}).
  * 
- * 
+ * @deprecated For new indexes use {@link NumericUtils} instead, which
+ * provides a sortable binary representation (prefix encoded) of numeric
+ * values.
+ * To index and efficiently query numeric values use {@link NumericTokenStream}
+ * and {@link NumericRangeQuery}.
+ * This class is included for use with existing
+ * indices and will be removed in a future release.
  */
 public class NumberTools {
 

Added: lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java?rev=786470&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java Fri Jun 19 12:09:52 2009
@@ -0,0 +1,122 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+
+/**
+ * Implementation of a {@link Filter} that implements <em>trie-based</em> range filtering
+ * for numeric values. For more information about the algorithm look into the docs of
+ * {@link NumericRangeQuery}.
+ *
+ * <p>This filter depends on a specific structure of terms in the index that can only be created
+ * by indexing using {@link NumericTokenStream}.
+ *
+ * <p><b>Please note:</b> This class has no public constructor; you can create filters depending on the data type
+ * by using the static factories {@linkplain #newLongRange NumericRangeFilter.newLongRange()},
+ * {@linkplain #newIntRange NumericRangeFilter.newIntRange()}, {@linkplain #newDoubleRange NumericRangeFilter.newDoubleRange()},
+ * and {@linkplain #newFloatRange NumericRangeFilter.newFloatRange()}, e.g.:
+ * <pre>
+ * Filter f = NumericRangeFilter.newFloatRange(field, <a href="NumericRangeQuery.html#precisionStepDesc">precisionStep</a>,
+ *                                             new Float(0.03f), new Float(0.10f),
+ *                                             true, true);
+ * </pre>
+ * @since 2.9
+ **/
+public final class NumericRangeFilter extends MultiTermQueryWrapperFilter {
+
+  private NumericRangeFilter(final NumericRangeQuery query) {
+    super(query);
+  }
+  
+  /**
+   * Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>long</code>
+   * range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeFilter newLongRange(final String field, final int precisionStep,
+    Long min, Long max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeFilter(
+      NumericRangeQuery.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive)
+    );
+  }
+  
+  /**
+   * Factory that creates a <code>NumericRangeFilter</code>, that filters an <code>int</code>
+   * range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeFilter newIntRange(final String field, final int precisionStep,
+    Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeFilter(
+      NumericRangeQuery.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive)
+    );
+  }
+  
+  /**
+   * Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>double</code>
+   * range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeFilter newDoubleRange(final String field, final int precisionStep,
+    Double min, Double max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeFilter(
+      NumericRangeQuery.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive)
+    );
+  }
+  
+  /**
+   * Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>float</code>
+   * range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeFilter newFloatRange(final String field, final int precisionStep,
+    Float min, Float max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    return new NumericRangeFilter(
+      NumericRangeQuery.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive)
+    );
+  }
+
+  /** Returns the field name for this filter */
+  public String getField() { return ((NumericRangeQuery)query).getField(); }
+
+  /** Returns <code>true</code> if the lower endpoint is inclusive */
+  public boolean includesMin() { return ((NumericRangeQuery)query).includesMin(); }
+  
+  /** Returns <code>true</code> if the upper endpoint is inclusive */
+  public boolean includesMax() { return ((NumericRangeQuery)query).includesMax(); }
+
+  /** Returns the lower value of this range filter */
+  public Number getMin() { return ((NumericRangeQuery)query).getMin(); }
+
+  /** Returns the upper value of this range filter */
+  public Number getMax() { return ((NumericRangeQuery)query).getMax(); }
+  
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java?rev=786470&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java Fri Jun 19 12:09:52 2009
@@ -0,0 +1,410 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
+/**
+ * Implementation of a {@link Query} that implements <em>trie-based</em> range querying
+ * for numeric values.
+ *
+ * <h3>Usage</h3>
+ * <h4>Indexing</h4>
+ * Before numeric values can be queried, they must be indexed in a special way. You can do this
+ * by adding numeric fields to the index by specifying a {@link NumericTokenStream}.
+ * An important setting is the <a href="#precisionStepDesc"><code>precisionStep</code></a>, which specifies,
+ * how many different precisions per numeric value are indexed to speed up range queries.
+ * Lower values create more terms but speed up search, higher values create fewer terms, but
+ * slow down search. Suitable values are 2, 4, or 8. A good starting point to test is 4.
+ * For code examples see {@link NumericTokenStream}.
+ *
+ * <h4>Searching</h4>
+ * <p>This class has no constructor, you can create queries depending on the data type
+ * by using the static factories {@linkplain #newLongRange NumericRangeQuery.newLongRange()},
+ * {@linkplain #newIntRange NumericRangeQuery.newIntRange()}, {@linkplain #newDoubleRange NumericRangeQuery.newDoubleRange()},
+ * and {@linkplain #newFloatRange NumericRangeQuery.newFloatRange()}, e.g.:
+ * <pre>
+ * Query q = NumericRangeQuery.newFloatRange(field, <a href="#precisionStepDesc">precisionStep</a>,
+ *                                           new Float(0.03f), new Float(0.10f),
+ *                                           true, true);
+ * </pre>
+ *
+ * <h3>How it works</h3>
+ *
+ * <p>See the publication about <a target="_blank" href="http://www.panfmp.org">panFMP</a>,
+ * where this algorithm was described (referred to as <code>TrieRangeQuery</code>):
+ *
+ * <blockquote><strong>Schindler, U, Diepenbroek, M</strong>, 2008.
+ * <em>Generic XML-based Framework for Metadata Portals.</em>
+ * Computers &amp; Geosciences 34 (12), 1947-1955.
+ * <a href="http://dx.doi.org/10.1016/j.cageo.2008.02.023"
+ * target="_blank">doi:10.1016/j.cageo.2008.02.023</a></blockquote>
+ *
+ * <p><em>A quote from this paper:</em> Because Apache Lucene is a full-text
+ * search engine and not a conventional database, it cannot handle numerical ranges
+ * (e.g., field value is inside user defined bounds, even dates are numerical values).
+ * We have developed an extension to Apache Lucene that stores
+ * the numerical values in a special string-encoded format with variable precision
+ * (all numerical values like doubles, longs, floats, and ints are converted to
+ * lexicographic sortable string representations and stored with different precisions
+ * (for a more detailed description of how the values are stored,
+ * see {@link NumericUtils}). A range is then divided recursively into multiple intervals for searching:
+ * The center of the range is searched only with the lowest possible precision in the <em>trie</em>,
+ * while the boundaries are matched more exactly. This reduces the number of terms dramatically.</p>
+ *
+ * <p>For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that
+ * uses a lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the
+ * lowest precision. Overall, a range could consist of a theoretical maximum of
+ * <code>7*255*2 + 255 = 3825</code> distinct terms (when there is a term for every distinct value of an
+ * 8-byte-number in the index and the range covers almost all of them; a maximum of 255 distinct values is used
+ * because it would always be possible to reduce the full 256 values to one term with degraded precision).
+ * In practice, we have seen up to 300 terms in most cases (index with 500,000 metadata records
+ * and a uniform value distribution).</p>
+ *
+ * <a name="precisionStepDesc"></a><h3>Precision Step</h3>
+ * <p>You can choose any <code>precisionStep</code> when encoding values.
+ * Lower step values mean more precisions and so more terms in index (and index gets larger).
+ * On the other hand, the maximum number of terms to match reduces, which optimizes query speed.
+ * The formula to calculate the maximum term count is:
+ * <pre>
+ *  n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 )
+ * </pre>
+ * <p><em>(this formula is only correct, when <code>bitsPerValue/precisionStep</code> is an integer;
+ * in other cases, the value must be rounded up and the last summand must contain the modulo of the division as
+ * precision step)</em>.
+ * For longs stored using a precision step of 4, <code>n = 15*15*2 + 15 = 465</code>, and for a precision
+ * step of 2, <code>n = 31*3*2 + 3 = 189</code>. But the faster search speed is reduced by more seeking
+ * in the term enum of the index. Because of this, the ideal <code>precisionStep</code> value can only
+ * be found out by testing. <b>Important:</b> You can index with a lower precision step value and test search speed
+ * using a multiple of the original step value.</p>
+ *
+ * <p>This dramatically improves the performance of Apache Lucene with range queries, which
+ * are no longer dependent on the index size and the number of distinct values because there is
+ * an upper limit unrelated to either of these properties.</p>
+ *
+ * <p>Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed
+ * that the old {@link RangeQuery} (with raised {@link BooleanQuery} clause count) took about 30-40
+ * secs to complete, {@link ConstantScoreRangeQuery} took 5 secs and executing
+ * this class took &lt;100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
+ * This query type was developed for a geographic portal, where the performance for
+ * e.g. bounding boxes or exact date/time stamps is important.</p>
+ *
+ * <p>The query is in {@linkplain #setConstantScoreRewrite constant score mode} per default.
+ * With precision steps of &le;4, this query can be run in conventional {@link BooleanQuery}
+ * rewrite mode without changing the max clause count.
+ * @since 2.9
+ **/
+public final class NumericRangeQuery extends MultiTermQuery {
+
+  private NumericRangeQuery(final String field, final int precisionStep, final int valSize,
+    Number min, Number max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    assert (valSize == 32 || valSize == 64);
+    if (precisionStep < 1 || precisionStep > valSize)
+      throw new IllegalArgumentException("precisionStep may only be 1.."+valSize);
+    this.field = field.intern();
+    this.precisionStep = precisionStep;
+    this.valSize = valSize;
+    this.min = min;
+    this.max = max;
+    this.minInclusive = minInclusive;
+    this.maxInclusive = maxInclusive;
+    setConstantScoreRewrite(true);
+  }
+  
+  /**
+   * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>long</code>
+   * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeQuery newLongRange(final String field, final int precisionStep,
+    Long min, Long max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    // long values occupy 64 bits
+    final int bitsPerValue = 64;
+    return new NumericRangeQuery(field, precisionStep, bitsPerValue, min, max, minInclusive, maxInclusive);
+  }
+  
+  /**
+   * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>int</code>
+   * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeQuery newIntRange(final String field, final int precisionStep,
+    Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    // int values occupy 32 bits
+    final int bitsPerValue = 32;
+    return new NumericRangeQuery(field, precisionStep, bitsPerValue, min, max, minInclusive, maxInclusive);
+  }
+  
+  /**
+   * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>double</code>
+   * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeQuery newDoubleRange(final String field, final int precisionStep,
+    Double min, Double max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    // double bounds are converted to sortable longs when the terms are enumerated, hence 64 bits
+    final int bitsPerValue = 64;
+    return new NumericRangeQuery(field, precisionStep, bitsPerValue, min, max, minInclusive, maxInclusive);
+  }
+  
+  /**
+   * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>float</code>
+   * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
+   * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+   * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+   * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+   */
+  public static NumericRangeQuery newFloatRange(final String field, final int precisionStep,
+    Float min, Float max, final boolean minInclusive, final boolean maxInclusive
+  ) {
+    // float bounds are converted to sortable ints when the terms are enumerated, hence 32 bits
+    final int bitsPerValue = 32;
+    return new NumericRangeQuery(field, precisionStep, bitsPerValue, min, max, minInclusive, maxInclusive);
+  }
+  
+  //@Override
+  protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
+    return new NumericRangeTermEnum(reader);
+  }
+
+  /** Returns the field name for this query */
+  public String getField() { return field; }
+
+  /** Returns <code>true</code> if the lower endpoint is inclusive */
+  public boolean includesMin() { return minInclusive; }
+  
+  /** Returns <code>true</code> if the upper endpoint is inclusive */
+  public boolean includesMax() { return maxInclusive; }
+
+  /** Returns the lower value of this range query */
+  public Number getMin() { return min; }
+
+  /** Returns the upper value of this range query */
+  public Number getMax() { return max; }
+  
+  /**
+   * Prints the range as <code>[min TO max]</code>; curly braces mark exclusive bounds,
+   * <code>*</code> marks a missing bound. The field name is prepended unless it equals
+   * the given default <code>field</code>; the boost is appended at the end.
+   */
+  //@Override
+  public String toString(final String field) {
+    final StringBuffer out = new StringBuffer();
+    if (!this.field.equals(field)) {
+      out.append(this.field);
+      out.append(':');
+    }
+    // lower bound
+    out.append(minInclusive ? '[' : '{');
+    if (min == null) {
+      out.append("*");
+    } else {
+      out.append(min.toString());
+    }
+    out.append(" TO ");
+    // upper bound
+    if (max == null) {
+      out.append("*");
+    } else {
+      out.append(max.toString());
+    }
+    out.append(maxInclusive ? ']' : '}');
+    out.append(ToStringUtils.boost(getBoost()));
+    return out.toString();
+  }
+
+  /**
+   * Two numeric range queries are equal if they work on the same field with the same
+   * value size, precision step, bounds, inclusiveness and boost.
+   */
+  //@Override
+  public final boolean equals(final Object o) {
+    if (o==this) return true;
+    if (o==null) return false;
+    if (o instanceof NumericRangeQuery) {
+      final NumericRangeQuery q=(NumericRangeQuery)o;
+      return (
+        // field names are interned in the constructor, so identity comparison is enough
+        field==q.field &&
+        // valSize must match, too: with both bounds null the bound types cannot tell
+        // a 32 bit range from a 64 bit range, but they enumerate different terms
+        valSize == q.valSize &&
+        (q.min == null ? min == null : q.min.equals(min)) &&
+        (q.max == null ? max == null : q.max.equals(max)) &&
+        minInclusive == q.minInclusive &&
+        maxInclusive == q.maxInclusive &&
+        precisionStep == q.precisionStep &&
+        getBoost() == q.getBoost()
+      );
+    }
+    return false;
+  }
+
+  /**
+   * Combines boost, field, precision step, both bounds (if present) and both
+   * inclusive flags into one hash value.
+   */
+  //@Override
+  public final int hashCode() {
+    int h = Float.floatToIntBits(getBoost()) ^ field.hashCode();
+    h += (precisionStep ^ 0x64365465);
+    // a missing (null) bound contributes nothing
+    if (min != null) {
+      h += (min.hashCode() ^ 0x14fa55fb);
+    }
+    if (max != null) {
+      h += (max.hashCode() ^ 0x733fa5fe);
+    }
+    h += (Boolean.valueOf(minInclusive).hashCode() ^ 0x14fa55fb);
+    h += (Boolean.valueOf(maxInclusive).hashCode() ^ 0x733fa5fe);
+    return h;
+  }
+  
+  // members (package private, to be also fast accessible by NumericRangeTermEnum)
+  final String field;
+  final int precisionStep, valSize;
+  final Number min, max;
+  final boolean minInclusive,maxInclusive;
+
+  /**
+   * Subclass of FilteredTermEnum for enumerating all terms that match the
+   * sub-ranges for trie range queries.
+   * <p>
+   * WARNING: This term enumeration is not guaranteed to be always ordered by
+   * {@link Term#compareTo}.
+   * The ordering depends on how {@link NumericUtils#splitLongRange} and
+   * {@link NumericUtils#splitIntRange} generates the sub-ranges. For
+   * {@link MultiTermQuery} ordering is not relevant.
+   */
+  private final class NumericRangeTermEnum extends FilteredTermEnum {
+
+    private final IndexReader reader;
+    /** Pending sub-ranges as a flat queue of prefix coded terms: lower bound, upper bound, lower bound, ... */
+    private final LinkedList/*<String>*/ rangeBounds = new LinkedList/*<String>*/();
+    /** Upper bound of the sub-range that is currently enumerated. */
+    private String currentUpperBound = null;
+
+    /**
+     * Translates the bounds of the enclosing query into sortable <code>long</code> or
+     * <code>int</code> bounds, lets {@link NumericUtils} split them into prefix coded
+     * sub-ranges and then seeks to the first matching term (if any).
+     * An exclusive bound that already sits on the extreme value of its type
+     * cannot match anything; in this case no sub-ranges are generated at all.
+     */
+    NumericRangeTermEnum(final IndexReader reader) throws IOException {
+      this.reader = reader;
+
+      switch (valSize) {
+        case 64: {
+          // lower
+          long minBound = Long.MIN_VALUE;
+          if (min instanceof Long) {
+            minBound = min.longValue();
+          } else if (min instanceof Double) {
+            minBound = NumericUtils.doubleToSortableLong(min.doubleValue());
+          }
+          if (!minInclusive && min != null) {
+            // incrementing Long.MAX_VALUE would wrap around to Long.MIN_VALUE and
+            // turn an empty range into one that matches everything
+            if (minBound == Long.MAX_VALUE) break;
+            minBound++;
+          }
+
+          // upper
+          long maxBound = Long.MAX_VALUE;
+          if (max instanceof Long) {
+            maxBound = max.longValue();
+          } else if (max instanceof Double) {
+            maxBound = NumericUtils.doubleToSortableLong(max.doubleValue());
+          }
+          if (!maxInclusive && max != null) {
+            // same wrap-around problem at the lower end of the value space
+            if (maxBound == Long.MIN_VALUE) break;
+            maxBound--;
+          }
+
+          NumericUtils.splitLongRange(new NumericUtils.LongRangeBuilder() {
+            //@Override
+            public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
+              rangeBounds.add(minPrefixCoded);
+              rangeBounds.add(maxPrefixCoded);
+            }
+          }, precisionStep, minBound, maxBound);
+          break;
+        }
+
+        case 32: {
+          // lower
+          int minBound = Integer.MIN_VALUE;
+          if (min instanceof Integer) {
+            minBound = min.intValue();
+          } else if (min instanceof Float) {
+            minBound = NumericUtils.floatToSortableInt(min.floatValue());
+          }
+          if (!minInclusive && min != null) {
+            // incrementing Integer.MAX_VALUE would wrap around to Integer.MIN_VALUE
+            if (minBound == Integer.MAX_VALUE) break;
+            minBound++;
+          }
+
+          // upper
+          int maxBound = Integer.MAX_VALUE;
+          if (max instanceof Integer) {
+            maxBound = max.intValue();
+          } else if (max instanceof Float) {
+            maxBound = NumericUtils.floatToSortableInt(max.floatValue());
+          }
+          if (!maxInclusive && max != null) {
+            // decrementing Integer.MIN_VALUE would wrap around to Integer.MAX_VALUE
+            if (maxBound == Integer.MIN_VALUE) break;
+            maxBound--;
+          }
+
+          NumericUtils.splitIntRange(new NumericUtils.IntRangeBuilder() {
+            //@Override
+            public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
+              rangeBounds.add(minPrefixCoded);
+              rangeBounds.add(maxPrefixCoded);
+            }
+          }, precisionStep, minBound, maxBound);
+          break;
+        }
+
+        default:
+          // should never happen
+          throw new IllegalArgumentException("valSize must be 32 or 64");
+      }
+
+      // seek to first term (returns immediately if no sub-ranges were generated)
+      next();
+    }
+
+    //@Override
+    public float difference() {
+      return 1.0f;
+    }
+
+    /** this is a dummy, it is not used by this class. */
+    //@Override
+    protected boolean endEnum() {
+      assert false; // should never be called
+      return (currentTerm != null);
+    }
+
+    /**
+     * Checks that the term still belongs to the queried field and does not
+     * exceed the current upper bound.
+     * In contrast to {@link FilteredTermEnum}, a return value
+     * of <code>false</code> ends iterating the current enum
+     * and forwards to the next sub-range.
+     */
+    //@Override
+    protected boolean termCompare(Term term) {
+      // field names are interned, so they can be compared by identity
+      return (term.field() == field && term.text().compareTo(currentUpperBound) <= 0);
+    }
+
+    /** Increments the enumeration to the next element.  True if one exists. */
+    //@Override
+    public boolean next() throws IOException {
+      // if a current term exists, the actual enum is initialized:
+      // try change to next term, if no such term exists, fall-through
+      if (currentTerm != null) {
+        assert actualEnum!=null;
+        if (actualEnum.next()) {
+          currentTerm = actualEnum.term();
+          if (termCompare(currentTerm)) return true;
+        }
+      }
+      // if all above fails, we go forward to the next enum,
+      // if one is available
+      currentTerm = null;
+      if (rangeBounds.size() < 2) return false;
+      // close the current enum and read next bounds
+      if (actualEnum != null) {
+        actualEnum.close();
+        actualEnum = null;
+      }
+      final String lowerBound = (String)rangeBounds.removeFirst();
+      this.currentUpperBound = (String)rangeBounds.removeFirst();
+      // this call recursively uses next(), if no valid term in
+      // next enum found.
+      // if this behavior is changed/modified in the superclass,
+      // this enum will not work anymore!
+      setEnum(reader.terms(new Term(field, lowerBound)));
+      return (currentTerm != null);
+    }
+
+    /** Closes the enumeration to further activity, freeing resources.  */
+    //@Override
+    public void close() throws IOException {
+      rangeBounds.clear();
+      currentUpperBound = null;
+      super.close();
+    }
+
+  }
+  
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java?rev=786470&r1=786469&r2=786470&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java Fri Jun 19 12:09:52 2009
@@ -22,8 +22,12 @@
 /**
  * A Filter that restricts search results to a range of values in a given
  * field.
- * 
- * If you construct a large number of range filters with different ranges but on the 
+ *
+ * <p>This filter matches the documents looking for terms that fall into the
+ * supplied range according to {@link String#compareTo(String)}. It is not intended
+ * for numerical ranges, use {@link NumericRangeFilter} instead.
+ *
+ * <p>If you construct a large number of range filters with different ranges but on the 
  * same field, {@link FieldCacheRangeFilter} may have significantly better performance. 
  */
 public class RangeFilter extends MultiTermQueryWrapperFilter {

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java?rev=786470&r1=786469&r2=786470&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeQuery.java Fri Jun 19 12:09:52 2009
@@ -26,7 +26,11 @@
 /**
  * A Query that matches documents within an exclusive range.
  *
- * See {@link MultiTermQuery#setConstantScoreRewrite} for the tradeoffs between
+ * <p>This query matches the documents looking for terms that fall into the
+ * supplied range according to {@link String#compareTo(String)}. It is not intended
+ * for numerical ranges, use {@link NumericRangeQuery} instead.
+ *
+ * <p>See {@link MultiTermQuery#setConstantScoreRewrite} for the tradeoffs between
  * enabling and disabling constantScoreRewrite mode.
  */
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/package.html?rev=786470&r1=786469&r2=786470&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/package.html (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/package.html Fri Jun 19 12:09:52 2009
@@ -136,6 +136,7 @@
         </li>
     </ol>
 </p>
+
 <h4>
     <a href="RangeQuery.html">RangeQuery</a>
 </h4>
@@ -147,12 +148,28 @@
     <a href="../index/Term.html">Term</a>
     and an upper
     <a href="../index/Term.html">Term</a>.
+    according to {@link java.lang.String#compareTo(String)}. It is not intended
+    for numerical ranges, use <a href="NumericRangeQuery.html">NumericRangeQuery</a> instead.
+
     For example, one could find all documents
     that have terms beginning with the letters <tt>a</tt> through <tt>c</tt>. This type of <a
         href="Query.html">Query</a> is frequently used to
     find
     documents that occur in a specific date range.
 </p>
+
+<h4>
+    <a href="NumericRangeQuery.html">NumericRangeQuery</a>
+</h4>
+
+<p>The
+    <a href="NumericRangeQuery.html">NumericRangeQuery</a>
+    matches all documents that occur in a numeric range.
+    For NumericRangeQuery to work, you must index the values
+    using a special <a href="../analysis/NumericTokenStream.html">
+    NumericTokenStream</a>.
+</p>
+
 <h4>
     <a href="PrefixQuery.html">PrefixQuery</a>,
     <a href="WildcardQuery.html">WildcardQuery</a>

Added: lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java?rev=786470&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java Fri Jun 19 12:09:52 2009
@@ -0,0 +1,503 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+import org.apache.lucene.search.NumericRangeQuery; // for javadocs
+import org.apache.lucene.search.NumericRangeFilter; // for javadocs
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.ExtendedFieldCache;
+
+/**
+ * This is a helper class to generate prefix-encoded representations for numerical values
+ * and supplies converters to represent float/double values as sortable integers/longs.
+ *
+ * <p>To quickly execute range queries in Apache Lucene, a range is divided recursively
+ * into multiple intervals for searching: The center of the range is searched only with
+ * the lowest possible precision in the trie, while the boundaries are matched
+ * more exactly. This reduces the number of terms dramatically.
+ *
+ * <p>This class generates terms to achieve this: First the numerical integer values need to
+ * be converted to strings. For that integer values (32 bit or 64 bit) are made unsigned
+ * and the bits are converted to ASCII chars with each 7 bit. The resulting string is
+ * sortable like the original integer value. Each value is also prefixed
+ * (in the first char) by the <code>shift</code> value (number of bits removed) used
+ * during encoding.
+ *
+ * <p>To also index floating point numbers, this class supplies two methods to convert them
+ * to integer values by changing their bit layout: {@link #doubleToSortableLong},
+ * {@link #floatToSortableInt}. You will have no precision loss by
+ * converting floating point numbers to integers and back (only that the integer form
+ * is not usable). Other data types like dates can easily be converted to longs or ints (e.g.
+ * date to long: {@link java.util.Date#getTime}).
+ *
+ * <p>For easy usage, the trie algorithm is implemented for indexing inside
+ * {@link NumericTokenStream} that can index <code>int</code>, <code>long</code>,
+ * <code>float</code>, and <code>double</code>. For querying,
+ * {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query part
+ * for the same data types.
+ *
+ * <p>This class can also be used to generate lexicographically sortable (according to
+ * {@link String#compareTo(String)}) representations of numeric data types for other
+ * usages (e.g. sorting).
+ *
+ * <p>Prefix encoded fields can also be sorted using the {@link SortField} factories
+ * {@link #getLongSortField}, {@link #getIntSortField}, {@link #getDoubleSortField}
+ * or {@link #getFloatSortField}.
+ * @since 2.9
+ */
+public final class NumericUtils {
+
+  private NumericUtils() {} // no instance!
+
+  /**
+   * Longs are stored at lower precision by shifting off lower bits. The shift count is
+   * stored as <code>SHIFT_START_LONG+shift</code> in the first character
+   */
+  public static final char SHIFT_START_LONG = (char)0x20;
+
+  /**
+   * Expert: The maximum term length (used for <code>char[]</code> buffer size)
+   * for encoding <code>long</code> values.
+   * @see #longToPrefixCoded(long,int,char[])
+   */
+  public static final int LONG_BUF_SIZE = 63/7 + 2;
+
+  /**
+   * Integers are stored at lower precision by shifting off lower bits. The shift count is
+   * stored as <code>SHIFT_START_INT+shift</code> in the first character
+   */
+  public static final char SHIFT_START_INT  = (char)0x60;
+
+  /**
+   * Expert: The maximum term length (used for <code>char[]</code> buffer size)
+   * for encoding <code>int</code> values.
+   * @see #intToPrefixCoded(int,int,char[])
+   */
+  public static final int INT_BUF_SIZE = 31/7 + 2;
+
+  /**
+   * A parser instance for filling a {@link ExtendedFieldCache}, that parses prefix encoded fields as longs.
+   */
+  public static final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER=new ExtendedFieldCache.LongParser(){
+    public final long parseLong(final String val) {
+      // the first char holds SHIFT_START_LONG+shift
+      final int shift = val.charAt(0)-SHIFT_START_LONG;
+      // terms with a shift of 1..63 are lower precision terms: stop filling the cache
+      final boolean lowerPrecision = (shift>=1 && shift<=63);
+      if (lowerPrecision) {
+        throw new FieldCache.StopFillCacheException();
+      }
+      return prefixCodedToLong(val);
+    }
+  };
+  
+  /**
+   * A parser instance for filling a {@link FieldCache}, that parses prefix encoded fields as ints.
+   */
+  public static final FieldCache.IntParser FIELD_CACHE_INT_PARSER=new FieldCache.IntParser(){
+    public final int parseInt(final String val) {
+      // the first char holds SHIFT_START_INT+shift
+      final int shift = val.charAt(0)-SHIFT_START_INT;
+      // terms with a shift of 1..31 are lower precision terms: stop filling the cache
+      final boolean lowerPrecision = (shift>=1 && shift<=31);
+      if (lowerPrecision) {
+        throw new FieldCache.StopFillCacheException();
+      }
+      return prefixCodedToInt(val);
+    }
+  };
+
+  /**
+   * A parser instance for filling a {@link ExtendedFieldCache}, that parses prefix encoded fields as doubles.
+   * This uses {@link #sortableLongToDouble} to convert the encoded long to a double.
+   */
+  public static final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER=new ExtendedFieldCache.DoubleParser(){
+    public final double parseDouble(final String val) {
+      // the first char holds SHIFT_START_LONG+shift
+      final int shift = val.charAt(0)-SHIFT_START_LONG;
+      // terms with a shift of 1..63 are lower precision terms: stop filling the cache
+      final boolean lowerPrecision = (shift>=1 && shift<=63);
+      if (lowerPrecision) {
+        throw new FieldCache.StopFillCacheException();
+      }
+      // decode the sortable long first, then restore the double from it
+      final long sortableBits = prefixCodedToLong(val);
+      return sortableLongToDouble(sortableBits);
+    }
+  };
+  
+  /**
+   * A parser instance for filling a {@link FieldCache}, that parses prefix encoded fields as floats.
+   * This uses {@link #sortableIntToFloat} to convert the encoded int to a float.
+   */
+  public static final FieldCache.FloatParser FIELD_CACHE_FLOAT_PARSER=new FieldCache.FloatParser(){
+    public final float parseFloat(final String val) {
+      // the first char holds SHIFT_START_INT+shift
+      final int shift = val.charAt(0)-SHIFT_START_INT;
+      // terms with a shift of 1..31 are lower precision terms: stop filling the cache
+      final boolean lowerPrecision = (shift>=1 && shift<=31);
+      if (lowerPrecision) {
+        throw new FieldCache.StopFillCacheException();
+      }
+      // decode the sortable int first, then restore the float from it
+      final int sortableBits = prefixCodedToInt(val);
+      return sortableIntToFloat(sortableBits);
+    }
+  };
+  
+  /**
+   * Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
+   * This method is used by {@link NumericTokenStream}.
+   * @param val the numeric value
+   * @param shift how many bits to strip from the right, must be 0..63
+   * @param buffer that will contain the encoded chars, must be at least of {@link #LONG_BUF_SIZE}
+   * length
+   * @return number of chars written to buffer
+   * @throws IllegalArgumentException if <code>shift</code> is not in the range 0..63
+   */
+  public static int longToPrefixCoded(final long val, final int shift, final char[] buffer) {
+    // Java only uses the lowest 6 bits of a long shift distance, so an out-of-range
+    // shift would not fail but silently produce a wrong term; reject it instead.
+    if (shift>63 || shift<0)
+      throw new IllegalArgumentException("Illegal shift value, must be 0..63");
+    // one char per 7 remaining bits, plus the leading shift char
+    int nChars = (63-shift)/7 + 1, len = nChars+1;
+    buffer[0] = (char)(SHIFT_START_LONG + shift);
+    // flip the sign bit, so that negative values sort before positive ones
+    long sortableBits = val ^ 0x8000000000000000L;
+    sortableBits >>>= shift;
+    while (nChars>=1) {
+      // Store 7 bits per character for good efficiency when UTF-8 encoding.
+      // The whole number is right-justified so that lucene can prefix-encode
+      // the terms more efficiently.
+      buffer[nChars--] = (char)(sortableBits & 0x7f);
+      sortableBits >>>= 7;
+    }
+    return len;
+  }
+
+  /**
+   * Expert: Strips the lowest <code>shift</code> bits from <code>val</code> and returns
+   * the prefix coded result as a new string.
+   * This method is used by {@link LongRangeBuilder}.
+   * @param val the numeric value
+   * @param shift how many bits to strip from the right
+   */
+  public static String longToPrefixCoded(final long val, final int shift) {
+    final boolean validShift = (shift>=0 && shift<=63);
+    if (!validShift) {
+      throw new IllegalArgumentException("Illegal shift value, must be 0..63");
+    }
+    // encode into a scratch buffer of maximum size and copy only the used part
+    final char[] chars = new char[LONG_BUF_SIZE];
+    final int used = longToPrefixCoded(val, shift, chars);
+    return new String(chars, 0, used);
+  }
+
+  /**
+   * Convenience method that prefix codes a long at full precision (no bits
+   * are stripped). The result can be used to store the full precision value as a
+   * stored field in index.
+   * <p>To decode, use {@link #prefixCodedToLong}.
+   */
+  public static String longToPrefixCoded(final long val) {
+    final int noShift = 0; // keep all 64 bits
+    return longToPrefixCoded(val, noShift);
+  }
+  
+  /**
+   * Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
+   * This method is used by {@link NumericTokenStream}.
+   * @param val the numeric value
+   * @param shift how many bits to strip from the right
+   * @param buffer that will contain the encoded chars; its length must be at least
+   * {@link #INT_BUF_SIZE}
+   * @return number of chars written to buffer
+   */
+  public static int intToPrefixCoded(final int val, final int shift, final char[] buffer) {
+    int nChars = (31-shift)/7 + 1, len = nChars+1;
+    buffer[0] = (char)(SHIFT_START_INT + shift);
+    int sortableBits = val ^ 0x80000000;
+    sortableBits >>>= shift;
+    while (nChars>=1) {
+      // Store 7 bits per character for good efficiency when UTF-8 encoding.
+      // The whole number is right-justified so that lucene can prefix-encode
+      // the terms more efficiently.
+      buffer[nChars--] = (char)(sortableBits & 0x7f);
+      sortableBits >>>= 7;
+    }
+    return len;
+  }
+
+  /**
+   * Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
+   * This method is used by {@link IntRangeBuilder}.
+   * @param val the numeric value
+   * @param shift how many bits to strip from the right
+   */
+  public static String intToPrefixCoded(final int val, final int shift) {
+    if (shift>31 || shift<0)
+      throw new IllegalArgumentException("Illegal shift value, must be 0..31");
+    final char[] buffer = new char[INT_BUF_SIZE];
+    final int len = intToPrefixCoded(val, shift, buffer);
+    return new String(buffer, 0, len);
+  }
+
+  /**
+   * This is a convenience method, that returns prefix coded bits of an int without
+   * reducing the precision. It can be used to store the full precision value as a
+   * stored field in index.
+   * <p>To decode, use {@link #prefixCodedToInt}.
+   */
+  public static String intToPrefixCoded(final int val) {
+    return intToPrefixCoded(val, 0);
+  }
+
+  /**
+   * Returns a long from prefixCoded characters.
+   * Rightmost bits will be zero for lower precision codes.
+   * This method can be used to decode e.g. a stored field.
+   * @throws NumberFormatException if the supplied string is
+   * not correctly prefix encoded.
+   * @see #longToPrefixCoded(long)
+   */
+  public static long prefixCodedToLong(final String prefixCoded) {
+    final int shift = prefixCoded.charAt(0)-SHIFT_START_LONG;
+    if (shift>63 || shift<0)
+      throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really a LONG?)");
+    long sortableBits = 0L;
+    for (int i=1, len=prefixCoded.length(); i<len; i++) {
+      sortableBits <<= 7;
+      final char ch = prefixCoded.charAt(i);
+      if (ch>0x7f) {
+        throw new NumberFormatException(
+          "Invalid prefixCoded numerical value representation (char "+
+          Integer.toHexString((int)ch)+" at position "+i+" is invalid)"
+        );
+      }
+      sortableBits |= (long)ch;
+    }
+    return (sortableBits << shift) ^ 0x8000000000000000L;
+  }
+
+  /**
+   * Returns an int from prefixCoded characters.
+   * Rightmost bits will be zero for lower precision codes.
+   * This method can be used to decode e.g. a stored field.
+   * @throws NumberFormatException if the supplied string is
+   * not correctly prefix encoded.
+   * @see #intToPrefixCoded(int)
+   */
+  public static int prefixCodedToInt(final String prefixCoded) {
+    final int shift = prefixCoded.charAt(0)-SHIFT_START_INT;
+    if (shift>31 || shift<0)
+      throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)");
+    int sortableBits = 0;
+    for (int i=1, len=prefixCoded.length(); i<len; i++) {
+      sortableBits <<= 7;
+      final char ch = prefixCoded.charAt(i);
+      if (ch>0x7f) {
+        throw new NumberFormatException(
+          "Invalid prefixCoded numerical value representation (char "+
+          Integer.toHexString((int)ch)+" at position "+i+" is invalid)"
+        );
+      }
+      sortableBits |= (int)ch;
+    }
+    return (sortableBits << shift) ^ 0x80000000;
+  }
+
+  /**
+   * Converts a <code>double</code> value to a sortable signed <code>long</code>.
+   * The value is converted by getting its IEEE 754 floating-point &quot;double format&quot;
+   * bit layout and then flipping some bits, so that the result can be compared as a long.
+   * This way the precision is not reduced, but the value can easily be used as a long.
+   * @see #sortableLongToDouble
+   */
+  public static long doubleToSortableLong(double val) {
+    long f = Double.doubleToLongBits(val);
+    if (f<0) f ^= 0x7fffffffffffffffL;
+    return f;
+  }
+
+  /**
+   * Converts a sortable <code>long</code> back to a <code>double</code>.
+   * @see #doubleToSortableLong
+   */
+  public static double sortableLongToDouble(long val) {
+    if (val<0) val ^= 0x7fffffffffffffffL;
+    return Double.longBitsToDouble(val);
+  }
+
+  /**
+   * Converts a <code>float</code> value to a sortable signed <code>int</code>.
+   * The value is converted by getting its IEEE 754 floating-point &quot;float format&quot;
+   * bit layout and then flipping some bits, so that the result can be compared as an int.
+   * This way the precision is not reduced, but the value can easily be used as an int.
+   * @see #sortableIntToFloat
+   */
+  public static int floatToSortableInt(float val) {
+    int f = Float.floatToIntBits(val);
+    if (f<0) f ^= 0x7fffffff;
+    return f;
+  }
+
+  /**
+   * Converts a sortable <code>int</code> back to a <code>float</code>.
+   * @see #floatToSortableInt
+   */
+  public static float sortableIntToFloat(int val) {
+    if (val<0) val ^= 0x7fffffff;
+    return Float.intBitsToFloat(val);
+  }
+
+  /** A factory method, that generates a {@link SortField} instance for sorting prefix encoded long values. */
+  public static SortField getLongSortField(final String field, final boolean reverse) {
+    return new SortField(field, FIELD_CACHE_LONG_PARSER, reverse);
+  }
+  
+  /** A factory method, that generates a {@link SortField} instance for sorting prefix encoded int values. */
+  public static SortField getIntSortField(final String field, final boolean reverse) {
+    return new SortField(field, FIELD_CACHE_INT_PARSER, reverse);
+  }
+
+  /** A factory method, that generates a {@link SortField} instance for sorting prefix encoded double values. */
+  public static SortField getDoubleSortField(final String field, final boolean reverse) {
+    return new SortField(field, FIELD_CACHE_DOUBLE_PARSER, reverse);
+  }
+  
+  /** A factory method, that generates a {@link SortField} instance for sorting prefix encoded float values. */
+  public static SortField getFloatSortField(final String field, final boolean reverse) {
+    return new SortField(field, FIELD_CACHE_FLOAT_PARSER, reverse);
+  }
+
+  /**
+   * Expert: Splits a long range recursively.
+   * You may implement a builder that adds clauses to a
+   * {@link org.apache.lucene.search.BooleanQuery} for each call to its
+   * {@link LongRangeBuilder#addRange(String,String)}
+   * method.
+   * <p>This method is used by {@link NumericRangeQuery}.
+   */
+  public static void splitLongRange(final LongRangeBuilder builder,
+    final int precisionStep,  final long minBound, final long maxBound
+  ) {
+    if (precisionStep<1 || precisionStep>64)
+      throw new IllegalArgumentException("precisionStep may only be 1..64");
+    splitRange(builder, 64, precisionStep, minBound, maxBound);
+  }
+  
+  /**
+   * Expert: Splits an int range recursively.
+   * You may implement a builder that adds clauses to a
+   * {@link org.apache.lucene.search.BooleanQuery} for each call to its
+   * {@link IntRangeBuilder#addRange(String,String)}
+   * method.
+   * <p>This method is used by {@link NumericRangeQuery}.
+   */
+  public static void splitIntRange(final IntRangeBuilder builder,
+    final int precisionStep,  final int minBound, final int maxBound
+  ) {
+    if (precisionStep<1 || precisionStep>32)
+      throw new IllegalArgumentException("precisionStep may only be 1..32");
+    splitRange(builder, 32, precisionStep, (long)minBound, (long)maxBound);
+  }
+  
+  /** This helper does the splitting for both 32 and 64 bit. */
+  private static void splitRange(
+    final Object builder, final int valSize,
+    final int precisionStep, long minBound, long maxBound
+  ) {
+    if (minBound > maxBound) return;
+    for (int shift=0; ; shift += precisionStep) {
+      // calculate new bounds for inner precision
+      final long diff = 1L << (shift+precisionStep),
+        mask = ((1L<<precisionStep) - 1L) << shift;
+      final boolean
+        hasLower = (minBound & mask) != 0L,
+        hasUpper = (maxBound & mask) != mask;
+      final long
+        nextMinBound = (hasLower ? (minBound + diff) : minBound) & ~mask,
+        nextMaxBound = (hasUpper ? (maxBound - diff) : maxBound) & ~mask;
+
+      if (shift+precisionStep>=valSize || nextMinBound>nextMaxBound) {
+        // We are in the lowest precision or the next precision is not available.
+        addRange(builder, valSize, minBound, maxBound, shift);
+        // exit the split recursion loop
+        break;
+      }
+      
+      if (hasLower)
+        addRange(builder, valSize, minBound, minBound | mask, shift);
+      if (hasUpper)
+        addRange(builder, valSize, maxBound & ~mask, maxBound, shift);
+      
+      // recurse to next precision
+      minBound = nextMinBound;
+      maxBound = nextMaxBound;
+    }
+  }
+  
+  /** Helper that delegates to correct range builder */
+  private static void addRange(
+    final Object builder, final int valSize,
+    long minBound, long maxBound,
+    final int shift
+  ) {
+    // for the max bound set all lower bits (that were shifted away):
+    // this is important for testing or other usages of the split range
+    // (e.g. to reconstruct the full range). The prefixEncoding will remove
+    // the bits anyway, so they do not hurt!
+    maxBound |= (1L << shift) - 1L;
+    // delegate to correct range builder
+    switch(valSize) {
+      case 64:
+        ((LongRangeBuilder)builder).addRange(minBound, maxBound, shift);
+        break;
+      case 32:
+        ((IntRangeBuilder)builder).addRange((int)minBound, (int)maxBound, shift);
+        break;
+      default:
+        // Should not happen!
+        throw new IllegalArgumentException("valSize must be 32 or 64.");
+    }
+  }
+
+  /**
+   * Expert: Callback for {@link #splitLongRange}.
+   * You need to override only one of the methods.
+   * <p><font color="red">WARNING: This is a very low-level interface,
+   * the method signatures may change in later versions.</font>
+   */
+  public static abstract class LongRangeBuilder {
+    
+    /**
+     * Override this method if you would like to receive the already prefix encoded range bounds.
+     * You can directly build classical (inclusive) range queries from them.
+     */
+    public void addRange(String minPrefixCoded, String maxPrefixCoded) {
+      throw new UnsupportedOperationException();
+    }
+    
+    /**
+     * Override this method if you would like to receive the raw long range bounds.
+     * You can use this for e.g. debugging purposes (print out range bounds).
+     */
+    public void addRange(final long min, final long max, final int shift) {
+      addRange(longToPrefixCoded(min, shift), longToPrefixCoded(max, shift));
+    }
+  
+  }
+  
+  /**
+   * Expert: Callback for {@link #splitIntRange}.
+   * You need to override only one of the methods.
+   * <p><font color="red">WARNING: This is a very low-level interface,
+   * the method signatures may change in later versions.</font>
+   */
+  public static abstract class IntRangeBuilder {
+    
+    /**
+     * Override this method if you would like to receive the already prefix encoded range bounds.
+     * You can directly build classical (inclusive) range queries from them.
+     */
+    public void addRange(String minPrefixCoded, String maxPrefixCoded) {
+      throw new UnsupportedOperationException();
+    }
+    
+    /**
+     * Override this method if you would like to receive the raw int range bounds.
+     * You can use this for e.g. debugging purposes (print out range bounds).
+     */
+    public void addRange(final int min, final int max, final int shift) {
+      addRange(intToPrefixCoded(min, shift), intToPrefixCoded(max, shift));
+    }
+  
+  }
+  
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/NumericUtils.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java?rev=786470&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java Fri Jun 19 12:09:52 2009
@@ -0,0 +1,103 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+public class TestNumericTokenStream extends LuceneTestCase {
+
+  static final int precisionStep = 8;
+  static final long lvalue = 4573245871874382L;
+  static final int ivalue = 123456;
+
+  public void testLongStreamNewAPI() throws Exception {
+    final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue);
+    stream.setUseNewAPI(true);
+    // use getAttribute to test if attributes really exist; if not, an IAE will be thrown
+    final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
+    for (int shift=0; shift<64; shift+=precisionStep) {
+      assertTrue("New token is available", stream.incrementToken());
+      assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.term());
+    }
+    assertFalse("No more tokens available", stream.incrementToken());
+  }
+  
+  public void testLongStreamOldAPI() throws Exception {
+    final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue);
+    stream.setUseNewAPI(false);
+    Token tok=new Token();
+    for (int shift=0; shift<64; shift+=precisionStep) {
+      assertNotNull("New token is available", tok=stream.next(tok));
+      assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), tok.term());
+    }
+    assertNull("No more tokens available", stream.next(tok));
+  }
+
+  public void testIntStreamNewAPI() throws Exception {
+    final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue);
+    stream.setUseNewAPI(true);
+    // use getAttribute to test if attributes really exist; if not, an IAE will be thrown
+    final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
+    for (int shift=0; shift<32; shift+=precisionStep) {
+      assertTrue("New token is available", stream.incrementToken());
+      assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.term());
+    }
+    assertFalse("No more tokens available", stream.incrementToken());
+  }
+  
+  public void testIntStreamOldAPI() throws Exception {
+    final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue);
+    stream.setUseNewAPI(false);
+    Token tok=new Token();
+    for (int shift=0; shift<32; shift+=precisionStep) {
+      assertNotNull("New token is available", tok=stream.next(tok));
+      assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), tok.term());
+    }
+    assertNull("No more tokens available", stream.next(tok));
+  }
+  
+  public void testNotInitialized() throws Exception {
+    final NumericTokenStream stream=new NumericTokenStream(precisionStep);
+    
+    try {
+      stream.reset();
+      fail("reset() should not succeed.");
+    } catch (IllegalStateException e) {
+      // pass
+    }
+
+    stream.setUseNewAPI(true);
+    try {
+      stream.incrementToken();
+      fail("incrementToken() should not succeed.");
+    } catch (IllegalStateException e) {
+      // pass
+    }
+
+    stream.setUseNewAPI(false);
+    try {
+      stream.next(new Token());
+      fail("next() should not succeed.");
+    } catch (IllegalStateException e) {
+      // pass
+    }
+  }
+  
+}

Propchange: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message