lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r723031 - in /lucene/java/trunk/contrib: ./ queries/src/java/org/apache/lucene/search/trie/ queries/src/test/org/apache/lucene/search/trie/
Date Wed, 03 Dec 2008 19:38:32 GMT
Author: mikemccand
Date: Wed Dec  3 11:38:31 2008
New Revision: 723031

URL: http://svn.apache.org/viewvc?rev=723031&view=rev
Log:
LUCENE-1470: add TrieRangeQuery, a much more efficient implementation of RangeQuery at the expense of added space consumed in the index

Added:
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeFilter.java   (with props)
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeQuery.java   (with props)
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java   (with props)
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/package.html   (with props)
    lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/
    lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieRangeQuery.java   (with props)
    lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieUtils.java   (with props)
Modified:
    lucene/java/trunk/contrib/CHANGES.txt

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=723031&r1=723030&r2=723031&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Wed Dec  3 11:38:31 2008
@@ -17,7 +17,9 @@
 
 New features
 
- (None)
+ 1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of
+    RangeQuery at the expense of added space (additional indexed
+    tokens) consumed in the index.  (Uwe Schindler via Mike McCandless)
 
 Documentation
 

Added: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeFilter.java?rev=723031&view=auto
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeFilter.java (added)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeFilter.java Wed Dec  3 11:38:31 2008
@@ -0,0 +1,290 @@
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.OpenBitSet;
+
+/**
+ * Implementation of a Lucene {@link Filter} that implements trie-based range filtering.
+ * This filter depends on a specific structure of terms in the index that can only be created
+ * by {@link TrieUtils} methods.
+ * For more information, how the algorithm works, see the package description {@link org.apache.lucene.search.trie}.
+ * @author Uwe Schindler (panFMP developer)
+ */
+public final class TrieRangeFilter extends Filter {
+
+	/**
+	 * Universal constructor (expert use only): Uses already trie-converted min/max values.
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 */
+	public TrieRangeFilter(final String field, final String min, final String max, final TrieUtils variant) {
+		if (min==null && max==null) throw new IllegalArgumentException("The min and max values cannot be both null.");
+		this.trieVariant=variant;
+		this.minUnconverted=min;
+		this.maxUnconverted=max;
+		this.min=(min==null) ? trieVariant.TRIE_CODED_NUMERIC_MIN : min;
+		this.max=(max==null) ? trieVariant.TRIE_CODED_NUMERIC_MAX : max;
+		this.field=field.intern();
+	}
+
+	/**
+	 * Universal constructor (expert use only): Uses already trie-converted min/max values.
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 * <p>This constructor uses the trie package returned by {@link TrieUtils#getDefaultTrieVariant()}.
+	 */
+	public TrieRangeFilter(final String field, final String min, final String max) {
+		this(field,min,max,TrieUtils.getDefaultTrieVariant());
+	}
+	
+	/**
+	 * Generates a trie query using the supplied field with range bounds in numeric form (double).
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 */
+	public TrieRangeFilter(final String field, final Double min, final Double max, final TrieUtils variant) {
+		this(
+			field,
+			(min==null) ? null : variant.doubleToTrieCoded(min.doubleValue()),
+			(max==null) ? null : variant.doubleToTrieCoded(max.doubleValue()),
+			variant
+		);
+		this.minUnconverted=min;
+		this.maxUnconverted=max;
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in numeric form (double).
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 * <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
+	 */
+	public TrieRangeFilter(final String field, final Double min, final Double max) {
+		this(field,min,max,TrieUtils.getDefaultTrieVariant());
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in date/time form.
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 */
+	public TrieRangeFilter(final String field, final Date min, final Date max, final TrieUtils variant) {
+		this(
+			field,
+			(min==null) ? null : variant.dateToTrieCoded(min),
+			(max==null) ? null : variant.dateToTrieCoded(max),
+			variant
+		);
+		this.minUnconverted=min;
+		this.maxUnconverted=max;
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in date/time form.
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 * <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
+	 */
+	public TrieRangeFilter(final String field, final Date min, final Date max) {
+		this(field,min,max,TrieUtils.getDefaultTrieVariant());
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in integer form (long).
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 */
+	public TrieRangeFilter(final String field, final Long min, final Long max, final TrieUtils variant) {
+		this(
+			field,
+			(min==null) ? null : variant.longToTrieCoded(min.longValue()),
+			(max==null) ? null : variant.longToTrieCoded(max.longValue()),
+			variant
+		);
+		this.minUnconverted=min;
+		this.maxUnconverted=max;
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in integer form (long).
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 * <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
+	 */
+	public TrieRangeFilter(final String field, final Long min, final Long max) {
+		this(field,min,max,TrieUtils.getDefaultTrieVariant());
+	}
+
+	//@Override
+	public String toString() {
+		return toString(null);
+	}
+
+	public String toString(final String field) {
+		final StringBuffer sb=new StringBuffer();
+		if (!this.field.equals(field)) sb.append(this.field).append(':');
+		return sb.append('[').append(minUnconverted).append(" TO ").append(maxUnconverted).append(']').toString();
+	}
+
+	//@Override
+	public final boolean equals(final Object o) {
+		if (o instanceof TrieRangeFilter) {
+			TrieRangeFilter q=(TrieRangeFilter)o;
+			// trieVariants are singleton per type, so no equals needed
+			return (field==q.field && min.equals(q.min) && max.equals(q.max) && trieVariant==q.trieVariant);
+		} else return false;
+	}
+
+	//@Override
+	public final int hashCode() {
+		// the hash code uses from the variant only the number of bits, as this is unique for the variant
+		return field.hashCode()+(min.hashCode()^0x14fa55fb)+(max.hashCode()^0x733fa5fe)+(trieVariant.TRIE_BITS^0x64365465);
+	}
+	
+	/** prints the String in hexadecimal \\u notation (for debugging of <code>setBits()</code>) */
+	private String stringToHexDigits(final String s) {
+		StringBuffer sb=new StringBuffer(s.length()*3);
+		for (int i=0,c=s.length(); i<c; i++) {
+			char ch=s.charAt(i);
+			sb.append("\\u").append(Integer.toHexString((int)ch));
+		}
+		return sb.toString();
+	}
+
+	/** Marks documents in a specific range. Code borrowed from original RangeFilter and simplified (and returns number of terms) */
+	private int setBits(final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits, String lowerTerm, String upperTerm) throws IOException {
+		//System.out.println(stringToHexDigits(lowerTerm)+" TO "+stringToHexDigits(upperTerm));
+		int count=0,len=lowerTerm.length();
+		final String field;
+		if (len<trieVariant.TRIE_CODED_LENGTH) {
+			// lower precision value is in helper field
+			field=(this.field + trieVariant.LOWER_PRECISION_FIELD_NAME_SUFFIX).intern();
+			// add padding before lower precision values to group them
+			lowerTerm=new StringBuffer(len+1).append((char)(trieVariant.TRIE_CODED_PADDING_START+len)).append(lowerTerm).toString();
+			upperTerm=new StringBuffer(len+1).append((char)(trieVariant.TRIE_CODED_PADDING_START+len)).append(upperTerm).toString();
+			// length is longer by 1 char because of padding
+			len++;
+		} else {
+			// full precision value is in original field
+			field=this.field;
+		}
+		final TermEnum enumerator = reader.terms(new Term(field, lowerTerm));
+		try {
+			do {
+				final Term term = enumerator.term();
+				if (term!=null && term.field()==field) {
+					// break out when upperTerm reached or length of term is different
+					final String t=term.text();
+					if (len!=t.length() || t.compareTo(upperTerm)>0) break;
+					// we have a good term, find the docs
+					count++;
+					termDocs.seek(enumerator);
+					while (termDocs.next()) bits.set(termDocs.doc());
+				} else break;
+			} while (enumerator.next());
+		} finally {
+			enumerator.close();
+		}
+		return count;
+	}
+
+	/** Splits range recursively (and returns number of terms) */
+	private int splitRange(
+		final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits,
+		final String min, final boolean lowerBoundOpen, final String max, final boolean upperBoundOpen
+	) throws IOException {
+		int count=0;
+		final int length=min.length();
+		final String minShort=lowerBoundOpen ? min.substring(0,length-1) : trieVariant.incrementTrieCoded(min.substring(0,length-1));
+		final String maxShort=upperBoundOpen ? max.substring(0,length-1) : trieVariant.decrementTrieCoded(max.substring(0,length-1));
+
+		if (length==1 || minShort.compareTo(maxShort)>=0) {
+			// we are in the lowest precision or the current precision is not existent
+			count+=setBits(reader, termDocs, bits, min, max);
+		} else {
+			// Avoid too much seeking: first go deeper into lower precision
+			// (in IndexReader's TermEnum these terms are earlier).
+			// Do this only, if the current length is not trieVariant.TRIE_CODED_LENGTH (not full precision),
+			// because terms from the highest prec come before all lower prec terms
+			// (because the field name is ordered before the suffixed one).
+			if (length!=trieVariant.TRIE_CODED_LENGTH) count+=splitRange(
+				reader,termDocs,bits,
+				minShort,lowerBoundOpen,
+				maxShort,upperBoundOpen
+			);
+			// Avoid too much seeking: set bits for lower part of current (higher) precision.
+			// These terms come later in IndexReader's TermEnum.
+			if (!lowerBoundOpen) {
+				count+=setBits(reader, termDocs, bits, min, trieVariant.decrementTrieCoded(minShort+trieVariant.TRIE_CODED_SYMBOL_MIN));
+			}
+			// Avoid too much seeking: set bits for upper part of current precision.
+			// These terms come later in IndexReader's TermEnum.
+			if (!upperBoundOpen) {
+				count+=setBits(reader, termDocs, bits, trieVariant.incrementTrieCoded(maxShort+trieVariant.TRIE_CODED_SYMBOL_MAX), max);
+			}
+			// If the first step (see above) was not done (because length==trieVariant.TRIE_CODED_LENGTH) we do it now.
+			if (length==trieVariant.TRIE_CODED_LENGTH) count+=splitRange(
+				reader,termDocs,bits,
+				minShort,lowerBoundOpen,
+				maxShort,upperBoundOpen
+			);
+		}
+		return count;
+	}
+
+	/**
+	 * Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
+	 */
+	//@Override
+	public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+		final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
+		final TermDocs termDocs=reader.termDocs();
+		try {
+			final int count=splitRange(
+				reader,termDocs,bits,
+				min,trieVariant.TRIE_CODED_NUMERIC_MIN.equals(min),
+				max,trieVariant.TRIE_CODED_NUMERIC_MAX.equals(max)
+			);
+			lastNumberOfTerms=new Integer(count);
+			//System.out.println("Found "+count+" distinct terms in filtered range for field '"+field+"'.");
+		} finally {
+			termDocs.close();
+		}
+		return bits;
+	}
+	
+	/**
+	 * EXPERT: Return the number of terms visited during the last execution of {@link #getDocIdSet}.
+	 * This may be used for performance comparisons of different trie variants and their effectiveness.
+	 * This method is not thread safe, be sure to only call it when no query is running!
+	 * @throws IllegalStateException if {@link #getDocIdSet} was not yet executed.
+	 */
+	//@Override
+	public int getLastNumberOfTerms() {
+		if (lastNumberOfTerms==null) throw new IllegalStateException();
+		return lastNumberOfTerms.intValue();
+	}
+
+	// members
+	private final String field,min,max;
+	private final TrieUtils trieVariant;
+	private Object minUnconverted,maxUnconverted;
+	private Integer lastNumberOfTerms=null;
+}

Propchange: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeQuery.java?rev=723031&view=auto
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeQuery.java (added)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeQuery.java Wed Dec  3 11:38:31 2008
@@ -0,0 +1,145 @@
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Date;
+import java.io.IOException;
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * Implementation of a Lucene {@link Query} that implements a trie-based range query.
+ * This query depends on a specific structure of terms in the index that can only be created
+ * by {@link TrieUtils} methods.
+ * <p>This class wraps a {@link TrieRangeFilter} using a {@link ConstantScoreQuery}.
+ * @see TrieRangeFilter
+ * @author Uwe Schindler (panFMP developer)
+ */
+public final class TrieRangeQuery extends Query {
+
+	/**
+	 * Universal constructor (expert use only): Uses already trie-converted min/max values.
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 * <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
+	 */
+	public TrieRangeQuery(final String field, final String min, final String max) {
+		filter=new TrieRangeFilter(field,min,max);
+	}
+
+	/**
+	 * Universal constructor (expert use only): Uses already trie-converted min/max values.
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 */
+	public TrieRangeQuery(final String field, final String min, final String max, final TrieUtils variant) {
+		filter=new TrieRangeFilter(field,min,max,variant);
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in numeric form (double).
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 * <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
+	 */
+	public TrieRangeQuery(final String field, final Double min, final Double max) {
+		filter=new TrieRangeFilter(field,min,max);
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in numeric form (double).
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 */
+	public TrieRangeQuery(final String field, final Double min, final Double max, final TrieUtils variant) {
+		filter=new TrieRangeFilter(field,min,max,variant);
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in date/time form.
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 * <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
+	 */
+	public TrieRangeQuery(final String field, final Date min, final Date max) {
+		filter=new TrieRangeFilter(field,min,max);
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in date/time form.
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 */
+	public TrieRangeQuery(final String field, final Date min, final Date max, final TrieUtils variant) {
+		filter=new TrieRangeFilter(field,min,max,variant);
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in integer form (long).
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 * <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
+	 */
+	public TrieRangeQuery(final String field, final Long min, final Long max) {
+		filter=new TrieRangeFilter(field,min,max);
+	}
+
+	/**
+	 * Generates a trie query using the supplied field with range bounds in integer form (long).
+	 * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
+	 */
+	public TrieRangeQuery(final String field, final Long min, final Long max, final TrieUtils variant) {
+		filter=new TrieRangeFilter(field,min,max,variant);
+	}
+
+	//@Override
+	public String toString(final String field) {
+		return filter.toString(field)+ToStringUtils.boost(getBoost());
+	}
+
+	//@Override
+	public final boolean equals(final Object o) {
+		if (o instanceof TrieRangeQuery) {
+			TrieRangeQuery q=(TrieRangeQuery)o;
+			return (filter.equals(q.filter) && getBoost()==q.getBoost());
+		} else return false;
+	}
+
+	//@Override
+	public final int hashCode() {
+		return filter.hashCode()^0x1756fa55+Float.floatToIntBits(getBoost());
+	}
+
+	/**
+	 * Rewrites the query to native Lucene {@link Query}'s. This implementation uses a {@link ConstantScoreQuery} with
+	 * a {@link TrieRangeFilter} as implementation of the trie algorithm.
+	 */
+	//@Override
+	public Query rewrite(final IndexReader reader) throws IOException {
+		final ConstantScoreQuery q = new ConstantScoreQuery(filter);
+		q.setBoost(getBoost());
+		return q.rewrite(reader);
+	}
+	
+	/**
+	 * Returns the underlying filter.
+	 */
+	public TrieRangeFilter getFilter() {
+		return filter;
+	}
+
+	// members
+	private final TrieRangeFilter filter;
+
+}

Propchange: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeQuery.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java?rev=723031&view=auto
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java (added)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java Wed Dec  3 11:38:31 2008
@@ -0,0 +1,358 @@
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Date;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+/**
+ *This is a helper class to construct the trie-based index entries for numerical values.
+ * <p>For more information, how the algorithm works, see the package description {@link org.apache.lucene.search.trie}. The format of how the
+ * numerical values are stored in index is documented here:
+ * <p>All numerical values are first converted to special <code>unsigned long</code>s by applying some bit-wise transformations. This means:<ul>
+ * <li>{@link Date}s are casted to unix timestamps (milliseconds since 1970-01-01, this is how Java represents date/time
+ * internally): {@link Date#getTime()}. The resulting <code>signed long</code> is transformed to the unsigned form like so:</li>
+ * <li><code>signed long</code>s are shifted, so that {@link Long#MIN_VALUE} is mapped to <code>0x0000000000000000</code>,
+ * {@link Long#MAX_VALUE} is mapped to <code>0xffffffffffffffff</code>.</li>
+ * <li><code>double</code>s are converted by getting their IEEE 754 floating-point "double format" bit layout and then some bits
+ * are swapped, to be able to compare the result as <code>unsigned long</code>s.</li>
+ * </ul>
+ * <p>For each variant (you can choose between {@link #VARIANT_8BIT}, {@link #VARIANT_4BIT}, and {@link #VARIANT_2BIT}),
+ * the bitmap of this <code>unsigned long</code> is divided into parts of a number of bits (starting with the most-significant bits)
+ * and each part converted to characters between {@link #TRIE_CODED_SYMBOL_MIN} and {@link #TRIE_CODED_SYMBOL_MAX}.
+ * The resulting {@link String} is comparable like the corresponding <code>unsigned long</code>.
+ * <p>To store the different precisions of the long values (from one character [only the most significant one] to the full encoded length),
+ * each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision == 0x20+precision</code>),
+ * in an extra "helper" field with a suffixed field name (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
+ * The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
+ * By this it is possible to get the correct enumeration of terms in correct precision
+ * of the term list by just jumping to the correct fieldname and/or prefix. The full precision value may also be
+ * stored in the document. Having the full precision value as term in a separate field with the original name,
+ * sorting of query results agains such fields is possible using the original field name.
+ * @author Uwe Schindler (panFMP developer)
+ */
+public final class TrieUtils {
+
+	/** Instance of TrieUtils using a trie factor of 8 bit. */
+	public static final TrieUtils VARIANT_8BIT=new TrieUtils(8);
+
+	/** Instance of TrieUtils using a trie factor of 4 bit. */
+	public static final TrieUtils VARIANT_4BIT=new TrieUtils(4);
+
+	/** Instance of TrieUtils using a trie factor of 2 bit. */
+	public static final TrieUtils VARIANT_2BIT=new TrieUtils(2);
+
+	/** Marker (PADDING)  before lower-precision trie entries to signal the precision value. See class description! */
+	public static final char TRIE_CODED_PADDING_START=(char)0x20;
+	
+	/** The "helper" field containing the lower precision terms is the original fieldname with this appended. */
+	public static final String LOWER_PRECISION_FIELD_NAME_SUFFIX="#trie";
+
+	/** Character used as lower end */
+	public static final char TRIE_CODED_SYMBOL_MIN=(char)0x100;
+
+	private static TrieUtils defaultTrieVariant=TrieUtils.VARIANT_8BIT;
+
+	/**
+	 * Sets the default variant used for generating trie values and ranges.
+	 * It is used by the constructors of {@link TrieRangeQuery} and {@link TrieRangeFilter} without <code>TrieUtils</code> parameter
+	 * and can be used to get a default value through your whole application.
+	 */
+	public synchronized static final void setDefaultTrieVariant(final TrieUtils variant) {
+		defaultTrieVariant=variant;
+	}
+
+	/**
+	 * Gets the default variant used for generating trie values and ranges.
+	 * It is used by the constructors of {@link TrieRangeQuery} and {@link TrieRangeFilter} without <code>TrieUtils</code> parameter
+	 * and can be used to get a default value through your whole application.
+	 * <p>The default, if not set by {@link #setDefaultTrieVariant}, is {@link #VARIANT_8BIT}.
+	 */
+	public synchronized static final TrieUtils getDefaultTrieVariant() {
+		return defaultTrieVariant;
+	}
+	
+	/**
+	 * Detects and returns the variant of a trie encoded string using the length.
+	 * @throws NumberFormatException if the length is not 8, 16, or 32 chars.
+	 */
+	public static final TrieUtils autoDetectVariant(final String s) {
+		final int l=s.length();
+		if (l==VARIANT_8BIT.TRIE_CODED_LENGTH) {
+			return VARIANT_8BIT;
+		} else if (l==VARIANT_4BIT.TRIE_CODED_LENGTH) {
+			return VARIANT_4BIT;
+		} else if (l==VARIANT_2BIT.TRIE_CODED_LENGTH) {
+			return VARIANT_2BIT;
+		} else {
+			throw new NumberFormatException("Invalid trie encoded numerical value representation (incompatible length).");
+		}
+	}
+
+	/**
+	 * Converts a encoded <code>String</code> value back to a <code>long</code>,
+	 * auto detecting the trie encoding variant using the String length.
+	 */
+	public static final long trieCodedToLongAuto(final String s) {
+		return autoDetectVariant(s).trieCodedToLong(s);
+	}
+
+	/**
+	 * Converts a encoded <code>String</code> value back to a <code>double</code>,
+	 * auto detecting the trie encoding variant using the String length.
+	 */
+	public static final double trieCodedToDoubleAuto(final String s) {
+		return autoDetectVariant(s).trieCodedToDouble(s);
+	}
+
+	/**
+	 * Converts a encoded <code>String</code> value back to a <code>Date</code>,
+	 * auto detecting the trie encoding variant using the String length.
+	 */
+	public static final Date trieCodedToDateAuto(final String s) {
+		return autoDetectVariant(s).trieCodedToDate(s);
+	}
+
+	// TrieUtils instance's part
+	
+	private TrieUtils(int bits) {
+		assert 64%bits == 0;
+		
+		// helper variable for conversion
+		mask = (1L << bits) - 1L;		
+
+		// init global "constants"
+		TRIE_BITS=bits;
+		TRIE_CODED_LENGTH=64/TRIE_BITS;
+		TRIE_CODED_SYMBOL_MAX=(char)(TRIE_CODED_SYMBOL_MIN+mask);
+		TRIE_CODED_NUMERIC_MIN=longToTrieCoded(Long.MIN_VALUE);
+		TRIE_CODED_NUMERIC_MAX=longToTrieCoded(Long.MAX_VALUE);
+	}
+
+	// internal conversion to/from strings
+
+	private final String internalLongToTrieCoded(long l) {
+		final char[] buf=new char[TRIE_CODED_LENGTH];
+		for (int i=TRIE_CODED_LENGTH-1; i>=0; i--) {
+			buf[i] = (char)( TRIE_CODED_SYMBOL_MIN + (l & mask) );
+			l = l >>> TRIE_BITS;
+		}
+		return new String(buf);
+	}
+
+	private final long internalTrieCodedToLong(final String s) {
+		if (s==null) throw new NullPointerException("Trie encoded string may not be NULL");
+		final int len=s.length();
+		if (len!=TRIE_CODED_LENGTH) throw new NumberFormatException(
+			"Invalid trie encoded numerical value representation (incompatible length, must be "+TRIE_CODED_LENGTH+")"
+		);
+		long l=0L;
+		for (int i=0; i<len; i++) {
+			char ch=s.charAt(i);
+			if (ch>=TRIE_CODED_SYMBOL_MIN && ch<=TRIE_CODED_SYMBOL_MAX) {
+				l = (l << TRIE_BITS) | (long)(ch-TRIE_CODED_SYMBOL_MIN);
+			} else {
+				throw new NumberFormatException(
+					"Invalid trie encoded numerical value representation (char "+
+					Integer.toHexString((int)ch)+" at position "+i+" is invalid)"
+				);
+			}
+		}
+		return l;
+	}
+
+	// Long's
+
+	/** Converts a <code>long</code> value encoded to a <code>String</code>. */
+	public String longToTrieCoded(final long l) {
+		return internalLongToTrieCoded(l ^ 0x8000000000000000L);
+	}
+
+	/** Converts a encoded <code>String</code> value back to a <code>long</code>. */
+	public long trieCodedToLong(final String s) {
+		return internalTrieCodedToLong(s) ^ 0x8000000000000000L;
+	}
+
+	// Double's
+
+	/** Converts a <code>double</code> value encoded to a <code>String</code>. */
+	public String doubleToTrieCoded(final double d) {
+		long l=Double.doubleToLongBits(d);
+		if ((l & 0x8000000000000000L) == 0L) {
+			// >0
+			l |= 0x8000000000000000L;
+		} else {
+			// <0
+			l = ~l;
+		}
+		return internalLongToTrieCoded(l);
+	}
+
+	/** Converts a encoded <code>String</code> value back to a <code>double</code>. */
+	public double trieCodedToDouble(final String s) {
+		long l=internalTrieCodedToLong(s);
+		if ((l & 0x8000000000000000L) != 0L) {
+			// >0
+			l &= 0x7fffffffffffffffL;
+		} else {
+			// <0
+			l = ~l;
+		}
+		return Double.longBitsToDouble(l);
+	}
+
+	// Date's
+
+	/** Converts a <code>Date</code> value encoded to a <code>String</code>. */
+	public String dateToTrieCoded(final Date d) {
+		return longToTrieCoded(d.getTime());
+	}
+
+	/** Converts a encoded <code>String</code> value back to a <code>Date</code>. */
+	public Date trieCodedToDate(final String s) {
+		return new Date(trieCodedToLong(s));
+	}
+
+	// increment / decrement
+
+	/** Increments an encoded String value by 1. Needed by {@link TrieRangeFilter}. */
+	public String incrementTrieCoded(final String v) {
+		final int l=v.length();
+		final char[] buf=new char[l];
+		boolean inc=true;
+		for (int i=l-1; i>=0; i--) {
+			int b=v.charAt(i)-TRIE_CODED_SYMBOL_MIN;
+			if (inc) b++;
+			if (inc=(b>(int)mask)) b=0;
+			buf[i]=(char)(TRIE_CODED_SYMBOL_MIN+b);
+		}
+		return new String(buf);
+	}
+
+	/** Decrements an encoded String value by 1. Needed by {@link TrieRangeFilter}. */
+	public String decrementTrieCoded(final String v) {
+		final int l=v.length();
+		final char[] buf=new char[l];
+		boolean dec=true;
+		for (int i=l-1; i>=0; i--) {
+			int b=v.charAt(i)-TRIE_CODED_SYMBOL_MIN;
+			if (dec) b--;
+			if (dec=(b<0)) b=(int)mask;
+			buf[i]=(char)(TRIE_CODED_SYMBOL_MIN+b);
+		}
+		return new String(buf);
+	}
+
+	private void addConvertedTrieCodedDocumentField(
+		final Document ldoc, final String fieldname, final String val,
+		final boolean index, final Field.Store store
+	) {
+		Field f=new Field(fieldname, val, store, index?Field.Index.NOT_ANALYZED:Field.Index.NO);
+		if (index) {
+			f.setOmitTf(true);
+			ldoc.add(f);
+			// add the lower precision values in the helper field with prefix
+			final StringBuffer sb=new StringBuffer(TRIE_CODED_LENGTH);
+			synchronized(sb) {
+				for (int i=TRIE_CODED_LENGTH-1; i>0; i--) {
+					sb.setLength(0);
+					f=new Field(
+						fieldname + LOWER_PRECISION_FIELD_NAME_SUFFIX,
+						sb.append( (char)(TRIE_CODED_PADDING_START+i) ).append( val.substring(0,i) ).toString(),
+						Field.Store.NO, Field.Index.NOT_ANALYZED
+					);
+					f.setOmitTf(true);
+					ldoc.add(f);
+				}
+			}
+		} else {
+			ldoc.add(f);
+		}
+	}
+
+	/**
+	 * Stores a double value in trie-form in document for indexing.
+	 * <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
+	 * each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
+	 * in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
+	 * (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
+	 * The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
+	 * If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
+	 * Fields added to a document using this method can be queried by {@link TrieRangeQuery}. 
+	 */
+	public void addDoubleTrieCodedDocumentField(
+		final Document ldoc, final String fieldname, final double val,
+		final boolean index, final Field.Store store
+	) {
+		addConvertedTrieCodedDocumentField(ldoc, fieldname, doubleToTrieCoded(val), index, store);
+	}
+
+	/**
+	 * Stores a Date value in trie-form in document for indexing.
+	 * <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
+	 * each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
+	 * in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
+	 * (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
+	 * The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
+	 * If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
+	 * Fields added to a document using this method can be queried by {@link TrieRangeQuery}. 
+	 */
+	public void addDateTrieCodedDocumentField(
+		final Document ldoc, final String fieldname,
+		final Date val, final boolean index, final Field.Store store
+	) {
+		addConvertedTrieCodedDocumentField(ldoc, fieldname, dateToTrieCoded(val), index, store);
+	}
+
+	/**
+	 * Stores a long value in trie-form in document for indexing.
+	 * <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
+	 * each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
+	 * in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
+	 * (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
+	 * The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
+	 * If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
+	 * Fields added to a document using this method can be queried by {@link TrieRangeQuery}. 
+	 */
+	public void addLongTrieCodedDocumentField(
+		final Document ldoc, final String fieldname,
+		final long val, final boolean index, final Field.Store store
+	) {
+		addConvertedTrieCodedDocumentField(ldoc, fieldname, longToTrieCoded(val), index, store);
+	}
+	
+	private final long mask;
+	
+	/** Number of bits used in this trie variant (2, 4, or 8) */
+	public final int TRIE_BITS;
+
+	/** Length (in chars) of an encoded value (8, 16, or 32 chars) */
+	public final int TRIE_CODED_LENGTH;
+
+	/** Character used as upper end (depends on trie bits, its <code>{@link #TRIE_CODED_SYMBOL_MIN}+2^{@link #TRIE_BITS}-1</code>) */
+	public final char TRIE_CODED_SYMBOL_MAX;
+
+	/** minimum encoded value of a numerical index entry: {@link Long#MIN_VALUE} */
+	public final String TRIE_CODED_NUMERIC_MIN;
+
+	/** maximum encoded value of a numerical index entry: {@link Long#MAX_VALUE} */
+	public final String TRIE_CODED_NUMERIC_MAX;
+
+}

Propchange: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/package.html?rev=723031&view=auto
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/package.html (added)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/package.html Wed Dec  3 11:38:31 2008
@@ -0,0 +1,96 @@
+<html>
+<body>
+<p>This package provides fast numeric range queries/filters on <code>long</code>, <code>double</code> or <code>Date</code>
+fields based on trie structures.</p>
+
+<h3>How it works</h3>
+<p>See the publication about <a target="_blank" href="http://www.panfmp.org">panFMP</a>, where this algorithm was described:
+
+<blockquote><strong>Schindler, U, Diepenbroek, M</strong>, 2008. <em>Generic XML-based Framework for Metadata Portals.</em>
+Computers &amp; Geosciences 34 (12), 1947-1955.
+<a href="http://dx.doi.org/10.1016/j.cageo.2008.02.023" target="_blank">doi:10.1016/j.cageo.2008.02.023</a></blockquote>
+
+<p><em>A quote from this paper:</em> Because Apache Lucene is a full-text search engine and not a conventional database,
+it cannot handle numerical ranges (e.g., field value is inside user defined bounds, even dates are numerical values).
+We have developed an extension to Apache Lucene that stores
+the numerical values in a special string-encoded format with variable precision
+(all numerical values like doubles, longs, and timestamps are converted to lexicographic sortable string representations
+and stored with different precisions from one byte to the full 8 bytes - depending on the variant used).
+For a more detailed description, how the values are stored, see {@link org.apache.lucene.search.trie.TrieUtils}.
+A range is then divided recursively into multiple intervals for searching:
+The center of the range is searched only with the lowest possible precision in the trie, the boundaries are matched
+more exactly. This reduces the number of terms dramatically.</p>
+
+<p>For the variant, that uses a lowest precision of 1-byte the index only
+contains a maximum of 256 distinct values in the lowest precision.
+Overall, a range could consist of a theoretical maximum of
+<code>7*255*2 + 255 = 3825</code> distinct terms (when there is a term for every distinct value of an
+8-byte-number in the index and the range covers all of them; a maximum of 255 distinct values is used
+because it would always be possible to reduce the full 256 values to one term with degraded precision).
+In practise, we have seen up to 300 terms in most cases (index with 500,000 metadata records
+and a homogeneous dispersion of values).</p>
+
+<p>There are two other variants of encoding: 4bit and 2bit. Each variant stores more different precisions
+of the longs and so needs more storage space (because it generates more and longer terms -
+4bit: two times the length and number of terms; 2bit: four times the length and number of terms).
+But on the other hand, the maximum number of distinct terms used for range queries is
+<code>15*15*2 + 15 = 465</code> for the 4bit variant, and
+<code>31*3*2 + 3 = 189</code> for the 2bit variant.</p>
+
+<p>This dramatically improves the performance of Apache Lucene with range queries, which
+is no longer dependent on the index size and number of distinct values because there is
+an upper limit not related to any of these properties.</p>
+
+<h3>Usage</h3>
+<p>To use the new query types the numerical values, which may be <code>long</code>, <code>double</code> or <code>Date</code>,
+during indexing the values must be stored in a special format in index (using {@link org.apache.lucene.search.trie.TrieUtils}).
+This can be done like this:</p>
+
+<pre>
+	Document doc=new Document();
+	// add some standard fields:
+	String svalue="anything to index";
+	doc.add(new Field("exampleString",
+		svalue, Field.Store.YES, Field.Index.ANALYZED) ;
+	// add some numerical fields:
+	double fvalue=1.057E17;
+	TrieUtils.VARIANT_8BIT.addDoubleTrieCodedDocumentField(doc, "exampleDouble", 
+		fvalue, true /* index the field */, Field.Store.YES);
+	long lvalue=121345L;
+	TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(doc, "exampleLong",
+		lvalue, true /* index the field */, Field.Store.YES);
+	Date dvalue=new Date(); // actual time
+	TrieUtils.VARIANT_8BIT.addDateTrieCodedDocumentField(doc, "exampleDate", 
+		dvalue, true /* index the field */, Field.Store.YES);
+	// add document to IndexWriter
+</pre>
+
+<p>The numeric index fields you prepared in this way can be searched by {@link org.apache.lucene.search.trie.TrieRangeQuery}:</p>
+
+<pre>
+	// Java 1.4, because Double.valueOf(double) is not available:
+	Query q=new TrieRangeQuery("exampleDouble", new Double(1.0E17), new Double(2.0E17), TrieUtils.VARIANT_8BIT);
+	// OR, Java 1.5, using autoboxing:
+	Query q=new TrieRangeQuery("exampleDouble", 1.0E17, 2.0E17, TrieUtils.VARIANT_8BIT);
+	TopDocs docs=searcher.search(q, 10);
+	for (int i=0; i&lt;docs.scoreDocs.length; i++) {
+		Document doc=searcher.doc(docs.scoreDocs[i].doc);
+		System.out.println(doc.get("exampleString"));
+		// decode the stored numerical value (important!!!):
+		System.out.println( TrieUtils.VARIANT_8BIT.trieCodedToDouble(doc.get("exampleDouble")) );
+	}
+</pre>
+
+<h3>Performance</h3>
+
+<p>Comparisions of the different types of RangeQueries on an index with about 500,000 docs showed,
+that the old {@link org.apache.lucene.search.RangeQuery} (with raised 
+{@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete,
+{@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and
+{@link org.apache.lucene.search.trie.TrieRangeQuery} took &lt;100ms to
+complete (on an Opteron64 machine, Java 1.5, {@link org.apache.lucene.search.trie.TrieUtils#VARIANT_8BIT}).
+This query type was developed for a geographic portal, where the performance for
+e.g. bounding boxes or exact date/time stamps is important.</p>
+
+</body>
+</html>

Propchange: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/trie/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieRangeQuery.java?rev=723031&view=auto
==============================================================================
--- lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieRangeQuery.java (added)
+++ lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieRangeQuery.java Wed Dec  3 11:38:31 2008
@@ -0,0 +1,192 @@
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.RangeQuery;
+
+public class TestTrieRangeQuery extends TestCase
+{
+	private static final long distance=66666;
+	
+	private static Random rnd=new Random();
+	private static RAMDirectory directory;
+	private static IndexSearcher searcher;
+	static {
+		try {
+			directory = new RAMDirectory();
+			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
+			true, MaxFieldLength.UNLIMITED);
+			
+			// Add a series of 10000 docs with increasing long values
+			for (long l=0L; l<10000L; l++) {
+				Document doc=new Document();
+				// add fields, that have a distance to test general functionality
+				TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(
+					doc, "field8", distance*l, true /*index it*/, Field.Store.YES
+				);
+				TrieUtils.VARIANT_4BIT.addLongTrieCodedDocumentField(
+					doc, "field4", distance*l, true /*index it*/, Field.Store.YES
+				);
+				TrieUtils.VARIANT_2BIT.addLongTrieCodedDocumentField(
+					doc, "field2", distance*l, true /*index it*/, Field.Store.YES
+				);
+				// add ascending fields with a distance of 1 to test the correct splitting of range
+				TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(
+					doc, "ascfield8", l, true /*index it*/, Field.Store.NO
+				);
+				TrieUtils.VARIANT_4BIT.addLongTrieCodedDocumentField(
+					doc, "ascfield4", l, true /*index it*/, Field.Store.NO
+				);
+				TrieUtils.VARIANT_2BIT.addLongTrieCodedDocumentField(
+					doc, "ascfield2", l, true /*index it*/, Field.Store.NO
+				);
+				writer.addDocument(doc);
+			}
+		
+			writer.close();
+			searcher=new IndexSearcher(directory);			
+		} catch (Exception e) {
+			throw new Error(e);
+		}
+	}
+	
+	private void testRange(final TrieUtils variant) throws Exception {
+		String field="field"+variant.TRIE_BITS;
+		int count=3000;
+		long lower=96666L, upper=lower + count*distance + 1234L;
+		TrieRangeQuery q=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
+		TopDocs topDocs = searcher.search(q, null, 10000, Sort.INDEXORDER);
+		System.out.println("Found "+q.getFilter().getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'.");
+		ScoreDoc[] sd = topDocs.scoreDocs;
+		assertNotNull(sd);
+		assertEquals("Score docs must match "+count+" docs, found "+sd.length+" docs", sd.length, count );
+		Document doc=searcher.doc(sd[0].doc);
+		assertEquals("First doc should be "+(2*distance), variant.trieCodedToLong(doc.get(field)), 2*distance );
+		doc=searcher.doc(sd[sd.length-1].doc);
+		assertEquals("Last doc should be "+((1+count)*distance), variant.trieCodedToLong(doc.get(field)), (1+count)*distance );
+	}
+
+	public void testRange_8bit() throws Exception {
+		testRange(TrieUtils.VARIANT_8BIT);
+	}
+	
+	public void testRange_4bit() throws Exception {
+		testRange(TrieUtils.VARIANT_4BIT);
+	}
+	
+	public void testRange_2bit() throws Exception {
+		testRange(TrieUtils.VARIANT_2BIT);
+	}
+	
+	private void testLeftOpenRange(final TrieUtils variant) throws Exception {
+		String field="field"+variant.TRIE_BITS;
+		int count=3000;
+		long upper=(count-1)*distance + 1234L;
+		TrieRangeQuery q=new TrieRangeQuery(field, null, new Long(upper), variant);
+		TopDocs topDocs = searcher.search(q, null, 10000, Sort.INDEXORDER);
+		System.out.println("Found "+q.getFilter().getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
+		ScoreDoc[] sd = topDocs.scoreDocs;
+		assertNotNull(sd);
+		assertEquals("Score docs must match "+count+" docs, found "+sd.length+" docs", sd.length, count );
+		Document doc=searcher.doc(sd[0].doc);
+		assertEquals("First doc should be 0", variant.trieCodedToLong(doc.get(field)), 0L );
+		doc=searcher.doc(sd[sd.length-1].doc);
+		assertEquals("Last doc should be "+((count-1)*distance), variant.trieCodedToLong(doc.get(field)), (count-1)*distance );
+	}
+	
+	public void testLeftOpenRange_8bit() throws Exception {
+		testLeftOpenRange(TrieUtils.VARIANT_8BIT);
+	}
+	
+	public void testLeftOpenRange_4bit() throws Exception {
+		testLeftOpenRange(TrieUtils.VARIANT_4BIT);
+	}
+	
+	public void testLeftOpenRange_2bit() throws Exception {
+		testLeftOpenRange(TrieUtils.VARIANT_2BIT);
+	}
+	
+	private void testRandomTrieAndClassicRangeQuery(final TrieUtils variant) throws Exception {
+		String field="field"+variant.TRIE_BITS;
+		// 50 random tests, the tests may also return 0 results, if min>max, but this is ok
+		for (int i=0; i<50; i++) {
+			long lower=(long)(rnd.nextDouble()*10000L*distance);
+			long upper=(long)(rnd.nextDouble()*10000L*distance);
+			TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
+			RangeQuery cq=new RangeQuery(field, variant.longToTrieCoded(lower), variant.longToTrieCoded(upper), true, true);
+			cq.setConstantScoreRewrite(true);
+			TopDocs tTopDocs = searcher.search(tq, 1);
+			TopDocs cTopDocs = searcher.search(cq, 1);
+			assertEquals("Returned count for TrieRangeQuery and RangeQuery must be equal", tTopDocs.totalHits, cTopDocs.totalHits );
+		}
+	}
+	
+	public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
+		testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_8BIT);
+	}
+	
+	public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
+		testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_4BIT);
+	}
+	
+	public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
+		testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_2BIT);
+	}
+	
+	private void testRangeSplit(final TrieUtils variant) throws Exception {
+		String field="ascfield"+variant.TRIE_BITS;
+		// 50 random tests, the tests may also return 0 results, if min>max, but this is ok
+		for (int i=0; i<50; i++) {
+			long lower=(long)(rnd.nextDouble()*10000L);
+			long upper=(long)(rnd.nextDouble()*10000L);
+			if (lower>upper) {
+				long a=lower; lower=upper; upper=a;
+			}
+			TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
+			TopDocs tTopDocs = searcher.search(tq, 1);
+			assertEquals("Returned count of range query must be equal to inclusive range length", tTopDocs.totalHits, upper-lower+1 );
+		}
+	}
+
+	public void testRangeSplit_8bit() throws Exception {
+		testRangeSplit(TrieUtils.VARIANT_8BIT);
+	}
+	
+	public void testRangeSplit_4bit() throws Exception {
+		testRangeSplit(TrieUtils.VARIANT_4BIT);
+	}
+	
+	public void testRangeSplit_2bit() throws Exception {
+		testRangeSplit(TrieUtils.VARIANT_2BIT);
+	}
+	
+}

Propchange: lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieRangeQuery.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieUtils.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieUtils.java?rev=723031&view=auto
==============================================================================
--- lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieUtils.java (added)
+++ lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieUtils.java Wed Dec  3 11:38:31 2008
@@ -0,0 +1,168 @@
+package org.apache.lucene.search.trie;
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+import java.util.Date;
+import java.util.GregorianCalendar;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestTrieUtils extends LuceneTestCase {
+
+	public void testSpecialValues() throws Exception {
+		// Variant 8bit values
+		assertEquals( TrieUtils.VARIANT_8BIT.TRIE_CODED_NUMERIC_MIN, "\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
+		assertEquals( TrieUtils.VARIANT_8BIT.TRIE_CODED_NUMERIC_MAX, "\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff");
+		assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(-1),    "\u017f\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff");
+		assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(0),     "\u0180\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
+		assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(1),     "\u0180\u0100\u0100\u0100\u0100\u0100\u0100\u0101");
+		// Variant 4bit values
+		assertEquals( TrieUtils.VARIANT_4BIT.TRIE_CODED_NUMERIC_MIN, "\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
+		assertEquals( TrieUtils.VARIANT_4BIT.TRIE_CODED_NUMERIC_MAX, "\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f");
+		assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(-1),    "\u0107\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f");
+		assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(0),     "\u0108\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
+		assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(1),     "\u0108\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0101");
+		// TODO: 2bit tests
+	}
+
+	private void testBinaryOrderingAndIncrement(TrieUtils variant) throws Exception {
+		// generate a series of encoded longs, each numerical one bigger than the one before
+		String last=null;
+		for (long l=-100000L; l<100000L; l++) {
+			String act=variant.longToTrieCoded(l);
+			if (last!=null) {
+				// test if smaller
+				assertTrue( last.compareTo(act) < 0 );
+				// test the increment method (the last incremented by one should be the actual)
+				assertEquals( variant.incrementTrieCoded(last), act );
+				// test the decrement method (the actual decremented by one should be the last)
+				assertEquals( last, variant.decrementTrieCoded(act) );
+			}
+			// next step
+			last=act;
+		}
+	}
+
+	public void testBinaryOrderingAndIncrement_8bit() throws Exception {
+		testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
+	}
+
+	public void testBinaryOrderingAndIncrement_4bit() throws Exception {
+		testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
+	}
+
+	public void testBinaryOrderingAndIncrement_2bit() throws Exception {
+		testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
+	}
+
+	private void testLongs(TrieUtils variant) throws Exception {
+		long[] vals=new long[]{
+			Long.MIN_VALUE, -5000L, -4000L, -3000L, -2000L, -1000L, 0L,
+			1L, 10L, 300L, 5000L, Long.MAX_VALUE-2, Long.MAX_VALUE-1, Long.MAX_VALUE
+		};
+		String[] trieVals=new String[vals.length];
+		
+		// check back and forth conversion
+		for (int i=0; i<vals.length; i++) {
+			trieVals[i]=variant.longToTrieCoded(vals[i]);
+			assertEquals( "Back and forth conversion should return same value", vals[i], variant.trieCodedToLong(trieVals[i]) );
+		}
+		
+		// check sort order (trieVals should be ascending)
+		for (int i=1; i<vals.length; i++) {
+			assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
+		}
+	}
+
+	public void testLongs_8bit() throws Exception {
+		testLongs(TrieUtils.VARIANT_8BIT);
+	}
+
+	public void testLongs_4bit() throws Exception {
+		testLongs(TrieUtils.VARIANT_4BIT);
+	}
+
+	public void testLongs_2bit() throws Exception {
+		testLongs(TrieUtils.VARIANT_2BIT);
+	}
+
+	private void testDoubles(TrieUtils variant) throws Exception {
+		double[] vals=new double[]{
+			Double.NEGATIVE_INFINITY, -2.3E25, -1.0E15, -1.0, -1.0E-1, -1.0E-2, -0.0, 
+			+0.0, 1.0E-2, 1.0E-1, 1.0, 1.0E15, 2.3E25, Double.POSITIVE_INFINITY
+		};
+		String[] trieVals=new String[vals.length];
+		
+		// check back and forth conversion
+		for (int i=0; i<vals.length; i++) {
+			trieVals[i]=variant.doubleToTrieCoded(vals[i]);
+			assertTrue( "Back and forth conversion should return same value", vals[i]==variant.trieCodedToDouble(trieVals[i]) );
+		}
+		
+		// check sort order (trieVals should be ascending)
+		for (int i=1; i<vals.length; i++) {
+			assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
+		}
+	}
+
+	public void testDoubles_8bit() throws Exception {
+		testDoubles(TrieUtils.VARIANT_8BIT);
+	}
+
+	public void testDoubles_4bit() throws Exception {
+		testDoubles(TrieUtils.VARIANT_4BIT);
+	}
+
+	public void testDoubles_2bit() throws Exception {
+		testDoubles(TrieUtils.VARIANT_2BIT);
+	}
+
+	private void testDates(TrieUtils variant) throws Exception {
+		Date[] vals=new Date[]{
+			new GregorianCalendar(1000,1,1).getTime(),
+			new GregorianCalendar(1999,1,1).getTime(),
+			new GregorianCalendar(2000,1,1).getTime(),
+			new GregorianCalendar(2001,1,1).getTime()
+		};
+		String[] trieVals=new String[vals.length];
+		
+		// check back and forth conversion
+		for (int i=0; i<vals.length; i++) {
+			trieVals[i]=variant.dateToTrieCoded(vals[i]);
+			assertEquals( "Back and forth conversion should return same value", vals[i], variant.trieCodedToDate(trieVals[i]) );
+		}
+		
+		// check sort order (trieVals should be ascending)
+		for (int i=1; i<vals.length; i++) {
+			assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
+		}
+	}
+
+	public void testDates_8bit() throws Exception {
+		testDates(TrieUtils.VARIANT_8BIT);
+	}
+
+	public void testDates_4bit() throws Exception {
+		testDates(TrieUtils.VARIANT_4BIT);
+	}
+
+	public void testDates_2bit() throws Exception {
+		testDates(TrieUtils.VARIANT_2BIT);
+	}
+
+}

Propchange: lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieUtils.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message