lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tjo...@apache.org
Subject additional sort field type (again)
Date Thu, 22 Apr 2004 22:38:05 GMT
oops - forgot to include the comments on the SampleComparable below
to explain what it is all about.

--

I just committed some code which adds a new SortField type where the
comparison function can be specified.

I also wrote another class which I have not committed - it is very
convenient, but has a heavy memory footprint.

Attached below is the class, and an example of its use.  What do you guys
think of it?

Tim

--

=========
the class
=========

   package org.apache.lucene.search;

   import org.apache.lucene.index.*;
   import java.io.IOException;

   /**
    * Abstract base class for sorting hits returned by a Query.
    *
    * <p>This class should only be used if the other SortField
    * types (SCORE, DOC, STRING, INT, FLOAT) do not provide an
    * adequate sorting.  It maintains an internal cache of values which
    * could be quite large.  The cache is an array of Comparable,
    * one for each document in the index.  There is a distinct
    * Comparable for each unique term in the field - if
    * some documents have the same term in the field, the cache
    * array will have entries which reference the same Comparable.
    *
    * <p>Documents which have no term in the field have a
    * <code>null</code> cache entry.  Such documents sort before all
    * documents which do have a value (and after them when the sort
    * is reversed).
    */
   public abstract class SortComparator
   implements SortComparatorSource {

     // inherit javadocs
     public ScoreDocLookupComparator newComparator (final IndexReader reader, String fieldname)
     throws IOException {
       final String field = fieldname.intern();
       final TermEnum enumerator = reader.terms (new Term (fieldname, ""));

       // Build the per-document cache up front so the enumerator can be
       // closed before the comparator is handed back to the caller.
       final Comparable[] cachedValues;
       try {
         cachedValues = fillCache (reader, enumerator, field);
       } finally {
         enumerator.close();
       }

       return new ScoreDocLookupComparator() {

         public boolean sizeMatches (int n) {
           return (cachedValues.length == n);
         }

         public int compare (ScoreDoc i, ScoreDoc j) {
           return compareNullsFirst (cachedValues[i.doc], cachedValues[j.doc]);
         }

         public int compareReverse (ScoreDoc i, ScoreDoc j) {
           return compareNullsFirst (cachedValues[j.doc], cachedValues[i.doc]);
         }

         public Comparable sortValue (ScoreDoc i) {
           return cachedValues[i.doc];
         }

         public int sortType() {
           return SortField.CUSTOM;
         }
       };
     }

     /**
      * Compares two cache entries, either of which may be
      * <code>null</code> (the document had no term in the field).
      * A <code>null</code> entry orders before any non-null entry and
      * is equal to another <code>null</code> entry; two non-null
      * entries are compared by their natural order.
      * @param a First cache entry, possibly <code>null</code>.
      * @param b Second cache entry, possibly <code>null</code>.
      * @return Negative, zero or positive as <code>a</code> is less
      *         than, equal to or greater than <code>b</code>.
      */
     private static int compareNullsFirst (Comparable a, Comparable b) {
       if (a == null) {
         return (b == null) ? 0 : -1;
       }
       if (b == null) {
         return 1;
       }
       return a.compareTo (b);
     }

     /**
      * Returns an array of objects which represent the natural order
      * of the term values in the given field.
      * @param reader Terms are in this index.
      * @param enumerator Use this to get the term values and TermDocs.
      * @param fieldname Comparables should be for this field.
      * @return Array of length <code>reader.maxDoc()</code>, indexed by
      *         document number, holding the Comparable for the term of
      *         each document; entries for documents without a term in
      *         the field are <code>null</code>.
      * @throws IOException If an error occurs reading the index.
      * @throws RuntimeException If the index has documents but the
      *         enumerator is not positioned on any term.
      */
     protected Comparable[] fillCache (IndexReader reader, TermEnum enumerator, String fieldname)
     throws IOException {
       final String field = fieldname.intern();
       Comparable[] retArray = new Comparable[reader.maxDoc()];
       if (retArray.length > 0) {
         TermDocs termDocs = reader.termDocs();
         try {
           if (enumerator.term() == null) {
             throw new RuntimeException ("no terms in field " + field);
           }
           do {
             Term term = enumerator.term();
             // Reference comparison: assumes Term field names are interned,
             // as 'field' is above.  Stops at the first term of another field.
             if (term.field() != field) break;
             // One Comparable per unique term, shared by every document
             // which contains that term.
             Comparable termval = getComparable (term.text());
             termDocs.seek (enumerator);
             while (termDocs.next()) {
               retArray[termDocs.doc()] = termval;
             }
           } while (enumerator.next());
         } finally {
           termDocs.close();
         }
       }
       return retArray;
     }

     /**
      * Returns an object which, when sorted according to natural order,
      * will order the Term values in the correct order.
      * <p>For example, if the Terms contained integer values, this method
      * would return <code>new Integer(termtext)</code>.  Note that this
      * might not always be the most efficient implementation - for this
      * particular example, a better implementation might be to make a
      * ScoreDocLookupComparator that uses an internal lookup table of int.
      * @param termtext The textual value of the term.
      * @return An object representing <code>termtext</code> that sorts
      *         according to the natural order of <code>termtext</code>.
      * @see Comparable
      * @see ScoreDocLookupComparator
      */
     protected abstract Comparable getComparable (String termtext);

   }


====================
How it would be used
====================
   ...
   // Sort on "idfield" with a custom ordering: the anonymous SortComparator
   // turns each term's text into a SampleComparable, whose compareTo
   // defines the sort order.
   new SortField ("idfield", new SortComparator() {
     // Invoked by SortComparator.fillCache for each term text in the field.
     protected Comparable getComparable (String termtext) {
       return new SampleComparable (termtext);
     }
   });
   ...


   /**
    * An example Comparable for use with the custom sort tests.
    * It implements a comparable for "id" sort of values which
    * consist of an alphanumeric part and a numeric part, such as:
    *
    * <p>ABC-123, A-1, A-7, A-100, B-99999
    *
    * <p>Such values cannot be sorted as strings, since A-100 needs
    * to come after A-7.
    *
    * <p>It could be argued that the "ids" should be rewritten as
    * A-0001, A-0100, etc. so they will sort as strings.  That is
    * a valid alternate way to solve it - but
    * this is only supposed to be a simple test case.
    *
    * <p>Instances order first by the text before the first '-' and
    * then numerically by the integer after it.  <code>equals</code> and
    * <code>hashCode</code> are consistent with that ordering.
    */
   public class SampleComparable
   implements Comparable, Serializable {

     /** Text before the first '-' of the id. */
     final String string_part;
     /** Integer value of the text after the first '-' of the id. */
     final Integer int_part;

     /**
      * Parses an id of the form <code>PREFIX-NUMBER</code>.
      * @param s The id text, split at its first '-'.
      * @throws IllegalArgumentException If <code>s</code> contains no '-'.
      * @throws NumberFormatException If the text after the first '-'
      *         is not a valid integer.
      */
     public SampleComparable (String s) {
       int i = s.indexOf ("-");
       if (i < 0) {
         // Without this check substring (0, -1) would fail with an
         // unhelpful StringIndexOutOfBoundsException.
         throw new IllegalArgumentException ("id has no '-' separator: " + s);
       }
       string_part = s.substring (0, i);
       int_part = Integer.valueOf (s.substring (i + 1));
     }

     /**
      * Orders by the string part, then by the integer part.
      * @param o The other SampleComparable.
      * @return Negative, zero or positive as this id sorts before,
      *         equal to or after <code>o</code>.
      * @throws ClassCastException If <code>o</code> is not a SampleComparable.
      */
     public int compareTo (Object o) {
       SampleComparable otherid = (SampleComparable) o;
       int i = string_part.compareTo (otherid.string_part);
       if (i == 0) return int_part.compareTo (otherid.int_part);
       return i;
     }

     /**
      * Two ids are equal when both their string part and their integer
      * part are equal, i.e. exactly when <code>compareTo</code> returns 0.
      */
     public boolean equals (Object o) {
       if (this == o) return true;
       if (!(o instanceof SampleComparable)) return false;
       SampleComparable otherid = (SampleComparable) o;
       return string_part.equals (otherid.string_part)
           && int_part.equals (otherid.int_part);
     }

     /** Hash code derived from both parts, consistent with equals. */
     public int hashCode() {
       return 31 * string_part.hashCode() + int_part.hashCode();
     }
   }





---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message