lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tjo...@apache.org
Subject additional sort field type
Date Thu, 22 Apr 2004 22:33:33 GMT
I just committed some code which adds a new SortField type where the 
comparison function can be specified.

I also wrote another class which I have not committed - it is very 
convenient, but has a heavy memory footprint.

Attached below is the class and an example of its use.  What do you guys 
think of it?

Tim

--

=========
the class
=========

   package org.apache.lucene.search;

   import org.apache.lucene.index.*;
   import java.io.IOException;

   /**
    * Abstract base class for sorting hits returned by a Query.
    *
    * <p>This class should only be used if the other SortField
    * types (SCORE, DOC, STRING, INT, FLOAT) do not provide an
    * adequate sorting.  It maintains an internal cache of values which
    * could be quite large.  The cache is an array of Comparable,
    * one for each document in the index.  There is a distinct
    * Comparable for each unique term in the field - if
    * some documents have the same term in the field, the cache
    * array will have entries which reference the same Comparable.
    *
    * <p>Documents which have no term in the field keep a <code>null</code>
    * cache entry.  The comparators created by this class order such
    * documents before documents which do have a value.
    */
   public abstract class SortComparator
   implements SortComparatorSource {

     // inherit javadocs
     // Builds the per-document cache for the field once, then answers every
     // comparison with an array lookup.
     public ScoreDocLookupComparator newComparator (final IndexReader reader, String fieldname)
     throws IOException {
       final String field = fieldname.intern();
       final TermEnum enumerator = reader.terms (new Term (fieldname, ""));
       try {
         return new ScoreDocLookupComparator() {
           // Initialized while this anonymous instance is being constructed,
           // i.e. before the finally block below closes the enumerator.
           protected Comparable[] cachedValues = fillCache (reader, enumerator, field);

           public boolean sizeMatches (int n) {
             return (cachedValues.length == n);
           }

           public int compare (ScoreDoc i, ScoreDoc j) {
             return compareNullsFirst (cachedValues[i.doc], cachedValues[j.doc]);
           }

           public int compareReverse (ScoreDoc i, ScoreDoc j) {
             return compareNullsFirst (cachedValues[j.doc], cachedValues[i.doc]);
           }

           // May return null when the document has no term in the field.
           public Comparable sortValue (ScoreDoc i) {
             return cachedValues[i.doc];
           }

           public int sortType() {
             return SortField.CUSTOM;
           }
         };
       } finally {
         enumerator.close();
       }
     }

     /**
      * Compares two cache entries, treating <code>null</code> (a document
      * with no term in the field) as smaller than any non-null value.
      * Without this guard a single hit lacking the field would abort the
      * whole sort with a NullPointerException.
      * @param a First cache entry, possibly <code>null</code>.
      * @param b Second cache entry, possibly <code>null</code>.
      * @return Negative, zero or positive as <code>a</code> sorts before,
      *  equal to or after <code>b</code>.
      */
     private static int compareNullsFirst (Comparable a, Comparable b) {
       if (a == null) {
         return (b == null) ? 0 : -1;
       }
       if (b == null) {
         return 1;
       }
       return a.compareTo (b);
     }

     /**
      * Returns an array of objects which represent the natural order
      * of the term values in the given field.  The array has one slot per
      * document (<code>reader.maxDoc()</code> entries); slots of documents
      * which are not listed for any term of the field are left
      * <code>null</code>.
      * @param reader Terms are in this index.
      * @param enumerator Use this to get the term values and TermDocs.
      * @param fieldname Comparables should be for this field.
      * @return Array of objects representing natural order of terms in field.
      * @throws IOException If an error occurs reading the index.
      * @throws RuntimeException If the index has documents but the
      *  enumerator has no current term.
      */
     protected Comparable[] fillCache (IndexReader reader, TermEnum enumerator, String fieldname)
     throws IOException {
       // Interned so the reference comparison against term.field() below can
       // match; presumably Term hands out interned field names - verify.
       final String field = fieldname.intern();
       Comparable[] retArray = new Comparable[reader.maxDoc()];
       if (retArray.length > 0) {
         TermDocs termDocs = reader.termDocs();
         try {
           // Only an exhausted enumerator is reported as an error.  If it is
           // positioned on a term of another field, the loop below exits on
           // its first pass and every slot stays null.
           if (enumerator.term() == null) {
             throw new RuntimeException ("no terms in field " + field);
           }
           do {
             Term term = enumerator.term();
             // Stop at the first term which belongs to a different field.
             if (term.field() != field) break;
             // One Comparable per unique term, shared by all of its documents.
             Comparable termval = getComparable (term.text());
             termDocs.seek (enumerator);
             while (termDocs.next()) {
               retArray[termDocs.doc()] = termval;
             }
           } while (enumerator.next());
         } finally {
           termDocs.close();
         }
       }
       return retArray;
     }

     /**
      * Returns an object which, when sorted according to natural order,
      * will order the Term values in the correct order.
      * <p>For example, if the Terms contained integer values, this method
      * would return <code>new Integer(termtext)</code>.  Note that this
      * might not always be the most efficient implementation - for this
      * particular example, a better implementation might be to make a
      * ScoreDocLookupComparator that uses an internal lookup table of int.
      * @param termtext The textual value of the term.
      * @return An object representing <code>termtext</code> that sorts
      *  according to the natural order of <code>termtext</code>.
      * @see Comparable
      * @see ScoreDocLookupComparator
      */
     protected abstract Comparable getComparable (String termtext);

   }


====================
How it would be used
====================
   ...
   // Build a SortField on "idfield" backed by an anonymous SortComparator:
   // each term's text is wrapped in a SampleComparable, whose compareTo
   // then decides the order.
   new SortField ("idfield", new SortComparator() {
     protected Comparable getComparable (String termtext) {
       return new SampleComparable (termtext);
     }
   });
   ...


   public class SampleComparable
   implements Comparable, Serializable {

     String string_part;
     Integer int_part;

     public SampleComparable (String s) {
       int i = s.indexOf ("-");
       string_part = s.substring (0, i);
       int_part = new Integer (s.substring (i + 1));
     }

     public int compareTo (Object o) {
       SampleComparable otherid = (SampleComparable) o;
       int i = string_part.compareTo (otherid.string_part);
       if (i == 0) return int_part.compareTo (otherid.int_part);
       return i;
     }
   }



---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message