lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tjo...@apache.org
Subject cvs commit: jakarta-lucene/src/test/org/apache/lucene/search TestSort.java
Date Mon, 24 May 2004 22:51:42 GMT
tjones      2004/05/24 15:51:42

  Modified:    src/java/org/apache/lucene/search FieldCacheImpl.java
                        FieldDocSortedHitQueue.java
                        FieldSortedHitQueue.java SortField.java
               src/test/org/apache/lucene/search TestSort.java
  Log:
  added a SortField which uses a Locale to sort strings.
  also fixed the discrepancy about what happens when a document has no terms in a sorted field.
  added test cases for both of the above.
  
  Revision  Changes    Path
  1.2       +10 -3     jakarta-lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
  
  Index: FieldCacheImpl.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- FieldCacheImpl.java	19 May 2004 23:05:27 -0000	1.1
  +++ FieldCacheImpl.java	24 May 2004 22:51:42 -0000	1.2
  @@ -230,11 +230,18 @@
       Object ret = lookup (reader, field, STRING_INDEX);
       if (ret == null) {
         final int[] retArray = new int[reader.maxDoc()];
  -      String[] mterms = new String[reader.maxDoc()];
  +      String[] mterms = new String[reader.maxDoc()+1];
         if (retArray.length > 0) {
           TermDocs termDocs = reader.termDocs();
           TermEnum termEnum = reader.terms (new Term (field, ""));
           int t = 0;  // current term number
  +
  +        // an entry for documents that have no terms in this field
  +        // should a document with no terms be at top or bottom?
  +        // this puts them at the top - if it is changed, FieldDocSortedHitQueue
  +        // needs to change as well.
  +        mterms[t++] = null;
  +
           try {
             if (termEnum.term() == null) {
               throw new RuntimeException ("no terms in field " + field);
  @@ -264,7 +271,7 @@
             // if there are no terms, make the term array
             // have a single null entry
             mterms = new String[1];
  -		} else if (t < mterms.length) {
  +        } else if (t < mterms.length) {
             // if there are less terms than documents,
             // trim off the dead array space
             String[] terms = new String[t];
  
  
  
  1.5       +42 -4     jakarta-lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java
  
  Index: FieldDocSortedHitQueue.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- FieldDocSortedHitQueue.java	19 May 2004 23:05:27 -0000	1.4
  +++ FieldDocSortedHitQueue.java	24 May 2004 22:51:42 -0000	1.5
  @@ -19,6 +19,8 @@
   import org.apache.lucene.util.PriorityQueue;
   
   import java.io.IOException;
  +import java.text.Collator;
  +import java.util.Locale;
   
   /**
    * Expert: Collects sorted results from Searchable's and collates them.
  @@ -37,6 +39,10 @@
   	// have been resolved by the time this class is used.
   	volatile SortField[] fields;
   
  +	// used in the case where the fields are sorted by locale
  +	// based strings
  +	volatile Collator[] collators;
  +
   
   	/**
   	 * Creates a hit queue sorted by the given list of fields.
  @@ -47,6 +53,7 @@
   	FieldDocSortedHitQueue (SortField[] fields, int size)
   	throws IOException {
   		this.fields = fields;
  +		this.collators = hasCollators (fields);
   		initialize (size);
   	}
   
  @@ -60,7 +67,10 @@
   	 * @param fields
   	 */
   	synchronized void setFields (SortField[] fields) {
  -		if (this.fields == null) this.fields = fields;
  +		if (this.fields == null) {
  +			this.fields = fields;
  +			this.collators = hasCollators (fields);
  +		}
   	}
   
   
  @@ -70,6 +80,23 @@
   	}
   
   
  +	/** Returns an array of collators, possibly <code>null</code>.  The collators
  +	 * correspond to any SortFields which were given a specific locale.
  +	 * @param fields Array of sort fields.
  +	 * @return Array, possibly <code>null</code>.
  +	 */
  +	private Collator[] hasCollators (final SortField[] fields) {
  +		if (fields == null) return null;
  +		Collator[] ret = new Collator[fields.length];
  +		for (int i=0; i<fields.length; ++i) {
  +			Locale locale = fields[i].getLocale();
  +			if (locale != null)
  +				ret[i] = Collator.getInstance (locale);
  +		}
  +		return ret;
  +	}
  +
  +
   	/**
   	 * Returns whether <code>a</code> is less relevant than <code>b</code>.
   	 * @param a ScoreDoc
  @@ -103,7 +130,11 @@
   						String s2 = (String) docB.fields[i];
   						if (s2 == null) c = -1;      // could be null if there are
   						else if (s1 == null) c = 1;  // no terms in the given field
  -						else c = s2.compareTo(s1);
  +						else if (fields[i].getLocale() == null) {
  +							c = s2.compareTo(s1);
  +						} else {
  +							c = collators[i].compare (s2, s1);
  +						}
   						break;
   					case SortField.FLOAT:
   						float f1 = ((Float)docA.fields[i]).floatValue();
  @@ -141,9 +172,16 @@
   					case SortField.STRING:
   						String s1 = (String) docA.fields[i];
   						String s2 = (String) docB.fields[i];
  +						// null values need to be sorted first, because of how FieldCache.getStringIndex()
  +						// works - in that routine, any documents without a value in the given field are
  +						// put first.
   						if (s1 == null) c = -1;      // could be null if there are
   						else if (s2 == null) c = 1;  // no terms in the given field
  -						else c = s1.compareTo(s2);
  +						else if (fields[i].getLocale() == null) {
  +							c = s1.compareTo(s2);
  +						} else {
  +							c = collators[i].compare (s1, s2);
  +						}
   						break;
   					case SortField.FLOAT:
   						float f1 = ((Float)docA.fields[i]).floatValue();
  
  
  
  1.10      +35 -4     jakarta-lucene/src/java/org/apache/lucene/search/FieldSortedHitQueue.java
  
  Index: FieldSortedHitQueue.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldSortedHitQueue.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- FieldSortedHitQueue.java	19 May 2004 23:05:27 -0000	1.9
  +++ FieldSortedHitQueue.java	24 May 2004 22:51:42 -0000	1.10
  @@ -22,6 +22,8 @@
   import java.io.IOException;
   import java.util.WeakHashMap;
   import java.util.Map;
  +import java.util.Locale;
  +import java.text.Collator;
   
   /**
    * Expert: A hit queue for sorting by hits by terms in more than one field.
  @@ -52,7 +54,7 @@
       this.fields = new SortField[n];
       for (int i=0; i<n; ++i) {
         String fieldname = fields[i].getField();
  -      comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getFactory());
  +      comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getLocale(), fields[i].getFactory());
         this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
       }
       initialize (size);
  @@ -144,7 +146,7 @@
       }
     }
   
  -  static ScoreDocComparator getCachedComparator (IndexReader reader, String fieldname, int type, SortComparatorSource factory)
  +  static ScoreDocComparator getCachedComparator (IndexReader reader, String fieldname, int type, Locale locale, SortComparatorSource factory)
     throws IOException {
       if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
       if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
  @@ -161,7 +163,8 @@
             comparator = comparatorFloat (reader, fieldname);
             break;
           case SortField.STRING:
  -          comparator = comparatorString (reader, fieldname);
  +          if (locale != null) comparator = comparatorStringLocale (reader, fieldname, locale);
  +          else comparator = comparatorString (reader, fieldname);
             break;
           case SortField.CUSTOM:
             comparator = factory.newComparator (reader, fieldname);
  @@ -261,6 +264,34 @@
   
         public Comparable sortValue (final ScoreDoc i) {
           return index.lookup[index.order[i.doc]];
  +      }
  +
  +      public int sortType() {
  +        return SortField.STRING;
  +      }
  +    };
  +  }
  +
  +  /**
  +   * Returns a comparator for sorting hits according to a field containing strings.
  +   * @param reader  Index to use.
  +   * @param fieldname  Field containing string values.
  +   * @return  Comparator for sorting hits.
  +   * @throws IOException If an error occurs reading the index.
  +   */
  +  static ScoreDocComparator comparatorStringLocale (final IndexReader reader, final String fieldname, final Locale locale)
  +  throws IOException {
  +    final Collator collator = Collator.getInstance (locale);
  +    final String field = fieldname.intern();
  +    return new ScoreDocComparator() {
  +      final String[] index = FieldCache.DEFAULT.getStrings (reader, field);
  +
  +      public final int compare (final ScoreDoc i, final ScoreDoc j) {
  +        return collator.compare (index[i.doc], index[j.doc]);
  +      }
  +
  +      public Comparable sortValue (final ScoreDoc i) {
  +        return index[i.doc];
         }
   
         public int sortType() {
  
  
  
  1.9       +37 -4     jakarta-lucene/src/java/org/apache/lucene/search/SortField.java
  
  Index: SortField.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/SortField.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- SortField.java	19 May 2004 23:05:27 -0000	1.8
  +++ SortField.java	24 May 2004 22:51:42 -0000	1.9
  @@ -17,6 +17,7 @@
    */
   
   import java.io.Serializable;
  +import java.util.Locale;
   
   /**
    * Stores information about how to sort documents by terms in an individual
  @@ -66,7 +67,7 @@
     // as the above static int values.  Any new values must not have the same value
     // as FieldCache.STRING_INDEX.
   
  -	
  +
     /** Represents sorting by document score (relevancy). */
     public static final SortField FIELD_SCORE = new SortField (null, SCORE);
   
  @@ -76,6 +77,7 @@
   
     private String field;
     private int type = AUTO;  // defaults to determining type dynamically
  +  private Locale locale;    // defaults to "natural order" (no Locale)
     boolean reverse = false;  // defaults to natural order
     private SortComparatorSource factory;
   
  @@ -121,6 +123,29 @@
       this.reverse = reverse;
     }
   
  +  /** Creates a sort by terms in the given field sorted
  +   * according to the given locale.
  +   * @param field  Name of field to sort by, cannot be <code>null</code>.
  +   * @param locale Locale of values in the field.
  +   */
  +  public SortField (String field, Locale locale) {
  +    this.field = field.intern();
  +    this.type = STRING;
  +    this.locale = locale;
  +  }
  +
  +  /** Creates a sort, possibly in reverse, by terms in the given field sorted
  +   * according to the given locale.
  +   * @param field  Name of field to sort by, cannot be <code>null</code>.
  +   * @param locale Locale of values in the field.
  +   */
  +  public SortField (String field, Locale locale, boolean reverse) {
  +    this.field = field.intern();
  +    this.type = STRING;
  +    this.locale = locale;
  +    this.reverse = reverse;
  +  }
  +
     /** Creates a sort with a custom comparison function.
      * @param field Name of field to sort by; cannot be <code>null</code>.
      * @param comparator Returns a comparator for sorting hits.
  @@ -158,6 +183,14 @@
       return type;
     }
   
  +  /** Returns the Locale by which term values are interpreted.
  +   * May return <code>null</code> if no Locale was specified.
  +   * @return Locale, or <code>null</code>.
  +   */
  +  public Locale getLocale() {
  +    return locale;
  +  }
  +
     /** Returns whether the sort should be reversed.
      * @return  True if natural order should be reversed.
      */
  @@ -186,8 +219,8 @@
                  break;
       }
   
  -    if (reverse)
  -      buffer.append('!');
  +    if (locale != null) buffer.append ("("+locale+")");
  +    if (reverse) buffer.append('!');
   
       return buffer.toString();
     }
  
  
  
  1.7       +65 -7     jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java
  
  Index: TestSort.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- TestSort.java	19 May 2004 23:05:27 -0000	1.6
  +++ TestSort.java	24 May 2004 22:51:42 -0000	1.7
  @@ -30,6 +30,7 @@
   import java.util.regex.Pattern;
   import java.util.HashMap;
   import java.util.Iterator;
  +import java.util.Locale;
   
   import junit.framework.TestCase;
   import junit.framework.Test;
  @@ -56,6 +57,7 @@
   	private Query queryX;
   	private Query queryY;
   	private Query queryA;
  +	private Query queryF;
   	private Sort sort;
   
   
  @@ -101,6 +103,7 @@
   	{   "H",   "y a b c d",     "0",           "1.4E-45",      "e",     "C-88"  },
   	{   "I",   "x a b c d e f", "-2147483648", "1.0e+0",       "d",     "A-10"  },
   	{   "J",   "y a b c d e f", "4",           ".5",           "b",     "C-7"   },
  +	{   "Z",   "f",             null,          null,           null,    null    }
   	};
   
   	// create an index of all the documents, or just the x, or just the y documents
  @@ -113,10 +116,10 @@
   				Document doc = new Document();          // store, index, token
   				doc.add (new Field ("tracer",   data[i][0], true, false, false));
   				doc.add (new Field ("contents", data[i][1], false, true, true));
  -				doc.add (new Field ("int",      data[i][2], false, true, false));
  -				doc.add (new Field ("float",    data[i][3], false, true, false));
  -				doc.add (new Field ("string",   data[i][4], false, true, false));
  -				doc.add (new Field ("custom",   data[i][5], false, true, false));
  +				if (data[i][2] != null) doc.add (new Field ("int",      data[i][2], false, true, false));
  +				if (data[i][3] != null) doc.add (new Field ("float",    data[i][3], false, true, false));
  +				if (data[i][4] != null) doc.add (new Field ("string",   data[i][4], false, true, false));
  +				if (data[i][5] != null) doc.add (new Field ("custom",   data[i][5], false, true, false));
   				writer.addDocument (doc);
   			}
   		}
  @@ -152,6 +155,7 @@
   		queryX = new TermQuery (new Term ("contents", "x"));
   		queryY = new TermQuery (new Term ("contents", "y"));
   		queryA = new TermQuery (new Term ("contents", "a"));
  +		queryF = new TermQuery (new Term ("contents", "f"));
   		sort = new Sort();
   	}
   
  @@ -239,6 +243,27 @@
   		assertMatches (full, queryY, sort, "BFHJD");
   	}
   
  +	// test sorting when the sort field is empty (undefined) for some of the documents
  +	public void testEmptyFieldSort() throws Exception {
  +		sort.setSort ("string");
  +		assertMatches (full, queryF, sort, "ZJI");
  +
  +		sort.setSort ("string", true);
  +		assertMatches (full, queryF, sort, "IJZ");
  +
  +		sort.setSort ("int");
  +		assertMatches (full, queryF, sort, "IZJ");
  +
  +		sort.setSort ("int", true);
  +		assertMatches (full, queryF, sort, "JZI");
  +
  +		sort.setSort ("float");
  +		assertMatches (full, queryF, sort, "ZJI");
  +
  +		sort.setSort ("float", true);
  +		assertMatches (full, queryF, sort, "IJZ");
  +	}
  +
   	// test sorts using a series of fields
   	public void testSortCombos() throws Exception {
   		sort.setSort (new String[] {"int","float"});
  @@ -251,7 +276,18 @@
   		assertMatches (full, queryX, sort, "GICEA");
   	}
   
  +	// test using a Locale for sorting strings
  +	public void testLocaleSort() throws Exception {
  +		sort.setSort (new SortField[] { new SortField ("string", Locale.US) });
  +		assertMatches (full, queryX, sort, "AIGEC");
  +		assertMatches (full, queryY, sort, "DJHFB");
   
  +		sort.setSort (new SortField[] { new SortField ("string", Locale.US, true) });
  +		assertMatches (full, queryX, sort, "CEGIA");
  +		assertMatches (full, queryY, sort, "BFHJD");
  +	}
  +
  +	// test a custom sort function
   	public void testCustomSorts() throws Exception {
   		sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));
   		assertMatches (full, queryX, sort, "CAIEG");
  @@ -283,6 +319,7 @@
   		runMultiSorts (multi);
   	}
   
  +	// test custom search when remote
   	public void testRemoteCustomSort() throws Exception {
   		Searchable searcher = getRemote();
   		MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher });
  @@ -438,11 +475,32 @@
   		sort.setSort ("string", true);
   		assertMatches (multi, queryA, sort, "CBEFGHIAJD");
   
  +		sort.setSort (new SortField[] { new SortField ("string", Locale.US) });
  +		assertMatches (multi, queryA, sort, "DJAIHGFEBC");
  +
  +		sort.setSort (new SortField[] { new SortField ("string", Locale.US, true) });
  +		assertMatches (multi, queryA, sort, "CBEFGHIAJD");
  +
   		sort.setSort (new String[] {"int","float"});
  -		assertMatches (full, queryA, sort, "IDHFGJEABC");
  +		assertMatches (multi, queryA, sort, "IDHFGJEABC");
   
   		sort.setSort (new String[] {"float","string"});
  -		assertMatches (full, queryA, sort, "GDHJICEFAB");
  +		assertMatches (multi, queryA, sort, "GDHJICEFAB");
  +
  +		sort.setSort ("int");
  +		assertMatches (multi, queryF, sort, "IZJ");
  +
  +		sort.setSort ("int", true);
  +		assertMatches (multi, queryF, sort, "JZI");
  +
  +		sort.setSort ("float");
  +		assertMatches (multi, queryF, sort, "ZJI");
  +
  +		sort.setSort ("string");
  +		assertMatches (multi, queryF, sort, "ZJI");
  +
  +		sort.setSort ("string", true);
  +		assertMatches (multi, queryF, sort, "IJZ");
   	}
   
   	// make sure the documents returned by the search match the expected list
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message