lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tjo...@apache.org
Subject cvs commit: jakarta-lucene/src/test/org/apache/lucene/search TestSort.java
Date Tue, 23 Mar 2004 16:49:56 GMT
tjones      2004/03/23 08:49:56

  Modified:    src/java/org/apache/lucene/search
                        MultiFieldSortedHitQueue.java
               src/test/org/apache/lucene/search TestSort.java
  Log:
  fix to properly normalize scores even when hits are sorted
  also wrote tests to verify scores are the same whether sorted or not
  
  Revision  Changes    Path
  1.3       +14 -1     jakarta-lucene/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java
  
  Index: MultiFieldSortedHitQueue.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- MultiFieldSortedHitQueue.java	24 Feb 2004 19:34:58 -0000	1.2
  +++ MultiFieldSortedHitQueue.java	23 Mar 2004 16:49:56 -0000	1.3
  @@ -64,6 +64,11 @@
   	/** Stores the sort criteria being used. */
   	protected SortField[] fields;
   
  +	/** Stores the maximum score value encountered, for normalizing.
  +	 *  we only care about scores greater than 1.0 - if all the scores
  +	 *  are less than 1.0, we don't have to normalize. */
  +	protected float maxscore = 1.0f;
  +
   
   	/**
   	 * Returns whether <code>a</code> is less relevant than <code>b</code>.
  @@ -74,6 +79,12 @@
   	protected final boolean lessThan (final Object a, final Object b) {
   		final ScoreDoc docA = (ScoreDoc) a;
   		final ScoreDoc docB = (ScoreDoc) b;
  +
  +		// keep track of maximum score
  +		if (docA.score > maxscore) maxscore = docA.score;
  +		if (docB.score > maxscore) maxscore = docB.score;
  +
  +		// run comparators
   		final int n = comparators.length;
   		int c = 0;
   		for (int i=0; i<n && c==0; ++i) {
  @@ -100,6 +111,7 @@
   		for (int i=0; i<n; ++i)
   			fields[i] = comparators[i].sortValue(doc);
   		doc.fields = fields;
  +		if (maxscore > 1.0f) doc.score /= maxscore;   // normalize scores
   		return doc;
   	}
   
  @@ -108,4 +120,5 @@
   	SortField[] getFields() {
   		return fields;
   	}
  +
   }
  
  
  
  1.3       +136 -1    jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java
  
  Index: TestSort.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TestSort.java	23 Mar 2004 15:59:49 -0000	1.2
  +++ TestSort.java	23 Mar 2004 16:49:56 -0000	1.3
  @@ -28,6 +28,8 @@
   import java.rmi.registry.Registry;
   import java.io.IOException;
   import java.util.regex.Pattern;
  +import java.util.HashMap;
  +import java.util.Iterator;
   
   import junit.framework.TestCase;
   import junit.framework.Test;
  @@ -241,6 +243,115 @@
   		runMultiSorts (multi);
   	}
   
  +	// test that the relevancy scores are the same even if
  +	// hits are sorted
  +	public void testNormalizedScores() throws Exception {
  +
  +		// capture relevancy scores
  +		HashMap scoresX = getScores (full.search (queryX));
  +		HashMap scoresY = getScores (full.search (queryY));
  +		HashMap scoresA = getScores (full.search (queryA));
  +
  +		// we'll test searching locally, remote and multi
  +		// note: the multi test depends on each separate index containing
  +		// the same documents as our local index, so the computed normalization
  +		// will be the same.  so we make a multi searcher over two equal document
  +		// sets - not realistic, but necessary for testing.
  +		MultiSearcher remote = new MultiSearcher (new Searchable[] { getRemote() });
  +		MultiSearcher multi  = new MultiSearcher (new Searchable[] { full, full });
  +
  +		// change sorting and make sure relevancy stays the same
  +
  +		sort = new Sort();
  +		assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +		assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +		assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +		sort.setSort(SortField.FIELD_DOC);
  +		assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +		assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +		assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +		sort.setSort ("int");
  +		assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +		assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +		assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +		sort.setSort ("float");
  +		assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +		assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +		assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +		sort.setSort ("string");
  +		assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +		assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +		assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +		sort.setSort (new String[] {"int","float"});
  +		assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +		assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +		assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +		sort.setSort (new SortField[] { new SortField ("int", true), new SortField (null, SortField.DOC, true) });
  +		assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +		assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +		assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +		sort.setSort (new String[] {"float","string"});
  +		assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +		assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +		assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +		assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +		assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +		assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +	}
  +
   	// runs a variety of sorts useful for multisearchers
   	private void runMultiSorts (Searcher multi) throws Exception {
   		sort.setSort (SortField.FIELD_DOC);
  @@ -313,6 +424,30 @@
   		assertTrue (Pattern.compile(pattern).matcher(buff.toString()).matches());
   	}
   
  +	private HashMap getScores (Hits hits)
  +	throws IOException {
  +		HashMap scoreMap = new HashMap();
  +		int n = hits.length();
  +		for (int i=0; i<n; ++i) {
  +			Document doc = hits.doc(i);
  +			String[] v = doc.getValues("tracer");
  +			assertEquals (v.length, 1);
  +			scoreMap.put (v[0], new Float(hits.score(i)));
  +		}
  +		return scoreMap;
  +	}
  +
  +	// make sure all the values in the maps match
  +	private void assertSameValues (HashMap m1, HashMap m2) {
  +		int n = m1.size();
  +		int m = m2.size();
  +		assertEquals (n, m);
  +		Iterator iter = m1.keySet().iterator();
  +		while (iter.hasNext()) {
  +			Object key = iter.next();
  +			assertEquals (m1.get(key), m2.get(key));
  +		}
  +	}
   
   	private Searchable getRemote () throws Exception {
   		try {
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message