lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Bernhard Messer <bmes...@apache.org>
Subject Re: svn commit: r332431 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java src/test/org/apache/lucene/search/TestCustomSearcherSort.java
Date Tue, 15 Nov 2005 19:01:36 GMT
Yonik,

TestCustomSearcherSort.java, which you added a few days ago, shows that the
author is Martin Seitz from T-Systems, and it doesn't have the Apache license
agreement in its header. Is it OK to have this test in SVN?

Bernhard


yonik@apache.org wrote:

>Author: yonik
>Date: Thu Nov 10 19:13:10 2005
>New Revision: 332431
>
>URL: http://svn.apache.org/viewcvs?rev=332431&view=rev
>Log:
>break sorting ties by index order: LUCENE-456
>
>Added:
>    lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java
>Modified:
>    lucene/java/trunk/CHANGES.txt
>    lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java
>
>Modified: lucene/java/trunk/CHANGES.txt
>URL: http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=332431&r1=332430&r2=332431&view=diff
>==============================================================================
>--- lucene/java/trunk/CHANGES.txt (original)
>+++ lucene/java/trunk/CHANGES.txt Thu Nov 10 19:13:10 2005
>@@ -245,6 +245,10 @@
>     change the sort order when sorting by string for documents without
>     a value for the sort field.
>     (Luc Vanlerberghe via Yonik, LUCENE-453)
>+
>+16. Fixed a sorting problem with MultiSearchers that can lead to
>+    missing or duplicate docs due to equal docs sorting in an arbitrary order.
>+    (Yonik Seeley, LUCENE-456)
> 	
> Optimizations
>      
>
>Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java
>URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java?rev=332431&r1=332430&r2=332431&view=diff
>==============================================================================
>--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (original)
>+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java Thu
Nov 10 19:13:10 2005
>@@ -157,6 +157,11 @@
> 				c = -c;
> 			}
> 		}
>-		return c > 0;
>+
>+    // avoid random sort order that could lead to duplicates (bug #31241):
>+    if (c == 0)
>+      return docA.doc > docB.doc;
>+
>+    return c > 0;
> 	}
> }
>
>Added: lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java
>URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java?rev=332431&view=auto
>==============================================================================
>--- lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java (added)
>+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java Thu
Nov 10 19:13:10 2005
>@@ -0,0 +1,268 @@
>+package org.apache.lucene.search;
>+
>+import java.io.IOException;
>+import java.io.Serializable;
>+import java.util.Calendar;
>+import java.util.GregorianCalendar;
>+import java.util.Map;
>+import java.util.Random;
>+import java.util.TreeMap;
>+
>+import junit.framework.Test;
>+import junit.framework.TestCase;
>+import junit.framework.TestSuite;
>+import junit.textui.TestRunner;
>+
>+import org.apache.lucene.analysis.standard.StandardAnalyzer;
>+import org.apache.lucene.document.DateTools;
>+import org.apache.lucene.document.Document;
>+import org.apache.lucene.document.Field;
>+import org.apache.lucene.index.IndexReader;
>+import org.apache.lucene.index.IndexWriter;
>+import org.apache.lucene.index.Term;
>+import org.apache.lucene.store.Directory;
>+import org.apache.lucene.store.RAMDirectory;
>+
>+/**
>+ * Unit test for sorting code.
>+ *
>+ * @author  Martin Seitz (T-Systems)
>+ */
>+
>+public class TestCustomSearcherSort
>+extends TestCase
>+implements Serializable {
>+
>+    private Directory index = null;
>+    private Query query = null;
>+    // reduced from 20000 to 2000 to speed up test...
>+    private final static int INDEX_SIZE = 2000;
>+
>+	public TestCustomSearcherSort (String name) {
>+		super (name);
>+	}
>+
>+	public static void main (String[] argv) {
>+	    TestRunner.run (suite());
>+	}
>+
>+	public static Test suite() {
>+		return new TestSuite (TestCustomSearcherSort.class);
>+	}
>+
>+
>+	// create an index for testing
>+	private Directory getIndex()
>+	throws IOException {
>+	        RAMDirectory indexStore = new RAMDirectory ();
>+	        IndexWriter writer = new IndexWriter (indexStore, new StandardAnalyzer(), true);
>+	        RandomGen random = new RandomGen();
>+	        for (int i=0; i<INDEX_SIZE; ++i) { // don't decrease; if to low the problem
doesn't show up
>+	        Document doc = new Document();
>+	            if((i%5)!=0) { // some documents must not have an entry in the first sort
field
>+	                doc.add (new Field("publicationDate_", random.getLuceneDate(), Field.Store.YES,
Field.Index.UN_TOKENIZED));
>+	            }
>+	            if((i%7)==0) { // some documents to match the query (see below) 
>+	                doc.add (new Field("content", "test", Field.Store.YES, Field.Index.TOKENIZED));
>+	            }
>+	            // every document has a defined 'mandant' field
>+	            doc.add(new Field("mandant", Integer.toString(i%3), Field.Store.YES, Field.Index.UN_TOKENIZED));
>+	            writer.addDocument (doc);
>+	        }
>+	        writer.optimize ();
>+	        writer.close ();
>+	    return indexStore;
>+	}
>+
>+	/**
>+	 * Create index and query for test cases. 
>+	 */
>+	public void setUp() throws Exception {
>+		index = getIndex();
>+	    query = new TermQuery( new Term("content", "test"));
>+	}
>+
>+	/**
>+	 * Run the test using two CustomSearcher instances. 
>+	 */
>+	public void testFieldSortCustomSearcher() throws Exception {
>+	    log("Run testFieldSortCustomSearcher");
>+		// define the sort criteria
>+	    Sort custSort = new Sort(new SortField[] {
>+	            new SortField("publicationDate_"), 
>+	            SortField.FIELD_SCORE
>+	    });
>+	    Searcher searcher = new CustomSearcher (index, 2);
>+	    // search and check hits
>+		matchHits(searcher, custSort);
>+	}
>+	/**
>+	 * Run the test using one CustomSearcher wrapped by a MultiSearcher. 
>+	 */
>+	public void testFieldSortSingleSearcher() throws Exception {
>+	    log("Run testFieldSortSingleSearcher");
>+		// define the sort criteria
>+	    Sort custSort = new Sort(new SortField[] {
>+	            new SortField("publicationDate_"), 
>+	            SortField.FIELD_SCORE
>+	    });
>+	    Searcher searcher = 
>+	        new MultiSearcher(new Searchable[] {
>+	                new CustomSearcher (index, 2)});
>+	    // search and check hits
>+		matchHits(searcher, custSort);
>+	}
>+	/**
>+	 * Run the test using two CustomSearcher instances. 
>+	 */
>+	public void testFieldSortMultiCustomSearcher() throws Exception {
>+	    log("Run testFieldSortMultiCustomSearcher");
>+		// define the sort criteria
>+	    Sort custSort = new Sort(new SortField[] {
>+	            new SortField("publicationDate_"), 
>+	            SortField.FIELD_SCORE
>+	    });
>+	    Searcher searcher = 
>+	        new MultiSearcher(new Searchable[] {
>+	                new CustomSearcher (index, 0),
>+	                new CustomSearcher (index, 2)});
>+	    // search and check hits
>+		matchHits(searcher, custSort);
>+	}
>+
>+
>+	// make sure the documents returned by the search match the expected list
>+	private void matchHits (Searcher searcher, Sort sort)
>+	throws IOException {
>+	    // make a query without sorting first
>+		Hits hitsByRank = searcher.search(query);
>+		checkHits(hitsByRank, "Sort by rank: "); // check for duplicates
>+        Map resultMap = new TreeMap();
>+        // store hits in TreeMap - TreeMap does not allow duplicates; existing entries
are silently overwritten
>+        for(int hitid=0;hitid<hitsByRank.length(); ++hitid) {
>+            resultMap.put(
>+                    new Integer(hitsByRank.id(hitid)),  // Key:   Lucene Document ID
>+                    new Integer(hitid));				// Value: Hits-Objekt Index
>+        }
>+        
>+        // now make a query using the sort criteria
>+		Hits resultSort = searcher.search (query, sort);
>+		checkHits(resultSort, "Sort by custom criteria: "); // check for duplicates
>+		
>+        String lf = System.getProperty("line.separator", "\n");
>+        // besides the sorting both sets of hits must be identical
>+        for(int hitid=0;hitid<resultSort.length(); ++hitid) {
>+            Integer idHitDate = new Integer(resultSort.id(hitid)); // document ID from
sorted search
>+            if(!resultMap.containsKey(idHitDate)) {
>+                log("ID "+idHitDate+" not found. Possibliy a duplicate.");
>+            }
>+            assertTrue(resultMap.containsKey(idHitDate)); // same ID must be in the Map
from the rank-sorted search
>+            // every hit must appear once in both result sets --> remove it from the
Map.
>+            // At the end the Map must be empty!
>+            resultMap.remove(idHitDate);
>+        }
>+        if(resultMap.size()==0) {
>+            log("All hits matched");
>+        } else {
>+        log("Couldn't match "+resultMap.size()+" hits.");
>+        }
>+        assertEquals(resultMap.size(), 0);
>+	}
>+
>+	/**
>+	 * Check the hits for duplicates.
>+	 * @param hits
>+	 */
>+    private void checkHits(Hits hits, String prefix) {
>+        if(hits!=null) {
>+            Map idMap = new TreeMap();
>+            for(int docnum=0;docnum<hits.length();++docnum) {
>+                Integer luceneId = null;
>+                try {
>+                    luceneId = new Integer(hits.id(docnum));
>+                    if(idMap.containsKey(luceneId)) {
>+                        StringBuffer message = new StringBuffer(prefix);
>+                        message.append("Duplicate key for hit index = ");
>+                        message.append(docnum);
>+                        message.append(", previous index = ");
>+                        message.append(((Integer)idMap.get(luceneId)).toString());
>+                        message.append(", Lucene ID = ");
>+                        message.append(luceneId);
>+                        log(message.toString());
>+                    } else { 
>+                        idMap.put(luceneId, new Integer(docnum));
>+                    }
>+                } catch(IOException ioe) {
>+                    StringBuffer message = new StringBuffer(prefix);
>+                    message.append("Error occurred for hit index = ");
>+                    message.append(docnum);
>+                    message.append(" (");
>+                    message.append(ioe.getMessage());
>+                    message.append(")");
>+                    log(message.toString());
>+                }
>+            }
>+        }
>+    }
>+    
>+    // Simply write to console - choosen to be independant of log4j etc 
>+    private void log(String message) {
>+        System.out.println(message);
>+    }
>+    
>+    public class CustomSearcher extends IndexSearcher {
>+        private int switcher;
>+        /**
>+         * @param directory
>+         * @throws IOException
>+         */
>+        public CustomSearcher(Directory directory, int switcher) throws IOException {
>+            super(directory);
>+            this.switcher = switcher;
>+        }
>+        /**
>+         * @param r
>+         */
>+        public CustomSearcher(IndexReader r, int switcher) {
>+            super(r);
>+            this.switcher = switcher;
>+        }
>+        /**
>+         * @param path
>+         * @throws IOException
>+         */
>+        public CustomSearcher(String path, int switcher) throws IOException {
>+            super(path);
>+            this.switcher = switcher;
>+        }
>+        /* (non-Javadoc)
>+         * @see org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query,
org.apache.lucene.search.Filter, int, org.apache.lucene.search.Sort)
>+         */
>+        public TopFieldDocs search(Query query, Filter filter, int nDocs,
>+                Sort sort) throws IOException {
>+            BooleanQuery bq = new BooleanQuery();
>+            bq.add(query, BooleanClause.Occur.MUST);
>+            bq.add(new TermQuery(new Term("mandant", Integer.toString(switcher))), BooleanClause.Occur.MUST);
>+            return super.search(bq, filter, nDocs, sort);
>+        }
>+        /* (non-Javadoc)
>+         * @see org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query,
org.apache.lucene.search.Filter, int)
>+         */
>+        public TopDocs search(Query query, Filter filter, int nDocs)
>+        throws IOException {
>+            BooleanQuery bq = new BooleanQuery();
>+            bq.add(query, BooleanClause.Occur.MUST);
>+            bq.add(new TermQuery(new Term("mandant", Integer.toString(switcher))), BooleanClause.Occur.MUST);
>+            return super.search(bq, filter, nDocs);
>+        }
>+    }
>+    private class RandomGen {
>+        private Random random = new Random(0); // to generate some arbitrary contents
>+	    private Calendar base = new GregorianCalendar(1980, 1, 1);
>+
>+	    // Just to generate some different Lucene Date strings
>+        private String getLuceneDate() {
>+    	    return DateTools.timeToString(base.getTimeInMillis() + random.nextInt() - Integer.MIN_VALUE,
DateTools.Resolution.DAY);
>+        }
>+    }
>+}
>
>
>
>  
>


---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org


Mime
View raw message