lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mharw...@apache.org
Subject svn commit: r638631 - in /lucene/java/trunk/contrib/queries/src: java/org/apache/lucene/search/TermsFilter.java test/org/apache/lucene/search/TermsFilterTest.java
Date Tue, 18 Mar 2008 23:01:11 GMT
Author: mharwood
Date: Tue Mar 18 16:01:00 2008
New Revision: 638631

URL: http://svn.apache.org/viewvc?rev=638631&view=rev
Log:
Applied trejkaz's patch from https://issues.apache.org/jira/browse/LUCENE-1240 to optimise
TermsFilter.java and included a new JUnit test

Added:
    lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java
Modified:
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java

Modified: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java?rev=638631&r1=638630&r2=638631&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java (original)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java Tue Mar 18 16:01:00 2008
@@ -18,9 +18,10 @@
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -37,7 +38,7 @@
  */
 public class TermsFilter extends Filter
 {
-	ArrayList termsList=new ArrayList();
+	Set terms=new TreeSet();
 	
 	/**
 	 * Adds a term to the list of acceptable terms   
@@ -45,7 +46,7 @@
 	 */
 	public void addTerm(Term term)
 	{
-		termsList.add(term);
+		terms.add(term);
 	}
 
 	/* (non-Javadoc)
@@ -54,16 +55,24 @@
 	public BitSet bits(IndexReader reader) throws IOException
 	{
 		BitSet result=new BitSet(reader.maxDoc());
-		for (Iterator iter = termsList.iterator(); iter.hasNext();)
-		{
-			Term term = (Term) iter.next();
-			TermDocs td=reader.termDocs(term);
-	        while (td.next())
-	        {
-	            result.set(td.doc());
-	        }						
-		}
-		return result;
+        TermDocs td = reader.termDocs();
+        try
+        {
+            for (Iterator iter = terms.iterator(); iter.hasNext();)
+            {
+                Term term = (Term) iter.next();
+                td.seek(term);
+                while (td.next())
+                {
+                    result.set(td.doc());
+                }
+            }
+        }
+        finally
+        {
+            td.close();
+        }
+        return result;
 	}
 	
 	public boolean equals(Object obj)
@@ -73,14 +82,14 @@
 		if((obj == null) || (obj.getClass() != this.getClass()))
 				return false;
 		TermsFilter test = (TermsFilter)obj;
-		return (termsList == test.termsList|| 
-					 (termsList!= null && termsList.equals(test.termsList)));
+		return (terms == test.terms ||
+					 (terms != null && terms.equals(test.terms)));
 	}
 
 	public int hashCode()
 	{
 		int hash=9;
-		for (Iterator iter = termsList.iterator(); iter.hasNext();)
+		for (Iterator iter = terms.iterator(); iter.hasNext();)
 		{
 			Term term = (Term) iter.next();
 			hash = 31 * hash + term.hashCode();			

Added: lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java?rev=638631&view=auto
==============================================================================
--- lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java (added)
+++ lucene/java/trunk/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java Tue Mar 18 16:01:00 2008
@@ -0,0 +1,70 @@
+package org.apache.lucene.search;
+
+import java.util.BitSet;
+import java.util.HashSet;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.RAMDirectory;
+
+public class TermsFilterTest extends TestCase
+{
+	public void testCachability() throws Exception
+	{
+		TermsFilter a=new TermsFilter();
+		a.addTerm(new Term("field1","a"));
+		a.addTerm(new Term("field1","b"));
+		HashSet cachedFilters=new HashSet();
+		cachedFilters.add(a);
+		TermsFilter b=new TermsFilter();
+		b.addTerm(new Term("field1","a"));
+		b.addTerm(new Term("field1","b"));
+		
+		assertTrue("Must be cached",cachedFilters.contains(b));
+		b.addTerm(new Term("field1","a")); //duplicate term
+		assertTrue("Must be cached",cachedFilters.contains(b));
+		b.addTerm(new Term("field1","c"));
+		assertFalse("Must not be cached",cachedFilters.contains(b));
+		
+	}
+	public void testMissingTerms() throws Exception
+	{
+		String fieldName="field1";
+		RAMDirectory rd=new RAMDirectory();
+		IndexWriter w=new IndexWriter(rd,new WhitespaceAnalyzer(),MaxFieldLength.UNLIMITED);
+		for (int i = 0; i < 100; i++)
+		{
+			Document doc=new Document();
+			int term=i*10; //terms are units of 10;
+			doc.add(new Field(fieldName,""+term,Field.Store.YES,Field.Index.UN_TOKENIZED));
+			w.addDocument(doc);			
+		}
+		w.close();
+		IndexReader reader = IndexReader.open(rd);
+		
+		TermsFilter tf=new TermsFilter();
+		tf.addTerm(new Term(fieldName,"19"));
+		BitSet bits = tf.bits(reader);
+		assertEquals("Must match nothing", 0, bits.cardinality());
+
+		tf.addTerm(new Term(fieldName,"20"));
+		bits=tf.bits(reader);
+		assertEquals("Must match 1", 1, bits.cardinality());
+		
+		tf.addTerm(new Term(fieldName,"10"));
+		bits=tf.bits(reader);
+		assertEquals("Must match 2", 2, bits.cardinality());
+		
+		tf.addTerm(new Term(fieldName,"00"));
+		bits=tf.bits(reader);
+		assertEquals("Must match 2", 2, bits.cardinality());
+				
+	}
+}



Mime
View raw message