lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Grant Ingersoll" <gsing...@syr.edu>
Subject [PATCH] TestStopAnalyzer
Date Fri, 30 Jan 2004 21:20:11 GMT
Hi,

I just noticed that the TestStopAnalyzer unit test depended on a lot of other
pieces of code just to test whether the StopAnalyzer properly removes stop words.
This is not a big change, but here is the diff produced by cvs diff -u TestStopAnalyzer.java

Cheers,
Grant Ingersoll



Index: TestStopAnalyzer.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java,v
retrieving revision 1.2
diff -u -r1.2 TestStopAnalyzer.java
--- TestStopAnalyzer.java	8 Dec 2003 16:16:32 -0000	1.2
+++ TestStopAnalyzer.java	30 Jan 2004 21:17:50 -0000
@@ -1,60 +1,125 @@
 package org.apache.lucene.analysis;
 
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+
+
 import junit.framework.TestCase;
+
 import java.io.StringReader;
-import java.util.ArrayList;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Hits;
+import java.io.IOException;
+import java.util.Set;
+import java.util.HashSet;
 
-public class TestStopAnalyzer extends TestCase {
-  private StopAnalyzer stopAnalyzer = new StopAnalyzer();
+//import org.cnlp.utils.properties.ResourceBundleHelper;
 
-  public Token[] tokensFromAnalyzer(Analyzer analyzer, String text)
-                                                  throws Exception {
-    TokenStream stream =
-      analyzer.tokenStream("contents", new StringReader(text));
-    ArrayList tokenList = new ArrayList();
-    while (true) {
-      Token token = stream.next();
-      if (token == null) break;
+public class TestStopAnalyzer extends TestCase {
+  private StopAnalyzer stop = new StopAnalyzer();
+  
+  private Set inValidTokens = new HashSet();
+  public TestStopAnalyzer(String s) {
+    super(s);
+  }
 
-      tokenList.add(token);
+  protected void setUp() {
+    for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.length; i++) {
+      inValidTokens.add(StopAnalyzer.ENGLISH_STOP_WORDS[i]);
+      
     }
-
-    return (Token[]) tokenList.toArray(new Token[0]);
   }
 
+  protected void tearDown() {
+  }
 
-  public void testPhraseQuery() throws Exception {
-    RAMDirectory directory = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true);
-    Document doc = new Document();
-    doc.add(Field.Text("field", "the stop words are here"));
-    writer.addDocument(doc);
-    writer.close();
-
-    IndexSearcher searcher = new IndexSearcher(directory);
-
-    // valid exact phrase query
-    PhraseQuery query = new PhraseQuery();
-    query.add(new Term("field","stop"));
-    query.add(new Term("field","words"));
-    Hits hits = searcher.search(query);
-    assertEquals(1, hits.length());
-
-    // currently StopAnalyzer does not leave "holes", so this matches.
-    query = new PhraseQuery();
-    query.add(new Term("field", "words"));
-    query.add(new Term("field", "here"));
-    hits = searcher.search(query);
-    assertEquals(1, hits.length());
-
-    searcher.close();
+  public void testDefaults() {
+    
+    assertTrue(stop != null);
+    StringReader reader = new StringReader("This is a test of the english stop analyzer");
+    TokenStream stream = stop.tokenStream("test", reader);
+    assertTrue(stream != null);
+    Token token = null;
+    try {
+      while ((token = stream.next()) != null)
+      {
+        assertTrue(inValidTokens.contains(token.termText()) == false);
+      }
+    } catch (IOException e) {
+      assertTrue(false);
+    }
   }
-}
+  
+  public void testStopList() {
+    Set stopWordsSet = new HashSet();
+    stopWordsSet.add("good");
+    stopWordsSet.add("test");
+    stopWordsSet.add("analyzer");    
+    StopAnalyzer newStop = new StopAnalyzer((String[])stopWordsSet.toArray(new String[3]));
+    StringReader reader = new StringReader("This is a good test of the english stop analyzer");
+    TokenStream stream = newStop.tokenStream("test", reader);
+    assertTrue(stream != null);
+    Token token = null;
+    try {
+      while ((token = stream.next()) != null)
+      {
+        String text = token.termText();
+        assertTrue(stopWordsSet.contains(text) == false);
+      }
+    } catch (IOException e) {
+      assertTrue(false);
+    }
+    
+  }    
+  
+}
\ No newline at end of file

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message