Mailing-List: contact lucene-dev-help@jakarta.apache.org; run by ezmlm
Precedence: bulk
Reply-To: "Lucene Developers List" <lucene-dev@jakarta.apache.org>
Message-ID: <20040202124438.98676.qmail@web12701.mail.yahoo.com>
Date: Mon, 2 Feb 2004 04:44:38 -0800 (PST)
From: Otis Gospodnetic <otis_gospodnetic@yahoo.com>
Subject: Re: [PATCH] TestStopAnalyzer
To: Lucene Developers List <lucene-dev@jakarta.apache.org>
In-Reply-To: <s01a8456.014@gwia201.syr.edu>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii

Thanks, the patch applies, but I have not committed it yet.
What about the tests it removes, such as this one:

-  public void testPhraseQuery() throws Exception {
-    RAMDirectory directory = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true);
-    Document doc = new Document();
-    doc.add(Field.Text("field", "the stop words are here"));
-    writer.addDocument(doc);
-    writer.close();
-
-    IndexSearcher searcher = new IndexSearcher(directory);
-
-    // valid exact phrase query
-    PhraseQuery query = new PhraseQuery();
-    query.add(new Term("field","stop"));
-    query.add(new Term("field","words"));
-    Hits hits = searcher.search(query);
-    assertEquals(1, hits.length());
-
-    // currently StopAnalyzer does not leave "holes", so this matches.
-    query = new PhraseQuery();
-    query.add(new Term("field", "words"));
-    query.add(new Term("field", "here"));
-    hits = searcher.search(query);
-    assertEquals(1, hits.length());
...
...

By applying your patch, aren't we losing some important tests here?

Thanks,
Otis

--- Grant Ingersoll <gsingers@syr.edu> wrote:
> Hi,
> 
> Just noticed in the TestStopAnalyzer unit test that there were a lot
> of dependencies on other pieces of code just to test whether the
> StopAnalyzer actually properly removed stop words.  Not a big change,
> but here is the diff produced by cvs diff -u TestStopAnalyzer
> 
> Cheers,
> Grant Ingersoll
> 
> 
> 
> Index: TestStopAnalyzer.java
> ===================================================================
> RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java,v
> retrieving revision 1.2
> diff -u -r1.2 TestStopAnalyzer.java
> --- TestStopAnalyzer.java	8 Dec 2003 16:16:32 -0000	1.2
> +++ TestStopAnalyzer.java	30 Jan 2004 21:17:50 -0000
> @@ -1,60 +1,125 @@
>  package org.apache.lucene.analysis;
>  
> +/*
> ====================================================================
> + * The Apache Software License, Version 1.1
> + *
> + * Copyright (c) 2001 The Apache Software Foundation.  All rights
> + * reserved.
> + *
> + * Redistribution and use in source and binary forms, with or
> without
> + * modification, are permitted provided that the following
> conditions
> + * are met:
> + *
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above
> copyright
> + *    notice, this list of conditions and the following disclaimer
> in
> + *    the documentation and/or other materials provided with the
> + *    distribution.
> + *
> + * 3. The end-user documentation included with the redistribution,
> + *    if any, must include the following acknowledgment:
> + *       "This product includes software developed by the
> + *        Apache Software Foundation (http://www.apache.org/)."
> + *    Alternately, this acknowledgment may appear in the software
> itself,
> + *    if and wherever such third-party acknowledgments normally
> appear.
> + *
> + * 4. The names "Apache" and "Apache Software Foundation" and
> + *    "Apache Lucene" must not be used to endorse or promote
> products
> + *    derived from this software without prior written permission.
> For
> + *    written permission, please contact apache@apache.org.
> + *
> + * 5. Products derived from this software may not be called
> "Apache",
> + *    "Apache Lucene", nor may "Apache" appear in their name,
> without
> + *    prior written permission of the Apache Software Foundation.
> + *
> + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
> + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
> + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> LIABILITY,
> + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
> OF
> + * SUCH DAMAGE.
> + *
> ====================================================================
> + *
> + * This software consists of voluntary contributions made by many
> + * individuals on behalf of the Apache Software Foundation.  For
> more
> + * information on the Apache Software Foundation, please see
> + * <http://www.apache.org/>.
> + */
> +
> +
> +
>  import junit.framework.TestCase;
> +
>  import java.io.StringReader;
> -import java.util.ArrayList;
> -import org.apache.lucene.index.IndexWriter;
> -import org.apache.lucene.index.Term;
> -import org.apache.lucene.store.RAMDirectory;
> -import org.apache.lucene.document.Document;
> -import org.apache.lucene.document.Field;
> -import org.apache.lucene.search.IndexSearcher;
> -import org.apache.lucene.search.PhraseQuery;
> -import org.apache.lucene.search.Hits;
> +import java.io.IOException;
> +import java.util.Set;
> +import java.util.HashSet;
>  
> -public class TestStopAnalyzer extends TestCase {
> -  private StopAnalyzer stopAnalyzer = new StopAnalyzer();
> +//import org.cnlp.utils.properties.ResourceBundleHelper;
>  
> -  public Token[] tokensFromAnalyzer(Analyzer analyzer, String text)
> -                                                  throws Exception {
> -    TokenStream stream =
> -      analyzer.tokenStream("contents", new StringReader(text));
> -    ArrayList tokenList = new ArrayList();
> -    while (true) {
> -      Token token = stream.next();
> -      if (token == null) break;
> +public class TestStopAnalyzer extends TestCase {
> +  private StopAnalyzer stop = new StopAnalyzer();
> +  
> +  private Set inValidTokens = new HashSet();
> +  public TestStopAnalyzer(String s) {
> +    super(s);
> +  }
>  
> -      tokenList.add(token);
> +  protected void setUp() {
> +    for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.length; i++)
> {
> +      inValidTokens.add(StopAnalyzer.ENGLISH_STOP_WORDS[i]);
> +      
>      }
> -
> -    return (Token[]) tokenList.toArray(new Token[0]);
>    }
>  
> +  protected void tearDown() {
> +  }
>  
> -  public void testPhraseQuery() throws Exception {
> -    RAMDirectory directory = new RAMDirectory();
> -    IndexWriter writer = new IndexWriter(directory, stopAnalyzer,
> true);
> -    Document doc = new Document();
> -    doc.add(Field.Text("field", "the stop words are here"));
> -    writer.addDocument(doc);
> -    writer.close();
> -
> -    IndexSearcher searcher = new IndexSearcher(directory);
> -
> -    // valid exact phrase query
> -    PhraseQuery query = new PhraseQuery();
> -    query.add(new Term("field","stop"));
> -    query.add(new Term("field","words"));
> -    Hits hits = searcher.search(query);
> -    assertEquals(1, hits.length());
> -
> -    // currently StopAnalyzer does not leave "holes", so this
> matches.
> -    query = new PhraseQuery();
> -    query.add(new Term("field", "words"));
> -    query.add(new Term("field", "here"));
> -    hits = searcher.search(query);
> -    assertEquals(1, hits.length());
> -
> -    searcher.close();
> +  public void testDefaults() {
> +    
> +    assertTrue(stop != null);
> +    StringReader reader = new StringReader("This is a test of the
> english stop analyzer");
> +    TokenStream stream = stop.tokenStream("test", reader);
> +    assertTrue(stream != null);
> +    Token token = null;
> +    try {
> +      while ((token = stream.next()) != null)
> +      {
> +        assertTrue(inValidTokens.contains(token.termText()) ==
> false);
> +      }
> +    } catch (IOException e) {
> +      assertTrue(false);
> +    }
>    }
> -}
> +  
> +  public void testStopList() {
> +    Set stopWordsSet = new HashSet();
> +    stopWordsSet.add("good");
> +    stopWordsSet.add("test");
> +    stopWordsSet.add("analyzer");    
> +    StopAnalyzer newStop = new
> StopAnalyzer((String[])stopWordsSet.toArray(new String[3]));
> +    StringReader reader = new StringReader("This is a good test of
> the english stop analyzer");
> +    TokenStream stream = newStop.tokenStream("test", reader);
> +    assertTrue(stream != null);
> +    Token token = null;
> +    try {
> +      while ((token = stream.next()) != null)
> +      {
> +        String text = token.termText();
> +        assertTrue(stopWordsSet.contains(text) == false);
> +      }
> +    } catch (IOException e) {
> +      assertTrue(false);
> +    }
> +    
> +  }    
> +  
> +}
> \ No newline at end of file
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: lucene-dev-help@jakarta.apache.org
> 


---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org