Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 87028 invoked from network); 22 Jun 2005 02:31:37 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199) by minotaur.apache.org with SMTP; 22 Jun 2005 02:31:37 -0000 Received: (qmail 94921 invoked by uid 500); 22 Jun 2005 02:31:37 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 94902 invoked by uid 500); 22 Jun 2005 02:31:36 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 94889 invoked by uid 99); 22 Jun 2005 02:31:36 -0000 Received: from asf.osuosl.org (HELO asf.osuosl.org) (140.211.166.49) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 21 Jun 2005 19:31:36 -0700 X-ASF-Spam-Status: No, hits=0.2 required=10.0 tests=NO_REAL_NAME X-Spam-Check-By: apache.org Received: from [209.237.227.194] (HELO minotaur.apache.org) (209.237.227.194) by apache.org (qpsmtpd/0.29) with SMTP; Tue, 21 Jun 2005 19:31:37 -0700 Received: (qmail 87011 invoked by uid 65534); 22 Jun 2005 02:31:35 -0000 Message-ID: <20050622023135.87009.qmail@minotaur.apache.org> Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r191754 - in /lucene/java/trunk/src/java/org/apache/lucene/analysis: WordlistLoader.java standard/StandardAnalyzer.java Date: Wed, 22 Jun 2005 02:31:34 -0000 To: java-commits@lucene.apache.org From: otis@apache.org X-Mailer: svnmailer-1.0.2 X-Virus-Checked: Checked by ClamAV on apache.org X-Spam-Rating: minotaur.apache.org 1.6.2 0/1000/N Author: otis Date: Tue Jun 21 19:31:33 2005 New Revision: 191754 URL: http://svn.apache.org/viewcvs?rev=191754&view=rev Log: - Applied patch from src/java/org/apache/lucene/analysis/WordlistLoader.java (reading word list from a Reader) Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java?rev=191754&r1=191753&r2=191754&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java Tue Jun 21 19:31:33 2005 @@ -19,7 +19,8 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; -import java.io.LineNumberReader; +import java.io.Reader; +import java.io.BufferedReader; import java.util.HashSet; import java.util.Hashtable; import java.util.Iterator; @@ -34,30 +35,44 @@ /** * Loads a text file and adds every line as an entry to a HashSet (omitting - * leading and trailing whitespace). Every line of the file should contain only + * leading and trailing whitespace). Every line of the file should contain only * one word. The words need to be in lowercase if you make use of an * Analyzer which uses LowerCaseFilter (like GermanAnalyzer). - * + * * @param wordfile File containing the wordlist * @return A HashSet with the file's words */ public static HashSet getWordSet(File wordfile) throws IOException { HashSet result = new HashSet(); - FileReader freader = null; - LineNumberReader lnr = null; + FileReader reader = null; try { - freader = new FileReader(wordfile); - lnr = new LineNumberReader(freader); + reader = new FileReader(wordfile); + result = getWordSet(reader); + } + finally { + if (reader != null) + reader.close(); + } + return result; + } + + public static HashSet getWordSet(Reader reader) throws IOException { + HashSet result = new HashSet(); + BufferedReader br = null; + try { + if (reader instanceof BufferedReader) { + br = (BufferedReader) reader; + } else { + br = new BufferedReader(reader); + } String word = null; - while ((word = lnr.readLine()) != null) { + while ((word = br.readLine()) != null) { result.add(word.trim()); } } finally { - if (lnr != null) - lnr.close(); - if (freader != null) - freader.close(); + if (br != null) + br.close(); } return result; } @@ -65,7 +80,7 @@ /** * @param path Path to the wordlist * @param wordfile Name of the wordlist - * + * * @deprecated Use {@link #getWordSet(File)} instead */ public static Hashtable getWordtable(String path, String wordfile) throws IOException { @@ -74,7 +89,7 @@ /** * @param wordfile Complete path to the wordlist - * + * * @deprecated Use {@link #getWordSet(File)} instead */ public static Hashtable getWordtable(String wordfile) throws IOException { Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=191754&r1=191753&r2=191754&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Tue Jun 21 19:31:33 2005 @@ -51,6 +51,10 @@ stopSet = WordlistLoader.getWordSet(stopwords); } + public StandardAnalyzer(Reader stopwords) throws IOException { + stopSet = WordlistLoader.getWordSet(stopwords); + } + /** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ public TokenStream tokenStream(String fieldName, Reader reader) {