lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r895339 [2/2] - in /lucene/java/trunk: ./ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/bg/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/ con...
Date Sun, 03 Jan 2010 08:48:18 GMT
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java?rev=895339&r1=895338&r2=895339&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java Sun Jan  3 08:48:17
2010
@@ -21,15 +21,69 @@
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Set;
 
 /**
  * Loader for text files that represent a list of stopwords.
  */
 public class WordlistLoader {
-
+ 
+  /**
+   * Loads a text file associated with a given class (See
+   * {@link Class#getResourceAsStream(String)}) and adds every line as an entry
+   * to a {@link Set} (omitting leading and trailing whitespace). Every line of
+   * the file should contain only one word. The words need to be in lower-case if
+   * you make use of an Analyzer which uses LowerCaseFilter (like
+   * StandardAnalyzer).
+   * 
+   * @param aClass
+   *          a class that is associated with the given stopwordResource
+   * @param stopwordResource
+   *          name of the resource file associated with the given class
+   * @return a {@link Set} with the file's words
+   */
+  public static Set<String> getWordSet(Class<?> aClass, String stopwordResource)
+      throws IOException {
+    final Reader reader = new BufferedReader(new InputStreamReader(aClass
+        .getResourceAsStream(stopwordResource), "UTF-8"));
+    try {
+      return getWordSet(reader);
+    } finally {
+      reader.close();
+    }
+  }
+  
+  /**
+   * Loads a text file associated with a given class (See
+   * {@link Class#getResourceAsStream(String)}) and adds every line as an entry
+   * to a {@link Set} (omitting leading and trailing whitespace). Every line of
+   * the file should contain only one word. The words need to be in lower-case if
+   * you make use of an Analyzer which uses LowerCaseFilter (like
+   * StandardAnalyzer).
+   * 
+   * @param aClass
+   *          a class that is associated with the given stopwordResource
+   * @param stopwordResource
+   *          name of the resource file associated with the given class
+   * @param comment
+   *          the comment string to ignore
+   * @return a {@link Set} with the file's words
+   */
+  public static Set<String> getWordSet(Class<?> aClass,
+      String stopwordResource, String comment) throws IOException {
+    final Reader reader = new BufferedReader(new InputStreamReader(aClass
+        .getResourceAsStream(stopwordResource), "UTF-8"));
+    try {
+      return getWordSet(reader, comment);
+    } finally {
+      reader.close();
+    }
+  }
+  
   /**
    * Loads a text file and adds every line as an entry to a HashSet (omitting
    * leading and trailing whitespace). Every line of the file should contain only
@@ -40,17 +94,15 @@
    * @return A HashSet with the file's words
    */
   public static HashSet<String> getWordSet(File wordfile) throws IOException {
-    HashSet<String> result = new HashSet<String>();
     FileReader reader = null;
     try {
       reader = new FileReader(wordfile);
-      result = getWordSet(reader);
+      return getWordSet(reader);
     }
     finally {
       if (reader != null)
         reader.close();
     }
-    return result;
   }
 
   /**
@@ -64,17 +116,15 @@
    * @return A HashSet with the file's words
    */
   public static HashSet<String> getWordSet(File wordfile, String comment) throws IOException
{
-    HashSet<String> result = new HashSet<String>();
     FileReader reader = null;
     try {
       reader = new FileReader(wordfile);
-      result = getWordSet(reader, comment);
+      return getWordSet(reader, comment);
     }
     finally {
       if (reader != null)
         reader.close();
     }
-    return result;
   }
 
 
@@ -88,7 +138,7 @@
    * @return A HashSet with the reader's words
    */
   public static HashSet<String> getWordSet(Reader reader) throws IOException {
-    HashSet<String> result = new HashSet<String>();
+    final HashSet<String> result = new HashSet<String>();
     BufferedReader br = null;
     try {
       if (reader instanceof BufferedReader) {
@@ -119,7 +169,7 @@
    * @return A HashSet with the reader's words
    */
   public static HashSet<String> getWordSet(Reader reader, String comment) throws IOException
{
-    HashSet<String> result = new HashSet<String>();
+    final HashSet<String> result = new HashSet<String>();
     BufferedReader br = null;
     try {
       if (reader instanceof BufferedReader) {
@@ -154,21 +204,18 @@
   public static HashMap<String, String> getStemDict(File wordstemfile) throws IOException
{
     if (wordstemfile == null)
       throw new NullPointerException("wordstemfile may not be null");
-    HashMap<String, String> result = new HashMap<String, String>();
+    final HashMap<String, String> result = new HashMap<String,String>();
     BufferedReader br = null;
-    FileReader fr = null;
+    
     try {
-      fr = new FileReader(wordstemfile);
-      br = new BufferedReader(fr);
+      br = new BufferedReader(new FileReader(wordstemfile));
       String line;
       while ((line = br.readLine()) != null) {
         String[] wordstem = line.split("\t", 2);
         result.put(wordstem[0], wordstem[1]);
       }
     } finally {
-      if (fr != null)
-        fr.close();
-      if (br != null)
+      if(br != null)
         br.close();
     }
     return result;

Added: lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords.txt?rev=895339&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords.txt (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords.txt Sun Jan  3 08:48:17
2010
@@ -0,0 +1,5 @@
+#comment
+ONE
+two
+#comment
+three

Propchange: lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords_nocomment.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords_nocomment.txt?rev=895339&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords_nocomment.txt (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords_nocomment.txt Sun
Jan  3 08:48:17 2010
@@ -0,0 +1,3 @@
+ONE
+two
+three

Propchange: lucene/java/trunk/src/test/org/apache/lucene/index/wordliststopwords_nocomment.txt
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message