lucene-commits mailing list archives

From romseyg...@apache.org
Subject [3/3] lucene-solr:branch_7x: LUCENE-8373: Move ENGLISH_STOPWORDS_SET to EnglishAnalyzer
Date Fri, 29 Jun 2018 20:06:12 GMT
LUCENE-8373: Move ENGLISH_STOPWORDS_SET to EnglishAnalyzer


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5d25a106
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5d25a106
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5d25a106

Branch: refs/heads/branch_7x
Commit: 5d25a1069a7000f54e1c2745347e8bae06d45ca0
Parents: 0db91d8
Author: Alan Woodward <romseygeek@apache.org>
Authored: Thu Jun 28 12:50:49 2018 +0100
Committer: Alan Woodward <romseygeek@apache.org>
Committed: Fri Jun 29 16:07:45 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              | 13 +++++---
 .../commongrams/CommonGramsFilterFactory.java   |  8 +++--
 .../lucene/analysis/core/StopAnalyzer.java      | 13 +++++---
 .../lucene/analysis/core/StopFilterFactory.java |  7 +++--
 .../lucene/analysis/en/EnglishAnalyzer.java     | 32 ++++++++++++-------
 .../analysis/standard/ClassicAnalyzer.java      |  4 +--
 .../standard/UAX29URLEmailAnalyzer.java         |  4 +--
 .../lucene/analysis/core/TestAnalyzers.java     |  9 ++++--
 .../lucene/analysis/core/TestStopAnalyzer.java  |  5 +--
 .../analysis/core/TestStopFilterFactory.java    |  3 +-
 .../miscellaneous/TestWordDelimiterFilter.java  | 28 ++++++++++++++---
 .../TestWordDelimiterGraphFilter.java           | 33 ++++++++++++++++----
 .../lucene/analysis/th/TestThaiAnalyzer.java    |  4 +--
 .../analysis/standard/StandardAnalyzer.java     | 21 +++++++++----
 .../analyzing/SuggestStopFilterFactory.java     |  8 ++---
 .../analyzing/TestSuggestStopFilterFactory.java |  4 +--
 16 files changed, 135 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4fe236b..fe8898b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -7,13 +7,18 @@ http://s.apache.org/luceneversions
 
 API Changes:
 
- * LUCENE-8356: StandardFilter is deprecated (Alan Woodward)
+* LUCENE-8356: StandardFilter is deprecated (Alan Woodward)
+
+* LUCENE-8373: ENGLISH_STOP_WORDS_SET on StandardAnalyzer is deprecated.  Instead
+  use EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.  The default constructor for
+  StopAnalyzer is also deprecated, and a stop word set should be explicitly
+  passed to the constructor.  (Alan Woodward)
 
 Bug Fixes:
 
- * LUCENE-8164: IndexWriter silently accepts broken payload. This has been fixed
-   via LUCENE-8165 since we are now checking for offset+length going out of bounds.
-   (Robert Muir, Nhat Nyugen, Simon Willnauer)
+* LUCENE-8164: IndexWriter silently accepts broken payload. This has been fixed
+  via LUCENE-8165 since we are now checking for offset+length going out of bounds.
+  (Robert Muir, Nhat Nyugen, Simon Willnauer)
 
 * LUCENE-8370: Reproducing 
   TestLucene{54,70}DocValuesFormat.testSortedSetVariableLengthBigVsStoredFields()
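
For code that currently references the deprecated constants, a minimal migration sketch (illustrative only, not part of the commit; it assumes lucene-core and lucene-analyzers-common from this branch are on the classpath):

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;

public class StopWordsMigrationSketch {
  public static void main(String[] args) {
    // Before this change: StandardAnalyzer.ENGLISH_STOP_WORDS_SET and new StopAnalyzer()
    // After this change: take the set from EnglishAnalyzer and pass it explicitly.
    CharArraySet stops = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
    try (StopAnalyzer analyzer = new StopAnalyzer(stops)) {
      System.out.println(analyzer.getStopwordSet().size() + " default English stop words"); // 33
    }
  }
}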

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
index 946003f..9712451 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
@@ -23,8 +23,10 @@ import java.util.Map;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
  * Constructs a {@link CommonGramsFilter}.
@@ -63,7 +65,7 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements Reso
         commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
       }
     } else {
-      commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+      commonWords = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
index 7d7f532..d521674 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@@ -28,20 +28,23 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 
 /** 
  * Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
  */
 public final class StopAnalyzer extends StopwordAnalyzerBase {
-  
+
   /** An unmodifiable set containing some common English words that are not usually useful
-  for searching.*/
-  public static final CharArraySet ENGLISH_STOP_WORDS_SET = StandardAnalyzer.ENGLISH_STOP_WORDS_SET;
-  
+    for searching.*/
+  @Deprecated
+  public static final CharArraySet ENGLISH_STOP_WORDS_SET = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
+
   /** Builds an analyzer which removes words in
    *  {@link #ENGLISH_STOP_WORDS_SET}.
+   * @deprecated Use a constructor with a specific stop word set
    */
+  @Deprecated
   public StopAnalyzer() {
     this(ENGLISH_STOP_WORDS_SET);
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
index b6b8b60..3b1be48 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
@@ -22,7 +22,8 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WordlistLoader; // jdocs
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -45,7 +46,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
  * <ul>
  *  <li><code>ignoreCase</code> defaults to <code>false</code></li>
 *  <li><code>words</code> should be the name of a stopwords file to parse, if not
- *      specified the factory will use {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}
+ *      specified the factory will use {@link EnglishAnalyzer#ENGLISH_STOP_WORDS_SET}
  *  </li>
 *  <li><code>format</code> defines how the <code>words</code> file will be parsed,
 *      and defaults to <code>wordset</code>.  If <code>words</code> is not specified,
@@ -103,7 +104,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
       if (null != format) {
        throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
       }
-      stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
+      stopWords = new CharArraySet(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
     }
   }
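
The fallback described in the factory javadoc above can be exercised directly; a short sketch (illustrative only — the empty argument map, the luceneMatchVersion value and the classpath loader are assumptions, not part of this commit):

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.util.Version;

public class StopFilterFactoryDefaultsSketch {
  public static void main(String[] args) throws Exception {
    Map<String, String> factoryArgs = new HashMap<>();            // no 'words', no 'format'
    factoryArgs.put("luceneMatchVersion", Version.LATEST.toString());
    StopFilterFactory factory = new StopFilterFactory(factoryArgs);
    factory.inform(new ClasspathResourceLoader(StopFilterFactoryDefaultsSketch.class));
    // With no 'words' file, the factory copies EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.
    CharArraySet stops = factory.getStopWords();
    System.out.println(stops.equals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)); // true (same contents)
  }
}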
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
index 4d9af26..06e238e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
@@ -18,6 +18,8 @@ package org.apache.lucene.analysis.en;
 
 
 import java.io.Reader;
+import java.util.Arrays;
+import java.util.List;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
@@ -27,13 +29,29 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
  * {@link Analyzer} for English.
  */
 public final class EnglishAnalyzer extends StopwordAnalyzerBase {
+
+  /** An unmodifiable set containing some common English words that are not usually useful
+   for searching.*/
+  public static final CharArraySet ENGLISH_STOP_WORDS_SET;
+
+  static {
+    final List<String> stopWords = Arrays.asList(
+        "a", "an", "and", "are", "as", "at", "be", "but", "by",
+        "for", "if", "in", "into", "is", "it",
+        "no", "not", "of", "on", "or", "such",
+        "that", "the", "their", "then", "there", "these",
+        "they", "this", "to", "was", "will", "with"
+    );
+    final CharArraySet stopSet = new CharArraySet(stopWords, false);
+    ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
+  }
+
   private final CharArraySet stemExclusionSet;
    
   /**
@@ -41,22 +59,14 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
    * @return default stop words set.
    */
   public static CharArraySet getDefaultStopSet(){
-    return DefaultSetHolder.DEFAULT_STOP_SET;
-  }
-  
-  /**
-   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
-   * accesses the static final set the first time.;
-   */
-  private static class DefaultSetHolder {
-    static final CharArraySet DEFAULT_STOP_SET = StandardAnalyzer.STOP_WORDS_SET;
+    return ENGLISH_STOP_WORDS_SET;
   }
 
   /**
    * Builds an analyzer with the default stop words: {@link #getDefaultStopSet}.
    */
   public EnglishAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+    this(ENGLISH_STOP_WORDS_SET);
   }
   
   /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
index ef2ef7e..d65f08b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 
 /**
  * Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
@@ -46,7 +46,7 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
 
   /** An unmodifiable set containing some common English words that are usually not
   useful for searching. */
-  public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+  public static final CharArraySet STOP_WORDS_SET = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
 
   /** Builds an analyzer with the given stop words.
    * @param stopWords stop words */
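
ClassicAnalyzer now shares the unmodifiable set defined on EnglishAnalyzer, as do the other analyzers in this commit; a brief sketch of reading and customising that set (the copy-then-add pattern mirrors the factories above; the added word is purely illustrative):

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.en.EnglishAnalyzer;

public class EnglishStopSetSketch {
  public static void main(String[] args) {
    CharArraySet defaults = EnglishAnalyzer.getDefaultStopSet(); // same set as ENGLISH_STOP_WORDS_SET
    System.out.println(defaults.contains("the"));                // true
    // The shared set is unmodifiable; copy it before adding custom entries.
    CharArraySet custom = new CharArraySet(defaults, true);      // true = ignoreCase
    custom.add("lucene");                                        // illustrative extra stop word
    System.out.println(custom.size());                           // 34 (33 defaults + 1)
  }
}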

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
index 51ec7f0..be52370 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 
 /**
  * Filters {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer}
@@ -42,7 +42,7 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
 
   /** An unmodifiable set containing some common English words that are usually not
   useful for searching. */
-  public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+  public static final CharArraySet STOP_WORDS_SET = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
 
   /** Builds an analyzer with the given stop words.
    * @param stopWords stop words */

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
index c9ae2e6..b7fc18b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@@ -79,7 +80,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
   }
 
   public void testStop() throws Exception {
-    Analyzer a = new StopAnalyzer();
+    Analyzer a = new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
     assertAnalyzesTo(a, "foo bar FOO BAR", 
                      new String[] { "foo", "bar", "foo", "bar" });
     assertAnalyzesTo(a, "foo a bar such FOO THESE BAR", 
@@ -234,7 +235,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
   
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    Analyzer analyzers[] = new Analyzer[] { new WhitespaceAnalyzer(), new SimpleAnalyzer(), new StopAnalyzer(), new UnicodeWhitespaceAnalyzer() };
+    Analyzer analyzers[] = new Analyzer[] { new WhitespaceAnalyzer(), new SimpleAnalyzer(),
+        new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET), new UnicodeWhitespaceAnalyzer() };
     for (Analyzer analyzer : analyzers) {
       checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
     }
@@ -243,7 +245,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
   
   /** blast some random large strings through the analyzer */
   public void testRandomHugeStrings() throws Exception {
-    Analyzer analyzers[] = new Analyzer[] { new WhitespaceAnalyzer(), new SimpleAnalyzer(), new StopAnalyzer(), new UnicodeWhitespaceAnalyzer() };
+    Analyzer analyzers[] = new Analyzer[] { new WhitespaceAnalyzer(), new SimpleAnalyzer(),
+        new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET), new UnicodeWhitespaceAnalyzer() };
     for (Analyzer analyzer : analyzers) {
       checkRandomData(random(), analyzer, 100*RANDOM_MULTIPLIER, 8192);
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
index bbf9502..aa20c46 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
@@ -25,6 +25,7 @@ import java.util.Set;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
@@ -37,11 +38,11 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
   public void setUp() throws Exception {
     super.setUp();
     
-    Iterator<?> it = StopAnalyzer.ENGLISH_STOP_WORDS_SET.iterator();
+    Iterator<?> it = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.iterator();
     while(it.hasNext()) {
       inValidTokens.add(it.next());
     }
-    stop = new StopAnalyzer();
+    stop = new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
   }
   
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
index f2d6fe3..0f24697 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.core;
 
 
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
@@ -60,7 +61,7 @@ public class TestStopFilterFactory extends BaseTokenStreamFactoryTestCase {
 
     // defaults
     factory = (StopFilterFactory) tokenFilterFactory("Stop");
-    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
+    assertEquals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
     assertEquals(false, factory.isIgnoreCase());
   }
   

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
index f945cd6..790e758 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
@@ -17,18 +17,36 @@
 package org.apache.lucene.analysis.miscellaneous;
 
 import java.io.IOException;
-import java.util.*;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Random;
 
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.IOUtils;
 
-import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.CATENATE_ALL;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.CATENATE_NUMBERS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.CATENATE_WORDS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.GENERATE_NUMBER_PARTS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.GENERATE_WORD_PARTS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.IGNORE_KEYWORDS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.PRESERVE_ORIGINAL;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.SPLIT_ON_CASE_CHANGE;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.SPLIT_ON_NUMERICS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;
 import static org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
 
 /**
@@ -287,7 +305,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
       @Override
       public TokenStreamComponents createComponents(String field) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-        StopFilter filter = new StopFilter(tokenizer, StandardAnalyzer.STOP_WORDS_SET);
+        StopFilter filter = new StopFilter(tokenizer, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
         return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(filter, flags, protWords));
       }
     };

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterGraphFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterGraphFilter.java
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterGraphFilter.java
index 61ae6c0..b5dc8a3 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterGraphFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterGraphFilter.java
@@ -17,19 +17,40 @@
 package org.apache.lucene.analysis.miscellaneous;
 
 import java.io.IOException;
-import java.util.*;
-
-import org.apache.lucene.analysis.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.TestUtil;
 
-import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.*;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.CATENATE_ALL;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.CATENATE_NUMBERS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.CATENATE_WORDS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.GENERATE_WORD_PARTS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.IGNORE_KEYWORDS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.PRESERVE_ORIGINAL;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.SPLIT_ON_NUMERICS;
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE;
 import static org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
 
 /**
@@ -282,7 +303,7 @@ public class TestWordDelimiterGraphFilter extends BaseTokenStreamTestCase {
       @Override
       public TokenStreamComponents createComponents(String field) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-        StopFilter filter = new StopFilter(tokenizer, StandardAnalyzer.STOP_WORDS_SET);
+        StopFilter filter = new StopFilter(tokenizer, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
         return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(filter, flags, protWords));
       }
     };

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
index 6eeb9af..233ea3e 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
@@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 
 /**
@@ -63,7 +63,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    */
   // note this test uses stopfilter's stopset
   public void testPositionIncrements() throws Exception {
-    final ThaiAnalyzer analyzer = new ThaiAnalyzer(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+    final ThaiAnalyzer analyzer = new ThaiAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
     assertAnalyzesTo(analyzer, "การที่ได้ต้อง the แสดงว่างานดี",
         new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
         new int[] { 0, 3, 6, 9, 18, 22, 25, 28 },

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
index 7e24257..6abdeb4 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@@ -35,10 +35,14 @@ import org.apache.lucene.analysis.WordlistLoader;
  */
 public final class StandardAnalyzer extends StopwordAnalyzerBase {
 
-  /** An unmodifiable set containing some common English words that are not usually useful
-  for searching.*/
+  /**
+   * An unmodifiable set containing some common English words that are not
+   * usually useful for searching.
+   * @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
+   */
+  @Deprecated
   public static final CharArraySet ENGLISH_STOP_WORDS_SET;
-  
+
   static {
     final List<String> stopWords = Arrays.asList(
       "a", "an", "and", "are", "as", "at", "be", "but", "by",
@@ -48,16 +52,21 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
       "they", "this", "to", "was", "will", "with"
     );
     final CharArraySet stopSet = new CharArraySet(stopWords, false);
-    ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); 
+    ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
   }
+
   
   /** Default maximum allowed token length */
   public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
 
   private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
 
-  /** An unmodifiable set containing some common English words that are usually not
-  useful for searching. */
+  /**
+   * An unmodifiable set containing some common English words that are usually not
+   * useful for searching.
+   * @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
+   */
+  @Deprecated
   public static final CharArraySet STOP_WORDS_SET = ENGLISH_STOP_WORDS_SET;
 
   /** Builds an analyzer with the given stop words.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
index 10765a5..2ea52f6 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
@@ -21,8 +21,8 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WordlistLoader; // jdocs
-import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -47,7 +47,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
  * <ul>
  *  <li><code>ignoreCase</code> defaults to <code>false</code></li>
 *  <li><code>words</code> should be the name of a stopwords file to parse, if not
- *      specified the factory will use {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}
+ *      specified the factory will use {@link EnglishAnalyzer#ENGLISH_STOP_WORDS_SET}
  *  </li>
 *  <li><code>format</code> defines how the <code>words</code> file will be parsed,
 *      and defaults to <code>wordset</code>.  If <code>words</code> is not specified,
@@ -108,7 +108,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
       if (null != format) {
        throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
       }
-      stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
+      stopWords = new CharArraySet(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d25a106/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java
b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java
index 69947e4..28cf7f6 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java
@@ -22,7 +22,7 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.util.Version;
@@ -63,7 +63,7 @@ public class TestSuggestStopFilterFactory extends BaseTokenStreamTestCase {
 
     // defaults
     factory = createFactory();
-    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
+    assertEquals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
     assertEquals(false, factory.isIgnoreCase());
   }
 

