lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From romseyg...@apache.org
Subject lucene-solr:master: LUCENE-7444: StandardAnalyzer no longer uses English stopwords by default
Date Wed, 13 Jun 2018 11:08:24 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/master 7eb74ac50 -> 5ae716c41


LUCENE-7444: StandardAnalyzer no longer uses English stopwords by default


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5ae716c4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5ae716c4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5ae716c4

Branch: refs/heads/master
Commit: 5ae716c412d705570b2dafd423755eb58142212e
Parents: 7eb74ac
Author: Alan Woodward <romseygeek@apache.org>
Authored: Wed Jun 13 11:38:16 2018 +0100
Committer: Alan Woodward <romseygeek@apache.org>
Committed: Wed Jun 13 11:38:16 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                                           | 3 +++
 lucene/MIGRATE.txt                                           | 5 +++++
 .../apache/lucene/analysis/standard/StandardAnalyzer.java    | 8 ++++----
 3 files changed, 12 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5ae716c4/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 83177a8..51c461f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -73,6 +73,9 @@ Changes in Runtime Behavior
 * LUCENE-8031: Length normalization correctly reflects omission of term frequencies.
   (Robert Muir, Adrien Grand)
 
+* LUCENE-7444: StandardAnalyzer no longer defaults to removing English stopwords
+  (Alan Woodward)
+
 Improvements
 
 * LUCENE-7997: Add BaseSimilarityTestCase to sanity check similarities.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5ae716c4/lucene/MIGRATE.txt
----------------------------------------------------------------------
diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt
index fb7ee08..ae29357 100644
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -52,3 +52,8 @@ Memory codecs have been removed from the codebase (MemoryPostings, MemoryDocValu
 
 Caching everything is discouraged as it disables the ability to skip non-interesting documents.
 ALWAYS_CACHE can be replaced by a UsageTrackingQueryCachingPolicy with an appropriate config.
+
+## English stopwords are no longer removed by default in StandardAnalyzer (LUCENE-7444) ##
+
+To retain the old behaviour, pass StandardAnalyzer.ENGLISH_STOP_WORDS_SET as an argument
+to the constructor.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5ae716c4/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
index 8afffd8..5048c31 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@@ -31,8 +31,8 @@ import org.apache.lucene.analysis.WordlistLoader;
 
 /**
  * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
- * LowerCaseFilter} and {@link StopFilter}, using a list of
- * English stop words.
+ * LowerCaseFilter} and {@link StopFilter}, using a configurable list of
+ * stop words.
  */
 public final class StandardAnalyzer extends StopwordAnalyzerBase {
 
@@ -67,10 +67,10 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
     super(stopWords);
   }
 
-  /** Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
+  /** Builds an analyzer with no stop words.
    */
   public StandardAnalyzer() {
-    this(STOP_WORDS_SET);
+    this(CharArraySet.EMPTY_SET);
   }
 
   /** Builds an analyzer with the stop words from the given reader.


Mime
View raw message