lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From k...@apache.org
Subject svn commit: r1504037 - in /lucene/dev/trunk/lucene: ./ analysis/common/src/java/org/apache/lucene/analysis/synonym/ analysis/common/src/test/org/apache/lucene/analysis/synonym/
Date Wed, 17 Jul 2013 07:50:32 GMT
Author: koji
Date: Wed Jul 17 07:50:32 2013
New Revision: 1504037

URL: http://svn.apache.org/r1504037
Log:
SOLR-3359: add analyzer attribute/property to SynonymFilterFactory

Added:
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/synonyms2.txt
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1504037&r1=1504036&r2=1504037&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Jul 17 07:50:32 2013
@@ -57,6 +57,9 @@ New features
 * LUCENE-5098: New broadword utility methods in oal.util.BroadWord.
   (Paul Elschot via Adrien Grand, Dawid Weiss)
 
+* SOLR-3359: Added analyzer attribute/property to SynonymFilterFactory.
+  (Ryo Onodera via Koji Sekiguchi)
+
 API Changes
 
 * LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java?rev=1504037&r1=1504036&r2=1504037&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java Wed Jul 17 07:50:32 2013
@@ -68,6 +68,7 @@ public class SynonymFilterFactory extend
   private final String synonyms;
   private final String format;
   private final boolean expand;
+  private final String analyzerName;
   private final Map<String, String> tokArgs = new HashMap<String, String>();
 
   private SynonymMap map;
@@ -79,7 +80,13 @@ public class SynonymFilterFactory extend
     format = get(args, "format");
     expand = getBoolean(args, "expand", true);
 
+    analyzerName = get(args, "analyzer");
     tokenizerFactory = get(args, "tokenizerFactory");
+    if (analyzerName != null && tokenizerFactory != null) {
+      throw new IllegalArgumentException("Analyzer and TokenizerFactory can't be specified both: " +
+                                         analyzerName + " and " + tokenizerFactory);
+    }
+
     if (tokenizerFactory != null) {
       assureMatchVersion();
       tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString());
@@ -104,15 +111,20 @@ public class SynonymFilterFactory extend
   @Override
   public void inform(ResourceLoader loader) throws IOException {
     final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader,
tokenizerFactory);
+    Analyzer analyzer;
     
-    Analyzer analyzer = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_50, reader) : factory.create(reader);
-        TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_50, tokenizer) : tokenizer;
-        return new TokenStreamComponents(tokenizer, stream);
-      }
-    };
+    if (analyzerName != null) {
+      analyzer = loadAnalyzer(loader, analyzerName);
+    } else {
+      analyzer = new Analyzer() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_50, reader) : factory.create(reader);
+          TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_50, tokenizer) : tokenizer;
+          return new TokenStreamComponents(tokenizer, stream);
+        }
+      };
+    }
 
     try {
       if (format == null || format.equals("solr")) {
@@ -188,4 +200,17 @@ public class SynonymFilterFactory extend
       throw new RuntimeException(e);
     }
   }
+
+  private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException {
+    Class<? extends Analyzer> clazz = loader.findClass(cname, Analyzer.class);
+    try {
+      Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_50);
+      if (analyzer instanceof ResourceLoaderAware) {
+        ((ResourceLoaderAware) analyzer).inform(loader);
+      }
+      return analyzer;
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
 }

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java?rev=1504037&r1=1504036&r2=1504037&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java Wed Jul 17 07:50:32 2013
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.util.T
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.StringMockResourceLoader;
+import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 
 public class TestSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
   /** test that we can parse and use the solr syn file */
@@ -64,6 +65,28 @@ public class TestSynonymFilterFactory ex
     }
   }
 
+  /** Test that analyzer and tokenizerFactory is both specified */
+  public void testAnalyzer() throws Exception {
+    final String analyzer = CJKAnalyzer.class.getName();
+    final String tokenizerFactory = PatternTokenizerFactory.class.getName();
+    TokenFilterFactory factory = null;
+
+    factory = tokenFilterFactory("Synonym",
+        "synonyms", "synonyms2.txt",
+        "analyzer", analyzer);
+    assertNotNull(factory);
+
+    try {
+      tokenFilterFactory("Synonym",
+          "synonyms", "synonyms.txt",
+          "analyzer", analyzer,
+          "tokenizerFactory", tokenizerFactory);
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Analyzer and TokenizerFactory can't be specified both"));
+    }
+  }
+
   static final String TOK_SYN_ARG_VAL = "argument";
   static final String TOK_FOO_ARG_VAL = "foofoofoo";
 

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/synonyms2.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/synonyms2.txt?rev=1504037&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/synonyms2.txt (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/synonyms2.txt Wed Jul 17 07:50:32 2013
@@ -0,0 +1,15 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+
+蛙 => カエル



Mime
View raw message