lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hoss...@apache.org
Subject svn commit: r1482474 - in /lucene/dev/trunk: lucene/ lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/ lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/ solr/
Date Tue, 14 May 2013 17:21:51 GMT
Author: hossman
Date: Tue May 14 17:21:50 2013
New Revision: 1482474

URL: http://svn.apache.org/r1482474
Log:
SOLR-4813: Fix SynonymFilterFactory to allow init parameters for tokenizer factory used when
parsing synonyms file

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
    lucene/dev/trunk/solr/CHANGES.txt

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1482474&r1=1482473&r2=1482474&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue May 14 17:21:50 2013
@@ -195,7 +195,8 @@ Build
 
 Bug Fixes
 
-
+* SOLR-4813: Fix SynonymFilterFactory to allow init parameters for
+  tokenizer factory used when parsing synonyms file.  (Shingo Sasaki, hossman)
 
 ======================= Lucene 4.3.0 =======================
 

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java?rev=1482474&r1=1482473&r2=1482474&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
Tue May 14 17:21:50 2013
@@ -26,6 +26,7 @@ import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.text.ParseException;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -48,9 +49,18 @@ import org.apache.lucene.util.Version;
  *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  *     <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
  *             format="solr" ignoreCase="false" expand="true" 
- *             tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
+ *             tokenizerFactory="solr.WhitespaceTokenizerFactory"
+ *             [optional tokenizer factory parameters]/>
  *   </analyzer>
  * &lt;/fieldType&gt;</pre>
+ * 
+ * <p>
+ * An optional param name prefix of "tokenizerFactory." may be used for any 
+ * init params that the SynonymFilterFactory needs to pass to the specified 
+ * TokenizerFactory.  If the TokenizerFactory expects an init parameters with 
+ * the same name as an init param used by the SynonymFilterFactory, the prefix 
+ * is mandatory.
+ * </p>
  */
 public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware
{
   private final boolean ignoreCase;
@@ -58,19 +68,27 @@ public class SynonymFilterFactory extend
   private final String synonyms;
   private final String format;
   private final boolean expand;
+  private final Map<String, String> tokArgs = new HashMap<String, String>();
 
   private SynonymMap map;
   
   public SynonymFilterFactory(Map<String,String> args) {
     super(args);
     ignoreCase = getBoolean(args, "ignoreCase", false);
+    synonyms = require(args, "synonyms");
+    format = get(args, "format");
+    expand = getBoolean(args, "expand", true);
+
     tokenizerFactory = get(args, "tokenizerFactory");
     if (tokenizerFactory != null) {
       assureMatchVersion();
+      tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString());
+      for (Iterator<String> itr = args.keySet().iterator(); itr.hasNext();) {
+        String key = itr.next();
+        tokArgs.put(key.replaceAll("^tokenizerFactory\\.",""), args.get(key));
+        itr.remove();
+      }
     }
-    synonyms = require(args, "synonyms");
-    format = get(args, "format");
-    expand = getBoolean(args, "expand", true);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -159,11 +177,9 @@ public class SynonymFilterFactory extend
   
   // (there are no tests for this functionality)
   private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws
IOException {
-    Map<String,String> args = new HashMap<String,String>();
-    args.put("luceneMatchVersion", getLuceneMatchVersion().toString());
     Class<? extends TokenizerFactory> clazz = loader.findClass(cname, TokenizerFactory.class);
     try {
-      TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(args);
+      TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(tokArgs);
       if (tokFactory instanceof ResourceLoaderAware) {
         ((ResourceLoaderAware) tokFactory).inform(loader);
       }

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java?rev=1482474&r1=1482473&r2=1482474&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
Tue May 14 17:21:50 2013
@@ -19,11 +19,15 @@ package org.apache.lucene.analysis.synon
 
 import java.io.Reader;
 import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.synonym.SynonymFilter;
+import org.apache.lucene.analysis.pattern.PatternTokenizerFactory;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.StringMockResourceLoader;
 
 public class TestSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
@@ -59,4 +63,53 @@ public class TestSynonymFilterFactory ex
       assertTrue(expected.getMessage().contains("Unknown parameters"));
     }
   }
+
+  static final String TOK_SYN_ARG_VAL = "argument";
+  static final String TOK_FOO_ARG_VAL = "foofoofoo";
+
+  /** Test that we can parse TokenierFactory's arguments */
+  public void testTokenizerFactoryArguments() throws Exception {
+    final String clazz = PatternTokenizerFactory.class.getName();
+    TokenFilterFactory factory = null;
+
+    // simple arg form
+    factory = tokenFilterFactory("Synonym", 
+        "synonyms", "synonyms.txt", 
+        "tokenizerFactory", clazz,
+        "pattern", "(.*)",
+        "group", "0");
+    assertNotNull(factory);
+    // prefix
+    factory = tokenFilterFactory("Synonym", 
+        "synonyms", "synonyms.txt", 
+        "tokenizerFactory", clazz,
+        "tokenizerFactory.pattern", "(.*)",
+        "tokenizerFactory.group", "0");
+    assertNotNull(factory);
+
+    // sanity check that sub-PatternTokenizerFactory fails w/o pattern
+    try {
+      factory = tokenFilterFactory("Synonym", 
+          "synonyms", "synonyms.txt", 
+          "tokenizerFactory", clazz);
+      fail("tokenizerFactory should have complained about missing pattern arg");
+    } catch (Exception expected) {
+      // :NOOP:
+    }
+
+    // sanity check that sub-PatternTokenizerFactory fails on unexpected
+    try {
+      factory = tokenFilterFactory("Synonym", 
+          "synonyms", "synonyms.txt", 
+          "tokenizerFactory", clazz,
+          "tokenizerFactory.pattern", "(.*)",
+          "tokenizerFactory.bogusbogusbogus", "bogus",
+          "tokenizerFactory.group", "0");
+      fail("tokenizerFactory should have complained about missing pattern arg");
+    } catch (Exception expected) {
+      // :NOOP:
+    }
+  }
 }
+
+

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1482474&r1=1482473&r2=1482474&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue May 14 17:21:50 2013
@@ -173,6 +173,10 @@ Bug Fixes
 * SOLR-4807: The zkcli script now works with log4j. The zkcli.bat script
   was broken on Windows in 4.3.0, now it works. (Shawn Heisey)
 
+* SOLR-4813: Fix SynonymFilterFactory to allow init parameters for
+  tokenizer factory used when parsing synonyms file.  (Shingo Sasaki, hossman)
+  
+
 Other Changes
 ----------------------
 



Mime
View raw message