lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1524849 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/ lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/
Date Thu, 19 Sep 2013 21:45:33 GMT
Author: rmuir
Date: Thu Sep 19 21:45:33 2013
New Revision: 1524849

URL: http://svn.apache.org/r1524849
Log:
LUCENE-5219: Add support to SynonymFilterFactory for custom parsers

Added:
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/synonyms-wordnet.txt
      - copied unchanged from r1524839, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/synonyms-wordnet.txt
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FSTSynonymFilterFactory.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestWordnetSynonymParser.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Sep 19 21:45:33 2013
@@ -24,6 +24,9 @@ New Features
   for example if a StopFilter had removed the last token.  (Mike
   McCandless)
 
+* LUCENE-5219: Add support to SynonymFilterFactory for custom
+  parsers.  (Ryan Ernst via Robert Muir)
+
 Bug Fixes
 
 * LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FSTSynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FSTSynonymFilterFactory.java?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FSTSynonymFilterFactory.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FSTSynonymFilterFactory.java
Thu Sep 19 21:45:33 2013
@@ -102,61 +102,44 @@ final class FSTSynonymFilterFactory exte
     };
 
     try {
+      String formatClass = format;
       if (format == null || format.equals("solr")) {
-        // TODO: expose dedup as a parameter?
-        map = loadSolrSynonyms(loader, true, analyzer);
+        formatClass = SolrSynonymParser.class.getName();
       } else if (format.equals("wordnet")) {
-        map = loadWordnetSynonyms(loader, true, analyzer);
-      } else {
-        // TODO: somehow make this more pluggable
-        throw new IllegalArgumentException("Unrecognized synonyms format: " + format);
+        formatClass = WordnetSynonymParser.class.getName();
       }
+      // TODO: expose dedup as a parameter?
+      map = loadSynonyms(loader, formatClass, true, analyzer);
     } catch (ParseException e) {
       throw new IOException("Error parsing synonyms file:", e);
     }
   }
-  
+
   /**
-   * Load synonyms from the solr format, "format=solr".
+   * Load synonyms with the given {@link SynonymMap.Parser} class.
    */
-  private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer)
throws IOException, ParseException {    
+  private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer
analyzer) throws IOException, ParseException {
     CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
-      .onMalformedInput(CodingErrorAction.REPORT)
-      .onUnmappableCharacter(CodingErrorAction.REPORT);
-    
-    SolrSynonymParser parser = new SolrSynonymParser(dedup, expand, analyzer);
-    File synonymFile = new File(synonyms);
-    if (synonymFile.exists()) {
-      decoder.reset();
-      parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
-    } else {
-      List<String> files = splitFileNames(synonyms);
-      for (String file : files) {
-        decoder.reset();
-        parser.add(new InputStreamReader(loader.openResource(file), decoder));
-      }
+        .onMalformedInput(CodingErrorAction.REPORT)
+        .onUnmappableCharacter(CodingErrorAction.REPORT);
+
+    SynonymMap.Parser parser;
+    Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
+    try {
+      parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup,
expand, analyzer);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
     }
-    return parser.build();
-  }
-  
-  /**
-   * Load synonyms from the wordnet format, "format=wordnet".
-   */
-  private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer)
throws IOException, ParseException {
-    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
-      .onMalformedInput(CodingErrorAction.REPORT)
-      .onUnmappableCharacter(CodingErrorAction.REPORT);
-    
-    WordnetSynonymParser parser = new WordnetSynonymParser(dedup, expand, analyzer);
+
     File synonymFile = new File(synonyms);
     if (synonymFile.exists()) {
       decoder.reset();
-      parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
+      parser.parse(new InputStreamReader(loader.openResource(synonyms), decoder));
     } else {
       List<String> files = splitFileNames(synonyms);
       for (String file : files) {
         decoder.reset();
-        parser.add(new InputStreamReader(loader.openResource(file), decoder));
+        parser.parse(new InputStreamReader(loader.openResource(file), decoder));
       }
     }
     return parser.build();

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java
Thu Sep 19 21:45:33 2013
@@ -54,17 +54,16 @@ import org.apache.lucene.util.CharsRef;
  *  </ol>
  * @lucene.experimental
  */
-public class SolrSynonymParser extends SynonymMap.Builder {
+public class SolrSynonymParser extends SynonymMap.Parser {
   private final boolean expand;
-  private final Analyzer analyzer;
   
   public SolrSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) {
-    super(dedup);
+    super(dedup, analyzer);
     this.expand = expand;
-    this.analyzer = analyzer;
   }
-  
-  public void add(Reader in) throws IOException, ParseException {
+
+  @Override
+  public void parse(Reader in) throws IOException, ParseException {
     LineNumberReader br = new LineNumberReader(in);
     try {
       addInternal(br);
@@ -96,19 +95,19 @@ public class SolrSynonymParser extends S
         String inputStrings[] = split(sides[0], ",");
         inputs = new CharsRef[inputStrings.length];
         for (int i = 0; i < inputs.length; i++) {
-          inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
+          inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRef());
         }
         
         String outputStrings[] = split(sides[1], ",");
         outputs = new CharsRef[outputStrings.length];
         for (int i = 0; i < outputs.length; i++) {
-          outputs[i] = analyze(analyzer, unescape(outputStrings[i]).trim(), new CharsRef());
+          outputs[i] = analyze(unescape(outputStrings[i]).trim(), new CharsRef());
         }
       } else {
         String inputStrings[] = split(line, ",");
         inputs = new CharsRef[inputStrings.length];
         for (int i = 0; i < inputs.length; i++) {
-          inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
+          inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRef());
         }
         if (expand) {
           outputs = inputs;

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
Thu Sep 19 21:45:33 2013
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.Analyzer; // javadocs
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.synonym.SynonymFilter;
 import org.apache.lucene.util.Version;
@@ -48,6 +49,18 @@ import org.apache.lucene.analysis.util.T
  * the same name as an init param used by the SynonymFilterFactory, the prefix 
  * is mandatory.
  * </p>
+ * <p>
+ * The optional {@code format} parameter controls how the synonyms will be parsed:
+ * It supports the short names of {@code solr} for {@link SolrSynonymParser} 
+ * and {@code wordnet} for and {@link WordnetSynonymParser}, or your own 
+ * {@code SynonymMap.Parser} class name. The default is {@code solr}.
+ * A custom {@link SynonymMap.Parser} is expected to have a constructor taking:
+ * <ul>
+ *   <li><code>boolean dedup</code> - true if duplicates should be ignored,
false otherwise</li>
+ *   <li><code>boolean expand</code> - true if conflation groups should
be expanded, false if they are one-directional</li>
+ *   <li><code>{@link Analyzer} analyzer</code> - an analyzer used for
each raw synonym</li>
+ * </ul>
+ * </p>
  */
 public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware
{
   private final TokenFilterFactory delegator;

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
Thu Sep 19 21:45:33 2013
@@ -18,6 +18,8 @@ package org.apache.lucene.analysis.synon
  */
 
 import java.io.IOException;
+import java.io.Reader;
+import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -107,39 +109,7 @@ public class SynonymMap {
       return reuse;
     }
     
-    /** Sugar: analyzes the text with the analyzer and
-     *  separates by {@link SynonymMap#WORD_SEPARATOR}.
-     *  reuse and its chars must not be null. */
-    public static CharsRef analyze(Analyzer analyzer, String text, CharsRef reuse) throws
IOException {
-      TokenStream ts = analyzer.tokenStream("", text);
-      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
-      PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
-      ts.reset();
-      reuse.length = 0;
-      while (ts.incrementToken()) {
-        int length = termAtt.length();
-        if (length == 0) {
-          throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length
token");
-        }
-        if (posIncAtt.getPositionIncrement() != 1) {
-          throw new IllegalArgumentException("term: " + text + " analyzed to a token with
posinc != 1");
-        }
-        reuse.grow(reuse.length + length + 1); /* current + word + separator */
-        int end = reuse.offset + reuse.length;
-        if (reuse.length > 0) {
-          reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
-          reuse.length++;
-        }
-        System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
-        reuse.length += length;
-      }
-      ts.end();
-      ts.close();
-      if (reuse.length == 0) {
-        throw new IllegalArgumentException("term: " + text + " was completely eliminated
by analyzer");
-      }
-      return reuse;
-    }
+
 
     /** only used for asserting! */
     private boolean hasHoles(CharsRef chars) {
@@ -312,4 +282,60 @@ public class SynonymMap {
       return new SynonymMap(fst, words, maxHorizontalContext);
     }
   }
+
+  /**
+   * Abstraction for parsing synonym files.
+   *
+   * @lucene.experimental
+   */
+  public static abstract class Parser extends Builder {
+
+    private final Analyzer analyzer;
+
+    public Parser(boolean dedup, Analyzer analyzer) {
+      super(dedup);
+      this.analyzer = analyzer;
+    }
+
+    /**
+     * Parse the given input, adding synonyms to the inherited {@link Builder}.
+     * @param in The input to parse
+     */
+    public abstract void parse(Reader in) throws IOException, ParseException;
+
+    /** Sugar: analyzes the text with the analyzer and
+     *  separates by {@link SynonymMap#WORD_SEPARATOR}.
+     *  reuse and its chars must not be null. */
+    public CharsRef analyze(String text, CharsRef reuse) throws IOException {
+      TokenStream ts = analyzer.tokenStream("", text);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+      PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
+      ts.reset();
+      reuse.length = 0;
+      while (ts.incrementToken()) {
+        int length = termAtt.length();
+        if (length == 0) {
+          throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length
token");
+        }
+        if (posIncAtt.getPositionIncrement() != 1) {
+          throw new IllegalArgumentException("term: " + text + " analyzed to a token with
posinc != 1");
+        }
+        reuse.grow(reuse.length + length + 1); /* current + word + separator */
+        int end = reuse.offset + reuse.length;
+        if (reuse.length > 0) {
+          reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
+          reuse.length++;
+        }
+        System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
+        reuse.length += length;
+      }
+      ts.end();
+      ts.close();
+      if (reuse.length == 0) {
+        throw new IllegalArgumentException("term: " + text + " was completely eliminated
by analyzer");
+      }
+      return reuse;
+    }
+  }
+
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
Thu Sep 19 21:45:33 2013
@@ -32,17 +32,16 @@ import org.apache.lucene.util.CharsRef;
  * @lucene.experimental
  */
 // TODO: allow you to specify syntactic categories (e.g. just nouns, etc)
-public class WordnetSynonymParser extends SynonymMap.Builder {
+public class WordnetSynonymParser extends SynonymMap.Parser {
   private final boolean expand;
-  private final Analyzer analyzer;
   
   public WordnetSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) {
-    super(dedup);
+    super(dedup, analyzer);
     this.expand = expand;
-    this.analyzer = analyzer;
   }
-  
-  public void add(Reader in) throws IOException, ParseException {
+
+  @Override
+  public void parse(Reader in) throws IOException, ParseException {
     LineNumberReader br = new LineNumberReader(in);
     try {
       String line = null;
@@ -89,7 +88,7 @@ public class WordnetSynonymParser extend
     int end = line.lastIndexOf('\'');
     
     String text = line.substring(start, end).replace("''", "'");
-    return analyze(analyzer, text, reuse);
+    return analyze(text, reuse);
   }
   
   private void addInternal(CharsRef synset[], int size) {

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
Thu Sep 19 21:45:33 2013
@@ -44,7 +44,7 @@ public class TestSolrSynonymParser exten
     "this test, that testing";
     
     SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
-    parser.add(new StringReader(testFile));
+    parser.parse(new StringReader(testFile));
     final SynonymMap map = parser.build();
     
     Analyzer analyzer = new Analyzer() {
@@ -77,7 +77,7 @@ public class TestSolrSynonymParser exten
   public void testInvalidDoubleMap() throws Exception {
     String testFile = "a => b => c"; 
     SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
-    parser.add(new StringReader(testFile));
+    parser.parse(new StringReader(testFile));
   }
   
   /** parse a syn file with bad syntax */
@@ -85,7 +85,7 @@ public class TestSolrSynonymParser exten
   public void testInvalidAnalyzesToNothingOutput() throws Exception {
     String testFile = "a => 1"; 
     SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(),
MockTokenizer.SIMPLE, false));
-    parser.add(new StringReader(testFile));
+    parser.parse(new StringReader(testFile));
   }
   
   /** parse a syn file with bad syntax */
@@ -93,7 +93,7 @@ public class TestSolrSynonymParser exten
   public void testInvalidAnalyzesToNothingInput() throws Exception {
     String testFile = "1 => a"; 
     SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(),
MockTokenizer.SIMPLE, false));
-    parser.add(new StringReader(testFile));
+    parser.parse(new StringReader(testFile));
   }
   
   /** parse a syn file with bad syntax */
@@ -101,7 +101,7 @@ public class TestSolrSynonymParser exten
   public void testInvalidPositionsInput() throws Exception {
     String testFile = "testola => the test";
     SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
-    parser.add(new StringReader(testFile));
+    parser.parse(new StringReader(testFile));
   }
   
   /** parse a syn file with bad syntax */
@@ -109,7 +109,7 @@ public class TestSolrSynonymParser exten
   public void testInvalidPositionsOutput() throws Exception {
     String testFile = "the test => testola";
     SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
-    parser.add(new StringReader(testFile));
+    parser.parse(new StringReader(testFile));
   }
   
   /** parse a syn file with some escaped syntax chars */
@@ -118,7 +118,7 @@ public class TestSolrSynonymParser exten
       "a\\=>a => b\\=>b\n" +
       "a\\,a => b\\,b";
     SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(),
MockTokenizer.KEYWORD, false));
-    parser.add(new StringReader(testFile));
+    parser.parse(new StringReader(testFile));
     final SynonymMap map = parser.build();
     Analyzer analyzer = new Analyzer() {
       @Override

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
Thu Sep 19 21:45:33 2013
@@ -32,16 +32,33 @@ import org.apache.lucene.analysis.util.S
 import org.apache.lucene.util.Version;
 
 public class TestSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
-  /** test that we can parse and use the solr syn file */
-  public void testSynonyms() throws Exception {
+
+  /** checks for synonyms of "GB" in synonyms.txt */
+  private void checkSolrSynonyms(TokenFilterFactory factory) throws Exception {
     Reader reader = new StringReader("GB");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-    stream = tokenFilterFactory("Synonym", "synonyms", "synonyms.txt").create(stream);
+    stream = factory.create(stream);
     assertTrue(stream instanceof SynonymFilter);
-    assertTokenStreamContents(stream, 
+    assertTokenStreamContents(stream,
         new String[] { "GB", "gib", "gigabyte", "gigabytes" },
         new int[] { 1, 0, 0, 0 });
   }
+
+  /** checks for synonyms of "second" in synonyms-wordnet.txt */
+  private void checkWordnetSynonyms(TokenFilterFactory factory) throws Exception {
+    Reader reader = new StringReader("second");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = factory.create(stream);
+    assertTrue(stream instanceof SynonymFilter);
+    assertTokenStreamContents(stream,
+        new String[] { "second", "2nd", "two" },
+        new int[] { 1, 0, 0 });
+  }
+
+  /** test that we can parse and use the solr syn file */
+  public void testSynonyms() throws Exception {
+    checkSolrSynonyms(tokenFilterFactory("Synonym", "synonyms", "synonyms.txt"));
+  }
   
   /** test that we can parse and use the solr syn file, with the old impl
    * @deprecated Remove this test in Lucene 5.0 */
@@ -82,6 +99,14 @@ public class TestSynonymFilterFactory ex
         "synonyms", "synonyms.txt").create(stream);
     assertTokenStreamContents(stream, new String[] { "GB" });
   }
+
+  public void testFormat() throws Exception {
+    checkSolrSynonyms(tokenFilterFactory("Synonym", "synonyms", "synonyms.txt", "format",
"solr"));
+    checkWordnetSynonyms(tokenFilterFactory("Synonym", "synonyms", "synonyms-wordnet.txt",
"format", "wordnet"));
+    // explicit class should work the same as the "solr" alias
+    checkSolrSynonyms(tokenFilterFactory("Synonym", "synonyms", "synonyms.txt",
+        "format", SolrSynonymParser.class.getName()));
+  }
   
   /** Test that bogus arguments result in exception */
   public void testBogusArguments() throws Exception {
@@ -159,6 +184,7 @@ public class TestSynonymFilterFactory ex
       // :NOOP:
     }
   }
+
   private static void assertDelegator(final TokenFilterFactory factory,
                                       final Class delegatorClass) {
     assertNotNull(factory);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
Thu Sep 19 21:45:33 2013
@@ -624,7 +624,7 @@ public class TestSynonymMapFilter extend
       "bbb => bbbb1 bbbb2\n";
       
     SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
-    parser.add(new StringReader(testFile));
+    parser.parse(new StringReader(testFile));
     final SynonymMap map = parser.build();
       
     Analyzer analyzer = new Analyzer() {

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestWordnetSynonymParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestWordnetSynonymParser.java?rev=1524849&r1=1524848&r2=1524849&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestWordnetSynonymParser.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestWordnetSynonymParser.java
Thu Sep 19 21:45:33 2013
@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.Tokenizer;
 
 public class TestWordnetSynonymParser extends BaseTokenStreamTestCase {
-  Analyzer analyzer;
 
   String synonymsFile = 
     "s(100000001,1,'woods',n,1,0).\n" +
@@ -42,7 +41,7 @@ public class TestWordnetSynonymParser ex
   
   public void testSynonyms() throws Exception {
     WordnetSynonymParser parser = new WordnetSynonymParser(true, true, new MockAnalyzer(random()));
-    parser.add(new StringReader(synonymsFile));
+    parser.parse(new StringReader(synonymsFile));
     final SynonymMap map = parser.build();
     
     Analyzer analyzer = new Analyzer() {



Mime
View raw message