lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r828728 - in /lucene/java/branches/lucene_2_9/contrib: ./ memory/src/test/org/apache/lucene/index/memory/ wordnet/src/java/org/apache/lucene/wordnet/ wordnet/src/test/ wordnet/src/test/org/ wordnet/src/test/org/apache/ wordnet/src/test/org/...
Date Thu, 22 Oct 2009 14:49:06 GMT
Author: gsingers
Date: Thu Oct 22 14:49:05 2009
New Revision: 828728

URL: http://svn.apache.org/viewvc?rev=828728&view=rev
Log:
LUCENE-2001: wordnet fix on branch

Added:
    lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/
    lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/
    lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/
    lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/
    lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/
    lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java
  (with props)
    lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt
  (with props)
Modified:
    lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
    lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java
    lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt
    lucene/java/branches/lucene_2_9/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java

Modified: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/CHANGES.txt?rev=828728&r1=828727&r2=828728&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/contrib/CHANGES.txt Thu Oct 22 14:49:05 2009
@@ -18,6 +18,9 @@
    
  * LUCENE-1929: Highlighter throws exception on NumericRangeQuery and does not
    support deprecated RangeQuery.  (Mark Miller)
+   
+ * LUCENE-2001: Wordnet Syns2Index incorrectly parses synonyms that
+   contain a single quote. (Parag H. Dave via Robert Muir)
 
 ======================= Release 2.9.0 2009-09-23 =======================
 

Modified: lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java?rev=828728&r1=828727&r2=828728&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java Thu Oct 22 14:49:05 2009
@@ -45,6 +45,14 @@
         new int[] { 1, 1, 1, 1, 0, 0 });
   }
   
+  public void testSynonymsSingleQuote() throws Exception {
+    SynonymMap map = new SynonymMap(new FileInputStream(testFile));
+    /* all expansions */
+    Analyzer analyzer = new SynonymWhitespaceAnalyzer(map, Integer.MAX_VALUE);
+    assertAnalyzesTo(analyzer, "king",
+        new String[] { "king", "baron" });
+  }
+  
   public void testSynonymsLimitedAmount() throws Exception {
     SynonymMap map = new SynonymMap(new FileInputStream(testFile));
     /* limit to one synonym expansion */

Modified: lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt?rev=828728&r1=828727&r2=828728&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt (original)
+++ lucene/java/branches/lucene_2_9/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt Thu Oct 22 14:49:05 2009
@@ -3,3 +3,7 @@
 s(100000001,3,'forest',n,1,0).
 s(100000002,1,'wolfish',n,1,0).
 s(100000002,2,'ravenous',n,1,0).
+s(100000003,1,'king',n,1,1).
+s(100000003,2,'baron',n,1,1).
+s(100000004,1,'king''sevil',n,1,1).
+s(100000004,2,'meany',n,1,1).

Modified: lucene/java/branches/lucene_2_9/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java?rev=828728&r1=828727&r2=828728&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Thu Oct 22 14:49:05 2009
@@ -163,8 +163,8 @@
             String num = line.substring(0, comma);
             int q1 = line.indexOf('\'');
             line = line.substring(q1 + 1);
-            int q2 = line.indexOf('\'');
-            String word = line.substring(0, q2).toLowerCase();
+            int q2 = line.lastIndexOf('\'');
+            String word = line.substring(0, q2).toLowerCase().replaceAll("''", "'");
 
             // make sure is a normal word
             if (! isDecent(word))

Added: lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java?rev=828728&view=auto
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java (added)
+++ lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java Thu Oct 22 14:49:05 2009
@@ -0,0 +1,87 @@
+package org.apache.lucene.wordnet;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestWordnet extends LuceneTestCase {
+  private Searcher searcher;
+  
+  File dataDir = new File(System.getProperty("dataDir", "./bin"));
+  File testFile = new File(dataDir, "org/apache/lucene/wordnet/testSynonyms.txt");
+  
+  String storePathName = 
+    new File(System.getProperty("tempDir"),"testLuceneWordnet").getAbsolutePath();
+  
+  protected void setUp() throws Exception {
+    super.setUp();
+    // create a temporary synonym index
+    String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName };
+    
+    try {
+      Syns2Index.main(commandLineArgs);
+    } catch (Throwable t) { throw new RuntimeException(t); }
+    
+    searcher = new IndexSearcher(FSDirectory.open(new File(storePathName)), true);
+  }
+  
+  public void testExpansion() throws IOException {
+    assertExpandsTo("woods", new String[] { "woods", "forest", "wood" });
+  }
+  
+  public void testExpansionSingleQuote() throws IOException {
+    assertExpandsTo("king", new String[] { "king", "baron" });
+  }
+  
+  private void assertExpandsTo(String term, String expected[]) throws IOException {
+    Query expandedQuery = SynExpand.expand(term, searcher, new 
+        WhitespaceAnalyzer(), "field", 1F);
+    BooleanQuery expectedQuery = new BooleanQuery();
+    for (int i = 0; i < expected.length; i++)
+      expectedQuery.add(new TermQuery(new Term("field", expected[i])), 
+          BooleanClause.Occur.SHOULD);
+    assertEquals(expectedQuery, expandedQuery);
+  }
+
+  protected void tearDown() throws Exception {
+    searcher.close();
+    rmDir(storePathName); // delete our temporary synonym index
+    super.tearDown();
+  }
+  
+  private void rmDir(String directory) {
+    File dir = new File(directory);
+    File[] files = dir.listFiles();
+    for (int i = 0; i < files.length; i++) {
+      files[i].delete();
+    }
+    dir.delete();
+  }
+}

Propchange: lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt?rev=828728&view=auto
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt (added)
+++ lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt Thu Oct 22 14:49:05 2009
@@ -0,0 +1,9 @@
+s(100000001,1,'woods',n,1,0).
+s(100000001,2,'wood',n,1,0).
+s(100000001,3,'forest',n,1,0).
+s(100000002,1,'wolfish',n,1,0).
+s(100000002,2,'ravenous',n,1,0).
+s(100000003,1,'king',n,1,1).
+s(100000003,2,'baron',n,1,1).
+s(100000004,1,'king''sevil',n,1,1).
+s(100000004,2,'meany',n,1,1).

Propchange: lucene/java/branches/lucene_2_9/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message