Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 45524 invoked from network); 21 Oct 2009 18:26:05 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 21 Oct 2009 18:26:05 -0000 Received: (qmail 66808 invoked by uid 500); 21 Oct 2009 17:41:14 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 55659 invoked by uid 500); 21 Oct 2009 17:37:53 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 49146 invoked by uid 99); 21 Oct 2009 16:32:26 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 21 Oct 2009 16:32:26 +0000 X-ASF-Spam-Status: No, hits=-2.6 required=5.0 tests=BAYES_00 X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 21 Oct 2009 16:32:24 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 1F51E23888FC; Wed, 21 Oct 2009 16:32:04 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r828091 - in /lucene/java/trunk/contrib: ./ memory/src/test/org/apache/lucene/index/memory/ wordnet/src/java/org/apache/lucene/wordnet/ wordnet/src/test/ wordnet/src/test/org/ wordnet/src/test/org/apache/ wordnet/src/test/org/apache/lucene/... Date: Wed, 21 Oct 2009 16:32:03 -0000 To: java-commits@lucene.apache.org From: rmuir@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20091021163204.1F51E23888FC@eris.apache.org> Author: rmuir Date: Wed Oct 21 16:32:03 2009 New Revision: 828091 URL: http://svn.apache.org/viewvc?rev=828091&view=rev Log: LUCENE-2001: Fix parsing bug in wordnet contrib Added: lucene/java/trunk/contrib/wordnet/src/test/ lucene/java/trunk/contrib/wordnet/src/test/org/ lucene/java/trunk/contrib/wordnet/src/test/org/apache/ lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/ lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/ lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java (with props) lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt (with props) Modified: lucene/java/trunk/contrib/CHANGES.txt lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Modified: lucene/java/trunk/contrib/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=828091&r1=828090&r2=828091&view=diff ============================================================================== --- lucene/java/trunk/contrib/CHANGES.txt (original) +++ lucene/java/trunk/contrib/CHANGES.txt Wed Oct 21 16:32:03 2009 @@ -38,6 +38,9 @@ * LUCENE-1953: FastVectorHighlighter: small fragCharSize can cause StringIndexOutOfBoundsException. (Koji Sekiguchi) + * LUCENE-2001: Wordnet Syns2Index incorrectly parses synonyms that + contain a single quote. (Parag H. Dave via Robert Muir) + New features * LUCENE-1924: Added BalancedSegmentMergePolicy to contrib/misc, Modified: lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java?rev=828091&r1=828090&r2=828091&view=diff ============================================================================== --- lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java (original) +++ lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java Wed Oct 21 16:32:03 2009 @@ -45,6 +45,14 @@ new int[] { 1, 1, 1, 1, 0, 0 }); } + public void testSynonymsSingleQuote() throws Exception { + SynonymMap map = new SynonymMap(new FileInputStream(testFile)); + /* all expansions */ + Analyzer analyzer = new SynonymWhitespaceAnalyzer(map, Integer.MAX_VALUE); + assertAnalyzesTo(analyzer, "king", + new String[] { "king", "baron" }); + } + public void testSynonymsLimitedAmount() throws Exception { SynonymMap map = new SynonymMap(new FileInputStream(testFile)); /* limit to one synonym expansion */ Modified: lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt?rev=828091&r1=828090&r2=828091&view=diff ============================================================================== --- lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt (original) +++ lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt Wed Oct 21 16:32:03 2009 @@ -3,3 +3,7 @@ s(100000001,3,'forest',n,1,0). s(100000002,1,'wolfish',n,1,0). s(100000002,2,'ravenous',n,1,0). +s(100000003,1,'king',n,1,1). +s(100000003,2,'baron',n,1,1). +s(100000004,1,'king''sevil',n,1,1). +s(100000004,2,'meany',n,1,1). Modified: lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java?rev=828091&r1=828090&r2=828091&view=diff ============================================================================== --- lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java (original) +++ lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Wed Oct 21 16:32:03 2009 @@ -165,8 +165,8 @@ String num = line.substring(0, comma); int q1 = line.indexOf('\''); line = line.substring(q1 + 1); - int q2 = line.indexOf('\''); - String word = line.substring(0, q2).toLowerCase(); + int q2 = line.lastIndexOf('\''); + String word = line.substring(0, q2).toLowerCase().replace("''", "'"); // make sure is a normal word if (! isDecent(word)) Added: lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java?rev=828091&view=auto ============================================================================== --- lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java (added) +++ lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java Wed Oct 21 16:32:03 2009 @@ -0,0 +1,89 @@ +package org.apache.lucene.wordnet; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestWordnet extends LuceneTestCase { + private Searcher searcher; + + File dataDir = new File(System.getProperty("dataDir", "./bin")); + File testFile = new File(dataDir, "org/apache/lucene/wordnet/testSynonyms.txt"); + + String storePathName = + new File(System.getProperty("tempDir"),"testLuceneWordnet").getAbsolutePath(); + + @Override + protected void setUp() throws Exception { + super.setUp(); + // create a temporary synonym index + String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName }; + + try { + Syns2Index.main(commandLineArgs); + } catch (Throwable t) { throw new RuntimeException(t); } + + searcher = new IndexSearcher(FSDirectory.open(new File(storePathName)), true); + } + + public void testExpansion() throws IOException { + assertExpandsTo("woods", new String[] { "woods", "forest", "wood" }); + } + + public void testExpansionSingleQuote() throws IOException { + assertExpandsTo("king", new String[] { "king", "baron" }); + } + + private void assertExpandsTo(String term, String expected[]) throws IOException { + Query expandedQuery = SynExpand.expand(term, searcher, new + WhitespaceAnalyzer(), "field", 1F); + BooleanQuery expectedQuery = new BooleanQuery(); + for (String t : expected) + expectedQuery.add(new TermQuery(new Term("field", t)), + BooleanClause.Occur.SHOULD); + assertEquals(expectedQuery, expandedQuery); + } + + @Override + protected void tearDown() throws Exception { + searcher.close(); + rmDir(storePathName); // delete our temporary synonym index + super.tearDown(); + } + + private void rmDir(String directory) { + File dir = new File(directory); + File[] files = dir.listFiles(); + for (int i = 0; i < files.length; i++) { + files[i].delete(); + } + dir.delete(); + } +} Propchange: lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java ------------------------------------------------------------------------------ svn:eol-style = native Added: lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt?rev=828091&view=auto ============================================================================== --- lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt (added) +++ lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt Wed Oct 21 16:32:03 2009 @@ -0,0 +1,9 @@ +s(100000001,1,'woods',n,1,0). +s(100000001,2,'wood',n,1,0). +s(100000001,3,'forest',n,1,0). +s(100000002,1,'wolfish',n,1,0). +s(100000002,2,'ravenous',n,1,0). +s(100000003,1,'king',n,1,1). +s(100000003,2,'baron',n,1,1). +s(100000004,1,'king''sevil',n,1,1). +s(100000004,2,'meany',n,1,1). Propchange: lucene/java/trunk/contrib/wordnet/src/test/org/apache/lucene/wordnet/testSynonyms.txt ------------------------------------------------------------------------------ svn:eol-style = native