lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1140394 - in /lucene/dev/trunk: lucene/contrib/ lucene/contrib/misc/ lucene/contrib/misc/src/java/org/apache/lucene/index/ lucene/contrib/misc/src/java/org/apache/lucene/misc/ lucene/contrib/wordnet/ lucene/contrib/wordnet/src/java/org/apa...
Date Tue, 28 Jun 2011 01:44:16 GMT
Author: rmuir
Date: Tue Jun 28 01:44:15 2011
New Revision: 1140394

URL: http://svn.apache.org/viewvc?rev=1140394&view=rev
Log:
LUCENE-3250: remove contrib/misc,wordnet,suggest dependencies on modules/analysis

Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/misc/build.xml
    lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
    lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java
    lucene/dev/trunk/lucene/contrib/wordnet/build.xml
    lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java
    lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java
    lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java
    lucene/dev/trunk/modules/suggest/build.xml
    lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue Jun 28 01:44:15 2011
@@ -4,6 +4,11 @@ For more information on past and future 
 http://s.apache.org/luceneversions
 
 ======================= Trunk (not yet released) =======================
+
+Changes in runtime behavior
+
+ * LUCENE-3250: Wordnet's SynExpand requires a non-null Analyzer (it no longer
+   treats null as StandardAnalyzer).  (Robert Muir)
   
 Build
 

Modified: lucene/dev/trunk/lucene/contrib/misc/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/build.xml?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/build.xml (original)
+++ lucene/dev/trunk/lucene/contrib/misc/build.xml Tue Jun 28 01:44:15 2011
@@ -27,22 +27,6 @@
 
   <import file="../contrib-build.xml"/>
 
-  <module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
-      property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
-
-  <path id="classpath">
-	 <pathelement path="${analyzers-common.jar}"/>
-	 <path refid="base.classpath"/>
-  </path>
-
-  <target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
-
-  <target name="compile-analyzers-common" unless="analyzers-common.uptodate">
-    <subant target="default">
-      <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
-    </subant>
-  </target>
-
   <target name="build-native-unix" >
     <mkdir dir="${common.build.dir}/native"/>
 

Modified: lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java Tue Jun 28 01:44:15 2011
@@ -21,7 +21,6 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.IndexWriter; // javadoc
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
@@ -98,7 +97,7 @@ public class MultiPassIndexSplitter {
       }
       IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(
           Version.LUCENE_CURRENT,
-          new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
+          null)
           .setOpenMode(OpenMode.CREATE));
       System.err.println("Writing part " + (i + 1) + " ...");
       w.addIndexes(input);

Modified: lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java Tue Jun 28 01:44:15 2011
@@ -16,7 +16,6 @@ package org.apache.lucene.misc;
   * limitations under the License.
   */
 
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -40,7 +39,7 @@ public class IndexMergeTool {
     FSDirectory mergedIndex = FSDirectory.open(new File(args[0]));
 
     IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(
-        Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
+        Version.LUCENE_CURRENT, null)
         .setOpenMode(OpenMode.CREATE));
 
     Directory[] indexes = new Directory[args.length - 1];

Modified: lucene/dev/trunk/lucene/contrib/wordnet/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/wordnet/build.xml?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/wordnet/build.xml (original)
+++ lucene/dev/trunk/lucene/contrib/wordnet/build.xml Tue Jun 28 01:44:15 2011
@@ -30,22 +30,6 @@
 
   <import file="../contrib-build.xml"/>
 
-  <module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
-      property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
-
-  <path id="classpath">
-	 <pathelement path="${analyzers-common.jar}"/>
-	 <path refid="base.classpath"/>
-  </path>
-
-  <target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
-
-  <target name="compile-analyzers-common" unless="analyzers-common.uptodate">
-    <subant target="default">
-      <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
-    </subant>
-  </target>
-	
   <target name="index" depends="compile" description="Build WordNet index">
     <fail if="synindex.exists">
       Index already exists - must remove first.
@@ -83,24 +67,4 @@
     </java>
   </target>
 
-  <target name="expand" description="Perform synonym expansion on a query">
-    <fail unless="synindex.exists">
-      Index does not exist.
-    </fail>
-
-    <fail unless="query">
-      Must specify 'query' property.
-    </fail>
-    
-    <java classname="org.apache.lucene.wordnet.SynExpand">
-      <classpath>
-        <path refid="compile.classpath"/>
-        <pathelement location="${build.dir}/classes"/>
-      </classpath>
-
-      <arg file="${synindex.dir}"/>
-      <arg value="${query}"/>
-    </java>
-  </target>
-
 </project>

Modified: lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java (original)
+++ lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java Tue Jun 28 01:44:15 2011
@@ -17,7 +17,6 @@ package org.apache.lucene.wordnet;
  * limitations under the License.
  */
 
-import java.io.File;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.HashSet;
@@ -28,7 +27,6 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
@@ -41,8 +39,6 @@ import org.apache.lucene.search.IndexSea
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.Version;
 
 
 /**
@@ -54,48 +50,13 @@ import org.apache.lucene.util.Version;
 public final class SynExpand {
 
 	/**
-	 * Test driver for synonym expansion.
-	 * Uses boost factor of 0.9 for illustrative purposes.
-	 *
-	 * If you pass in the query "big dog" then it prints out:
-	 *
-	 * <code><pre>
-	 * Query: big adult^0.9 bad^0.9 bighearted^0.9 boastful^0.9 boastfully^0.9 bounteous^0.9 bountiful^0.9 braggy^0.9 crowing^0.9 freehanded^0.9 giving^0.9 grown^0.9 grownup^0.9 handsome^0.9 large^0.9 liberal^0.9 magnanimous^0.9 momentous^0.9 openhanded^0.9 prominent^0.9 swelled^0.9 vainglorious^0.9 vauntingly^0.9
-	 * dog andiron^0.9 blackguard^0.9 bounder^0.9 cad^0.9 chase^0.9 click^0.9 detent^0.9 dogtooth^0.9 firedog^0.9 frank^0.9 frankfurter^0.9 frump^0.9 heel^0.9 hotdog^0.9 hound^0.9 pawl^0.9 tag^0.9 tail^0.9 track^0.9 trail^0.9 weenie^0.9 wiener^0.9 wienerwurst^0.9
-	 * </pre></code>
-	 */
-	public static void main(String[] args) throws IOException
-	{
-		if (args.length != 2)
-		{
-			System.out.println(
-							   "java org.apache.lucene.wordnet.SynExpand <index path> <query>");
-		}
-
-		FSDirectory directory = FSDirectory.open(new File(args[0]));
-		IndexSearcher searcher = new IndexSearcher(directory, true);
-
-		String query = args[1];
-		String field = "contents";
-
-		Query q = expand( query, searcher, new StandardAnalyzer(Version.LUCENE_CURRENT), field, 0.9f);
-		System.out.println( "Query: " + q.toString( field));
-
-
-
-		searcher.close();
-		directory.close();
-	}
-
-
-	/**
 	 * Perform synonym expansion on a query.
 	 *
 	 * @param query users query that is assumed to not have any "special" query syntax, thus it should be just normal words, so "big dog" makes sense, but a query like "title:foo^1.2" doesn't as this should presumably be passed directly to the default query parser.
 	 *
 	 * @param syns a opened to the Lucene index you previously created with {@link Syns2Index}. The searcher is not closed or otherwise altered.
 	 *
-	 * @param a optional analyzer used to parse the users query else {@link StandardAnalyzer} is used
+	 * @param a analyzer used to parse the users query.
 	 *
 	 * @param f optional field name to search in or null if you want the default of "contents"
 	 *
@@ -113,7 +74,6 @@ public final class SynExpand {
 		final Set<String> already = new HashSet<String>(); // avoid dups 
 		List<String> top = new LinkedList<String>(); // needs to be separately listed..
 		final String field = ( f == null) ? "contents" : f;
-		if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);
 
 		// [1] Parse query into separate words so that when we expand we can avoid dups
 		TokenStream ts = a.reusableTokenStream( field, new StringReader( query));

Modified: lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java (original)
+++ lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java Tue Jun 28 01:44:15 2011
@@ -41,6 +41,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.store.FSDirectory;
 
 
@@ -48,24 +49,6 @@ import org.apache.lucene.store.FSDirecto
  * Test program to look up synonyms.
  */
 public class SynLookup {
-
-  final static class CountingCollector extends Collector {
-    public int numHits = 0;
-    
-    @Override
-    public void setScorer(Scorer scorer) throws IOException {}
-    @Override
-    public void collect(int doc) throws IOException {
-      numHits++;
-    }
-
-    @Override
-    public void setNextReader(AtomicReaderContext context) {}
-    @Override
-    public boolean acceptsDocsOutOfOrder() {
-      return true;
-    }    
-  }
   
 	public static void main(String[] args) throws IOException {
 		if (args.length != 2) {
@@ -78,16 +61,16 @@ public class SynLookup {
 
 		String word = args[1];
 		Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));
-		CountingCollector countingCollector = new CountingCollector();
+		TotalHitCountCollector countingCollector = new TotalHitCountCollector();
 		searcher.search(query, countingCollector);
 
-		if (countingCollector.numHits == 0) {
+		if (countingCollector.getTotalHits() == 0) {
 			System.out.println("No synonyms found for " + word);
 		} else {
 			System.out.println("Synonyms found for \"" + word + "\":");
 		}
 
-		ScoreDoc[] hits = searcher.search(query, countingCollector.numHits).scoreDocs;
+		ScoreDoc[] hits = searcher.search(query, countingCollector.getTotalHits()).scoreDocs;
 		
 		for (int i = 0; i < hits.length; i++) {
 			Document doc = searcher.doc(hits[i].doc);

Modified: lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java (original)
+++ lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Tue Jun 28 01:44:15 2011
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStreamReader;
 import java.io.PrintStream;
+import java.io.Reader;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
@@ -31,7 +32,7 @@ import java.util.TreeMap;
 import java.util.TreeSet;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
@@ -90,9 +91,15 @@ public class Syns2Index
 	public static final String F_WORD = "word";
 
 	/**
-	 *
+	 * we don't actually analyze any text (only a NOT_ANALYZED field),
+	 * but analyzer can't be null, docinverter wants the offset gap!
 	 */
-    private static final Analyzer ana = new StandardAnalyzer(Version.LUCENE_CURRENT);
+    private static final Analyzer ana = new Analyzer() {
+      @Override
+      public TokenStream tokenStream(String fieldName, Reader reader) {
+        return null;
+      }
+    };
 
     /**
      * Takes arg of prolog file name and index directory.

Modified: lucene/dev/trunk/modules/suggest/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/build.xml?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/build.xml (original)
+++ lucene/dev/trunk/modules/suggest/build.xml Tue Jun 28 01:44:15 2011
@@ -29,21 +29,5 @@
 
   <import file="../../lucene/contrib/contrib-build.xml"/>
 
-  <module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
-      property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
-
-  <path id="classpath">
-	 <pathelement path="${analyzers-common.jar}"/>
-	 <path refid="base.classpath"/>
-  </path>
-
-  <target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
-
-  <target name="compile-analyzers-common" unless="analyzers-common.uptodate">
-    <subant target="default">
-      <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
-    </subant>
-  </target>
-
   <target name="dist-maven" depends="jar-core,javadocs,contrib-build.dist-maven" />
 </project>

Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=1140394&r1=1140393&r2=1140394&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java Tue Jun 28 01:44:15 2011
@@ -18,12 +18,14 @@ package org.apache.lucene.search.spell;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.List;
 
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -112,6 +114,17 @@ public class SpellChecker implements jav
 
   private StringDistance sd;
   private Comparator<SuggestWord> comparator;
+  
+  /** we don't need to actually analyze any content:
+   *  all fields are indexed NOT_ANALYZED, but docsinverter
+   *  needs this for the offset gap!
+   */
+  private static Analyzer noAnalyzer = new Analyzer() {
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return null;
+    }
+  };
 
   /**
    * Use the given directory as a spell checker index. The directory
@@ -168,7 +181,7 @@ public class SpellChecker implements jav
       if (!IndexReader.indexExists(spellIndexDir)) {
           IndexWriter writer = new IndexWriter(spellIndexDir,
             new IndexWriterConfig(Version.LUCENE_CURRENT,
-                new WhitespaceAnalyzer(Version.LUCENE_CURRENT)));
+                noAnalyzer));
           writer.close();
       }
       swapSearcher(spellIndexDir);
@@ -466,7 +479,7 @@ public class SpellChecker implements jav
       final Directory dir = this.spellIndex;
       final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
           Version.LUCENE_CURRENT,
-          new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
+          noAnalyzer)
           .setOpenMode(OpenMode.CREATE));
       writer.close();
       swapSearcher(dir);
@@ -503,7 +516,7 @@ public class SpellChecker implements jav
     synchronized (modifyCurrentIndexLock) {
       ensureOpen();
       final Directory dir = this.spellIndex;
-      final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
+      final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, noAnalyzer).setRAMBufferSizeMB(ramMB));
       ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setMaxMergeAtOnce(mergeFactor);
       IndexSearcher indexSearcher = obtainSearcher();
       final List<TermsEnum> termsEnums = new ArrayList<TermsEnum>();



Mime
View raw message