lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1130612 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/ lucene/contrib/ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/ lucene/co...
Date Thu, 02 Jun 2011 15:51:28 GMT
Author: rmuir
Date: Thu Jun  2 15:51:21 2011
New Revision: 1130612

URL: http://svn.apache.org/viewvc?rev=1130612&view=rev
Log:
LUCENE-3135, SOLR-2378: backport all suggester functionality

Added:
    lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
      - copied, changed from r1130570, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
    lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SortedIterator.java
      - copied unchanged from r1130570, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java
    lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
      - copied unchanged from r1130571, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
    lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/
      - copied from r1130567, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/
    lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/suggest/
      - copied from r1130569, lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java
      - copied unchanged from r1130598, lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/fst/
      - copied from r1130598, lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/fst/
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java
      - copied unchanged from r1130598, lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java
      - copied unchanged from r1130598, lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterFSTTest.java
      - copied unchanged from r1130601, lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterFSTTest.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTSTTest.java
      - copied unchanged from r1130601, lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTSTTest.java
Removed:
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/BufferingTermFreqIteratorWrapper.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/FileDictionary.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Lookup.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/SortedTermFreqIteratorWrapper.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/UnsortedTermFreqIteratorWrapper.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTAutocomplete.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/tst/TernaryTreeNode.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/util/SortedIterator.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/util/TermFreqIterator.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
    lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java

Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Thu Jun  2 15:51:21 2011
@@ -9,6 +9,10 @@ New Features
 
  * LUCENE-152: Add KStem (light stemmer for English).
    (Yonik Seeley via Robert Muir)
+
+ * LUCENE-3135: Add suggesters (autocomplete) to contrib/spellchecker,
+   with three implementations: Jaspell, Ternary Trie, and Finite State.
+   (Andrzej Bialecki, Dawid Weiss, Mike Mccandless, Robert Muir)
  
 API Changes
 

Copied: lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
(from r1130570, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java?p2=lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java&p1=lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java&r1=1130570&r2=1130612&rev=1130612&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java Thu Jun  2 15:51:21 2011
@@ -21,19 +21,19 @@ import java.io.IOException;
 import java.util.Iterator;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.BytesRef;
 
 /**
  * HighFrequencyDictionary: terms taken from the given field
  * of a Lucene index, which appear in a number of documents
  * above a given threshold.
  *
+ * When using IndexReader.terms(Term) the code must not call next() on TermEnum
+ * as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6
+ *
  * Threshold is a value in [0..1] representing the minimum
  * number of documents (of the total) where a term should appear.
  * 
@@ -43,7 +43,6 @@ public class HighFrequencyDictionary imp
   private IndexReader reader;
   private String field;
   private float thresh;
-  private final CharsRef spare = new CharsRef();
 
   public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
     this.reader = reader;
@@ -55,77 +54,86 @@ public class HighFrequencyDictionary imp
     return new HighFrequencyIterator();
   }
 
-  final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
-    private TermsEnum termsEnum;
-    private BytesRef actualTerm;
+  final class HighFrequencyIterator implements TermFreqIterator {
+    private TermEnum termEnum;
+    private Term actualTerm;
+    private int actualFreq;
     private boolean hasNextCalled;
     private int minNumDocs;
 
     HighFrequencyIterator() {
       try {
-        Terms terms = MultiFields.getTerms(reader, field);
-        if (terms != null) {
-          termsEnum = terms.iterator();
-        }
+        termEnum = reader.terms(new Term(field, ""));
         minNumDocs = (int)(thresh * (float)reader.numDocs());
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
     }
 
-    private boolean isFrequent(int freq) {
-      return freq >= minNumDocs;
-    }
-    
-    public float freq() {
+    private boolean isFrequent(Term term) {
       try {
-        return termsEnum.docFreq();
-      } catch (IOException ioe) {
-        throw new RuntimeException(ioe);
+        return reader.docFreq(term) >= minNumDocs;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
       }
     }
-    
+
     public String next() {
-      if (!hasNextCalled && !hasNext()) {
-        return null;
+      if (!hasNextCalled) {
+        hasNext();
       }
       hasNextCalled = false;
 
-      return (actualTerm != null) ? actualTerm.utf8ToChars(spare).toString() : null;
+      try {
+        termEnum.next();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+
+      return (actualTerm != null) ? actualTerm.text() : null;
+    }
+
+    public float freq() {
+      return actualFreq;
     }
 
+
     public boolean hasNext() {
       if (hasNextCalled) {
         return actualTerm != null;
       }
       hasNextCalled = true;
 
-      if (termsEnum == null) {
-        return false;
-      }
-
-      while(true) {
-
-        try {
-          actualTerm = termsEnum.next();
-        } catch (IOException e) {
-          throw new RuntimeException(e);
-        }
+      do {
+        actualTerm = termEnum.term();
+        actualFreq = termEnum.docFreq();
 
         // if there are no words return false
         if (actualTerm == null) {
           return false;
         }
 
+        String currentField = actualTerm.field();
+
+        // if the next word doesn't have the same field return false
+        if (currentField != field) {   // intern'd comparison
+          actualTerm = null;
+          return false;
+        }
+
         // got a valid term, does it pass the threshold?
+        if (isFrequent(actualTerm)) {
+          return true;
+        }
+
+        // term not up to threshold
         try {
-          if (isFrequent(termsEnum.docFreq())) {
-            return true;
-          }
-        } catch (IOException ioe) {
-          throw new RuntimeException(ioe);
+          termEnum.next();
+        } catch (IOException e) {
+          throw new RuntimeException(e);
         }
-      }
+
+      } while (true);
     }
 
     public void remove() {

Modified: lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java?rev=1130612&r1=1130567&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java Thu Jun  2 15:51:21 2011
@@ -36,7 +36,7 @@ public abstract class Lookup {
   public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
     
     public LookupPriorityQueue(int size) {
-      super(size);
+      initialize(size);
     }
 
     @Override

Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Thu Jun  2 15:51:21 2011
@@ -23,6 +23,14 @@ on how to get started.
 
 ======================= 3.x (not yet released) ================
 
+New Features
+----------------------
+
+* SOLR-2378: A new, automaton-based, implementation of suggest (autocomplete)
+  component, offering an order of magnitude smaller memory consumption
+  compared to ternary trees and jaspell and very fast lookups at runtime.
+  (Dawid Weiss)
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java Thu Jun  2 15:51:21 2011
@@ -22,6 +22,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.SpellChecker;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
@@ -32,7 +33,6 @@ import org.apache.solr.common.params.Sol
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.core.SolrCore;
-import org.apache.solr.util.HighFrequencyDictionary;
 import org.apache.solr.util.plugin.SolrCoreAware;
 
 import java.io.File;

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java Thu Jun  2 15:51:21 2011
@@ -27,11 +27,12 @@ import org.slf4j.LoggerFactory;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.search.spell.PlainTextDictionary;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.FieldType;
-import org.apache.solr.util.HighFrequencyDictionary;
+
 import org.apache.solr.search.SolrIndexSearcher;
 
 /**

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java Thu Jun  2 15:51:21 2011
@@ -21,7 +21,7 @@ import org.apache.lucene.store.FSDirecto
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.util.HighFrequencyDictionary;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
 
 import java.io.File;
 import java.io.IOException;

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java Thu Jun  2 15:51:21 2011
@@ -1,4 +1,4 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -28,15 +28,20 @@ import java.util.List;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.spell.Dictionary;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
+import org.apache.lucene.search.suggest.FileDictionary;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
+
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.spelling.SolrSpellChecker;
 import org.apache.solr.spelling.SpellingOptions;
 import org.apache.solr.spelling.SpellingResult;
-import org.apache.solr.spelling.suggest.Lookup.LookupResult;
-import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
-import org.apache.solr.util.HighFrequencyDictionary;
+import org.apache.solr.spelling.suggest.fst.FSTLookupFactory;
+import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
+import org.apache.solr.spelling.suggest.tst.TSTLookupFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -81,11 +86,18 @@ public class Suggester extends SolrSpell
     sourceLocation = (String) config.get(LOCATION);
     field = (String)config.get(FIELD);
     lookupImpl = (String)config.get(LOOKUP_IMPL);
-    if (lookupImpl == null) {
-      lookupImpl = JaspellLookup.class.getName();
+
+    // support the old classnames without -Factory for config file backwards compatibility.
+    if (lookupImpl == null || "org.apache.solr.spelling.suggest.jaspell.JaspellLookup".equals(lookupImpl)) {
+      lookupImpl = JaspellLookupFactory.class.getName();
+    } else if ("org.apache.solr.spelling.suggest.tst.TSTLookup".equals(lookupImpl)) {
+      lookupImpl = TSTLookupFactory.class.getName();
+    } else if ("org.apache.solr.spelling.suggest.fst.FSTLookup".equals(lookupImpl)) {
+      lookupImpl = FSTLookupFactory.class.getName();
     }
-    lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
-    lookup.init(config, core);
+
+    LookupFactory factory = (LookupFactory) core.getResourceLoader().newInstance(lookupImpl);
+    lookup = factory.create(config, core);
     String store = (String)config.get(STORE_DIR);
     if (store != null) {
       storeDir = new File(store);
@@ -110,7 +122,7 @@ public class Suggester extends SolrSpell
   public void build(SolrCore core, SolrIndexSearcher searcher) {
     LOG.info("build()");
     if (sourceLocation == null) {
-      reader = searcher.getReader();
+      reader = searcher.getIndexReader();
       dictionary = new HighFrequencyDictionary(reader, field, threshold);
     } else {
       try {
@@ -152,7 +164,6 @@ public class Suggester extends SolrSpell
   
   static SpellingResult EMPTY_RESULT = new SpellingResult();
 
-
   @Override
   public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults) throws IOException {
     return getSuggestions(new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults, Float.MIN_VALUE, null));

Modified: lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml (original)
+++ lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml Thu Jun  2 15:51:21 2011
@@ -94,6 +94,52 @@
    </requestHandler>
 
   <!-- Suggest component -->
+  <searchComponent class="solr.SpellCheckComponent" name="suggest_jaspell">
+    <lst name="spellchecker">
+      <str name="name">suggest</str>
+      <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
+      <str name="lookupImpl">org.apache.solr.spelling.suggest.jaspell.JaspellLookup</str>
+      <str name="field">suggest</str>
+      <str name="storeDir">suggest</str>
+      <str name="buildOnCommit">true</str>
+
+      <!-- Suggester properties -->
+      <float name="threshold">0.0</float>
+    </lst>
+  </searchComponent>
+
+  <!-- TSTLookup suggest component -->
+  <searchComponent class="solr.SpellCheckComponent" name="suggest_tst">
+    <lst name="spellchecker">
+      <str name="name">suggest_tst</str>
+      <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
+      <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
+      <str name="field">suggest</str>
+      <str name="storeDir">suggest_tst</str>
+      <str name="buildOnCommit">true</str>
+
+      <!-- Suggester properties -->
+      <float name="threshold">0.0</float>
+    </lst>
+  </searchComponent>
+
+  <!-- FSTLookup suggest component -->
+  <searchComponent class="solr.SpellCheckComponent" name="suggest_fst">
+    <lst name="spellchecker">
+      <str name="name">suggest_fst</str>
+      <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
+      <str name="lookupImpl">org.apache.solr.spelling.suggest.fst.FSTLookup</str>
+      <str name="field">suggest</str>
+      <str name="storeDir">suggest_fst</str>
+      <str name="buildOnCommit">true</str>
+
+      <!-- Suggester properties -->
+      <int name="weightBuckets">5</int>
+      <bool name="exactMatchFirst">true</bool>
+    </lst>
+  </searchComponent>
+
+  <!-- Suggest component -->
   <searchComponent class="solr.SpellCheckComponent" name="suggest">
     <lst name="spellchecker">
       <str name="name">suggest</str>
@@ -110,6 +156,8 @@
 -->
     </lst>
   </searchComponent>
+
+  <!--  The default (jaspell) -->
   <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest">
     <lst name="defaults">
       <str name="spellcheck">true</str>
@@ -117,10 +165,33 @@
       <str name="spellcheck.collate">true</str>
     </lst>
     <arr name="components">
-      <str>suggest</str>
+      <str>suggest_jaspell</str>
     </arr>
   </requestHandler>
 
+  <!--  tst (ternary tree based) -->
+  <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest_tst">
+    <lst name="defaults">
+      <str name="spellcheck">true</str>
+      <str name="spellcheck.dictionary">suggest_tst</str>
+      <str name="spellcheck.collate">true</str>
+    </lst>
+    <arr name="components">
+      <str>suggest_tst</str>
+    </arr>
+  </requestHandler>
+  
+  <!--  fst (finite state automaton based) -->
+  <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest_fst">
+    <lst name="defaults">
+      <str name="spellcheck">true</str>
+      <str name="spellcheck.dictionary">suggest_fst</str>
+      <str name="spellcheck.collate">false</str>
+    </lst>
+    <arr name="components">
+      <str>suggest_fst</str>
+    </arr>
+  </requestHandler>
   
 
   <queryResponseWriter name="standard" class="solr.XMLResponseWriter"/>

Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java Thu Jun  2 15:51:21 2011
@@ -1,4 +1,4 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -17,22 +17,19 @@
 
 package org.apache.solr.spelling.suggest;
 
-import org.apache.lucene.util.RamUsageEstimator;
+import java.io.File;
+
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.params.SpellingParams;
-import org.apache.solr.spelling.suggest.Lookup.LookupResult;
-import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
-import org.apache.solr.spelling.suggest.tst.TSTLookup;
-import org.apache.solr.util.TermFreqIterator;
 import org.junit.BeforeClass;
 import org.junit.Test;
-import java.io.File;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
 
 public class SuggesterTest extends SolrTestCaseJ4 {
+  /**
+   * Expected URI at which the given suggester will live.
+   */
+  protected String requestUri = "/suggest";
+
   @BeforeClass
   public static void beforeClass() throws Exception {
     initCore("solrconfig-spellchecker.xml","schema-spellchecker.xml");
@@ -53,10 +50,9 @@ public class SuggesterTest extends SolrT
   @Test
   public void testSuggestions() throws Exception {
     addDocs();
-
     assertU(commit()); // configured to do a rebuild on commit
 
-    assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
+    assertQ(req("qt", requestUri, "q", "ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
         "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']",
         "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[1][.='acquire']",
         "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[2][.='accommodate']"
@@ -76,12 +72,12 @@ public class SuggesterTest extends SolrT
     dataDir = data;
     configString = config;
     initCore();
-    assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
+    assertQ(req("qt", requestUri, "q", "ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
             "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']",
             "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[1][.='acquire']",
             "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[2][.='accommodate']"
         );
-    
+
     // restore the property
     System.setProperty("solr.test.leavedatadir", leaveData);
   }
@@ -90,136 +86,13 @@ public class SuggesterTest extends SolrT
   public void testRebuild() throws Exception {
     addDocs();
     assertU(commit());
-    assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
-        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']");
+    assertQ(req("qt", requestUri, "q", "ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']");
     assertU(adoc("id", "4",
         "text", "actually"
        ));
     assertU(commit());
-    assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
-    "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']");
-  }
-
-  
-  private TermFreqIterator getTFIT() {
-    final int count = 100000;
-    TermFreqIterator tfit = new TermFreqIterator() {
-      Random r = new Random(1234567890L);
-      Random r1 = new Random(1234567890L);
-      int pos;
-
-      public float freq() {
-        return r1.nextInt(4);
-      }
-
-      public boolean hasNext() {
-        return pos < count;
-      }
-
-      public String next() {
-        pos++;
-        return Long.toString(r.nextLong());
-      }
-
-      public void remove() {
-        throw new UnsupportedOperationException();
-      }
-      
-    };
-    return tfit;
-  }
-  
-  private void _benchmark(Lookup lookup, Map<String,Integer> ref, boolean estimate, Bench bench) throws Exception {
-    long start = System.currentTimeMillis();
-    lookup.build(getTFIT());
-    long buildTime = System.currentTimeMillis() - start;
-    TermFreqIterator tfit = getTFIT();
-    long elapsed = 0;
-    while (tfit.hasNext()) {
-      String key = tfit.next();
-      // take only the first part of the key
-      int len = key.length() > 4 ? key.length() / 3 : 2;
-      String prefix = key.substring(0, len);
-      start = System.nanoTime();
-      List<LookupResult> res = lookup.lookup(prefix, true, 10);
-      elapsed += System.nanoTime() - start;
-      assertTrue(res.size() > 0);
-      for (LookupResult lr : res) {
-        assertTrue(lr.key.startsWith(prefix));
-      }
-      if (ref != null) { // verify the counts
-        Integer Cnt = ref.get(key);
-        if (Cnt == null) { // first pass
-          ref.put(key, res.size());
-        } else {
-          assertEquals(key + ", prefix: " + prefix, Cnt.intValue(), res.size());
-        }
-      }
-    }
-    if (estimate) {
-      RamUsageEstimator rue = new RamUsageEstimator();
-      long size = rue.estimateRamUsage(lookup);
-      System.err.println(lookup.getClass().getSimpleName() + " - size=" + size);
-    }
-    if (bench != null) {
-      bench.buildTime += buildTime;
-      bench.lookupTime +=  elapsed;
-    }
-  }
-  
-  class Bench {
-    long buildTime;
-    long lookupTime;
-  }
-
-  @Test
-  public void testBenchmark() throws Exception {
-    // this benchmark is very time consuming
-    boolean doTest = false;
-    if (!doTest) {
-      return;
-    }
-    Map<String,Integer> ref = new HashMap<String,Integer>();
-    JaspellLookup jaspell = new JaspellLookup();
-    TSTLookup tst = new TSTLookup();
-    
-    _benchmark(tst, ref, true, null);
-    _benchmark(jaspell, ref, true, null);
-    jaspell = null;
-    tst = null;
-    int count = 100;
-    Bench b = runBenchmark(JaspellLookup.class, count);
-    System.err.println(JaspellLookup.class.getSimpleName() + ": buildTime[ms]=" + (b.buildTime / count) +
-            " lookupTime[ms]=" + (b.lookupTime / count / 1000000));
-    b = runBenchmark(TSTLookup.class, count);
-    System.err.println(TSTLookup.class.getSimpleName() + ": buildTime[ms]=" + (b.buildTime / count) +
-            " lookupTime[ms]=" + (b.lookupTime / count / 1000000));
-  }
-  
-  private Bench runBenchmark(Class<? extends Lookup> cls, int count) throws Exception {
-    System.err.println("* Running " + count + " iterations for " + cls.getSimpleName() + " ...");
-    System.err.println("  - warm-up 10 iterations...");
-    for (int i = 0; i < 10; i++) {
-      System.runFinalization();
-      System.gc();
-      Lookup lookup = cls.newInstance();
-      _benchmark(lookup, null, false, null);
-      lookup = null;
-    }
-    Bench b = new Bench();
-    System.err.print("  - main iterations:"); System.err.flush();
-    for (int i = 0; i < count; i++) {
-      System.runFinalization();
-      System.gc();
-      Lookup lookup = cls.newInstance();
-      _benchmark(lookup, null, false, b);
-      lookup = null;
-      if (i > 0 && (i % 10 == 0)) {
-        System.err.print(" " + i);
-        System.err.flush();
-      }
-    }
-    System.err.println();
-    return b;
+    assertQ(req("qt", requestUri, "q", "ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']");
   }
 }



Mime
View raw message