Author: rmuir
Date: Thu Jun 2 15:51:21 2011
New Revision: 1130612
URL: http://svn.apache.org/viewvc?rev=1130612&view=rev
Log:
LUCENE-3135, SOLR-2378: backport all suggester functionality
Added:
lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
- copied, changed from r1130570, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SortedIterator.java
- copied unchanged from r1130570, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java
lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
- copied unchanged from r1130571, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/
- copied from r1130567, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/
lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/suggest/
- copied from r1130569, lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java
- copied unchanged from r1130598, lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/fst/
- copied from r1130598, lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/fst/
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java
- copied unchanged from r1130598, lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java
- copied unchanged from r1130598, lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterFSTTest.java
- copied unchanged from r1130601, lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterFSTTest.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTSTTest.java
- copied unchanged from r1130601, lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTSTTest.java
Removed:
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/BufferingTermFreqIteratorWrapper.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/FileDictionary.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Lookup.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/SortedTermFreqIteratorWrapper.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/UnsortedTermFreqIteratorWrapper.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTAutocomplete.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/tst/TernaryTreeNode.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/util/SortedIterator.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/util/TermFreqIterator.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/CHANGES.txt
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Thu Jun 2 15:51:21 2011
@@ -9,6 +9,10 @@ New Features
* LUCENE-152: Add KStem (light stemmer for English).
(Yonik Seeley via Robert Muir)
+
+ * LUCENE-3135: Add suggesters (autocomplete) to contrib/spellchecker,
+ with three implementations: Jaspell, Ternary Trie, and Finite State.
+ (Andrzej Bialecki, Dawid Weiss, Mike McCandless, Robert Muir)
API Changes
Copied: lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
(from r1130570, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java?p2=lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java&p1=lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java&r1=1130570&r2=1130612&rev=1130612&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
(original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
Thu Jun 2 15:51:21 2011
@@ -21,19 +21,19 @@ import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.BytesRef;
/**
* HighFrequencyDictionary: terms taken from the given field
* of a Lucene index, which appear in a number of documents
* above a given threshold.
*
+ * Note: the TermEnum returned by IndexReader.terms(Term) is already positioned on its first term, so
+ * read term() before the first call to next(); see http://issues.apache.org/jira/browse/LUCENE-6
+ *
* Threshold is a value in [0..1] representing the minimum
* number of documents (of the total) where a term should appear.
*
@@ -43,7 +43,6 @@ public class HighFrequencyDictionary imp
private IndexReader reader;
private String field;
private float thresh;
- private final CharsRef spare = new CharsRef();
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
this.reader = reader;
@@ -55,77 +54,86 @@ public class HighFrequencyDictionary imp
return new HighFrequencyIterator();
}
- final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
- private TermsEnum termsEnum;
- private BytesRef actualTerm;
+ final class HighFrequencyIterator implements TermFreqIterator {
+ private TermEnum termEnum;
+ private Term actualTerm;
+ private int actualFreq;
private boolean hasNextCalled;
private int minNumDocs;
HighFrequencyIterator() {
try {
- Terms terms = MultiFields.getTerms(reader, field);
- if (terms != null) {
- termsEnum = terms.iterator();
- }
+ termEnum = reader.terms(new Term(field, ""));
minNumDocs = (int)(thresh * (float)reader.numDocs());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
- private boolean isFrequent(int freq) {
- return freq >= minNumDocs;
- }
-
- public float freq() {
+ private boolean isFrequent(Term term) {
try {
- return termsEnum.docFreq();
- } catch (IOException ioe) {
- throw new RuntimeException(ioe);
+ return reader.docFreq(term) >= minNumDocs;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
}
-
+
public String next() {
- if (!hasNextCalled && !hasNext()) {
- return null;
+ if (!hasNextCalled) {
+ hasNext();
}
hasNextCalled = false;
- return (actualTerm != null) ? actualTerm.utf8ToChars(spare).toString() : null;
+ try {
+ termEnum.next();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ return (actualTerm != null) ? actualTerm.text() : null;
+ }
+
+ public float freq() {
+ return actualFreq;
}
+
public boolean hasNext() {
if (hasNextCalled) {
return actualTerm != null;
}
hasNextCalled = true;
- if (termsEnum == null) {
- return false;
- }
-
- while(true) {
-
- try {
- actualTerm = termsEnum.next();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
+ do {
+ actualTerm = termEnum.term();
+ actualFreq = termEnum.docFreq();
// if there are no words return false
if (actualTerm == null) {
return false;
}
+ String currentField = actualTerm.field();
+
+ // if the next word doesn't have the same field return false
+ if (currentField != field) { // intern'd comparison
+ actualTerm = null;
+ return false;
+ }
+
// got a valid term, does it pass the threshold?
+ if (isFrequent(actualTerm)) {
+ return true;
+ }
+
+ // term not up to threshold
try {
- if (isFrequent(termsEnum.docFreq())) {
- return true;
- }
- } catch (IOException ioe) {
- throw new RuntimeException(ioe);
+ termEnum.next();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
- }
+
+ } while (true);
}
public void remove() {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java?rev=1130612&r1=1130567&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java
(original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/Lookup.java
Thu Jun 2 15:51:21 2011
@@ -36,7 +36,7 @@ public abstract class Lookup {
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult>
{
public LookupPriorityQueue(int size) {
- super(size);
+ initialize(size);
}
@Override
Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Thu Jun 2 15:51:21 2011
@@ -23,6 +23,14 @@ on how to get started.
======================= 3.x (not yet released) ================
+New Features
+----------------------
+
+* SOLR-2378: A new, automaton-based implementation of the suggest (autocomplete)
+ component, offering an order of magnitude smaller memory consumption
+ compared to ternary trees and Jaspell, and very fast lookups at runtime.
+ (Dawid Weiss)
+
Bug Fixes
----------------------
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
(original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
Thu Jun 2 15:51:21 2011
@@ -22,6 +22,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.SpellChecker;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
@@ -32,7 +33,6 @@ import org.apache.solr.common.params.Sol
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.util.HighFrequencyDictionary;
import org.apache.solr.util.plugin.SolrCoreAware;
import java.io.File;
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
(original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
Thu Jun 2 15:51:21 2011
@@ -27,11 +27,12 @@ import org.slf4j.LoggerFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.spell.PlainTextDictionary;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
import org.apache.lucene.store.RAMDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
-import org.apache.solr.util.HighFrequencyDictionary;
+
import org.apache.solr.search.SolrIndexSearcher;
/**
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
(original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
Thu Jun 2 15:51:21 2011
@@ -21,7 +21,7 @@ import org.apache.lucene.store.FSDirecto
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.util.HighFrequencyDictionary;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
import java.io.File;
import java.io.IOException;
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
(original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
Thu Jun 2 15:51:21 2011
@@ -1,4 +1,4 @@
-/*
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -28,15 +28,20 @@ import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.Dictionary;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
+import org.apache.lucene.search.suggest.FileDictionary;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
+
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.SolrSpellChecker;
import org.apache.solr.spelling.SpellingOptions;
import org.apache.solr.spelling.SpellingResult;
-import org.apache.solr.spelling.suggest.Lookup.LookupResult;
-import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
-import org.apache.solr.util.HighFrequencyDictionary;
+import org.apache.solr.spelling.suggest.fst.FSTLookupFactory;
+import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
+import org.apache.solr.spelling.suggest.tst.TSTLookupFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -81,11 +86,18 @@ public class Suggester extends SolrSpell
sourceLocation = (String) config.get(LOCATION);
field = (String)config.get(FIELD);
lookupImpl = (String)config.get(LOOKUP_IMPL);
- if (lookupImpl == null) {
- lookupImpl = JaspellLookup.class.getName();
+
+ // support the old classnames without -Factory for config file backwards compatibility.
+ if (lookupImpl == null || "org.apache.solr.spelling.suggest.jaspell.JaspellLookup".equals(lookupImpl))
{
+ lookupImpl = JaspellLookupFactory.class.getName();
+ } else if ("org.apache.solr.spelling.suggest.tst.TSTLookup".equals(lookupImpl)) {
+ lookupImpl = TSTLookupFactory.class.getName();
+ } else if ("org.apache.solr.spelling.suggest.fst.FSTLookup".equals(lookupImpl)) {
+ lookupImpl = FSTLookupFactory.class.getName();
}
- lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
- lookup.init(config, core);
+
+ LookupFactory factory = (LookupFactory) core.getResourceLoader().newInstance(lookupImpl);
+ lookup = factory.create(config, core);
String store = (String)config.get(STORE_DIR);
if (store != null) {
storeDir = new File(store);
@@ -110,7 +122,7 @@ public class Suggester extends SolrSpell
public void build(SolrCore core, SolrIndexSearcher searcher) {
LOG.info("build()");
if (sourceLocation == null) {
- reader = searcher.getReader();
+ reader = searcher.getIndexReader();
dictionary = new HighFrequencyDictionary(reader, field, threshold);
} else {
try {
@@ -152,7 +164,6 @@ public class Suggester extends SolrSpell
static SpellingResult EMPTY_RESULT = new SpellingResult();
-
@Override
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader,
int count, boolean onlyMorePopular, boolean extendedResults) throws IOException {
return getSuggestions(new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
Float.MIN_VALUE, null));
Modified: lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
(original)
+++ lucene/dev/branches/branch_3x/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
Thu Jun 2 15:51:21 2011
@@ -94,6 +94,52 @@
</requestHandler>
<!-- Suggest component -->
+ <searchComponent class="solr.SpellCheckComponent" name="suggest_jaspell">
+ <lst name="spellchecker">
+ <str name="name">suggest</str>
+ <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
+ <str name="lookupImpl">org.apache.solr.spelling.suggest.jaspell.JaspellLookup</str>
+ <str name="field">suggest</str>
+ <str name="storeDir">suggest</str>
+ <str name="buildOnCommit">true</str>
+
+ <!-- Suggester properties -->
+ <float name="threshold">0.0</float>
+ </lst>
+ </searchComponent>
+
+ <!-- TSTLookup suggest component -->
+ <searchComponent class="solr.SpellCheckComponent" name="suggest_tst">
+ <lst name="spellchecker">
+ <str name="name">suggest_tst</str>
+ <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
+ <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
+ <str name="field">suggest</str>
+ <str name="storeDir">suggest_tst</str>
+ <str name="buildOnCommit">true</str>
+
+ <!-- Suggester properties -->
+ <float name="threshold">0.0</float>
+ </lst>
+ </searchComponent>
+
+ <!-- FSTLookup suggest component -->
+ <searchComponent class="solr.SpellCheckComponent" name="suggest_fst">
+ <lst name="spellchecker">
+ <str name="name">suggest_fst</str>
+ <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
+ <str name="lookupImpl">org.apache.solr.spelling.suggest.fst.FSTLookup</str>
+ <str name="field">suggest</str>
+ <str name="storeDir">suggest_fst</str>
+ <str name="buildOnCommit">true</str>
+
+ <!-- Suggester properties -->
+ <int name="weightBuckets">5</int>
+ <bool name="exactMatchFirst">true</bool>
+ </lst>
+ </searchComponent>
+
+ <!-- Suggest component -->
<searchComponent class="solr.SpellCheckComponent" name="suggest">
<lst name="spellchecker">
<str name="name">suggest</str>
@@ -110,6 +156,8 @@
-->
</lst>
</searchComponent>
+
+ <!-- The default (jaspell) -->
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest">
<lst name="defaults">
<str name="spellcheck">true</str>
@@ -117,10 +165,33 @@
<str name="spellcheck.collate">true</str>
</lst>
<arr name="components">
- <str>suggest</str>
+ <str>suggest_jaspell</str>
</arr>
</requestHandler>
+ <!-- tst (ternary tree based) -->
+ <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest_tst">
+ <lst name="defaults">
+ <str name="spellcheck">true</str>
+ <str name="spellcheck.dictionary">suggest_tst</str>
+ <str name="spellcheck.collate">true</str>
+ </lst>
+ <arr name="components">
+ <str>suggest_tst</str>
+ </arr>
+ </requestHandler>
+
+ <!-- fst (finite state automaton based) -->
+ <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest_fst">
+ <lst name="defaults">
+ <str name="spellcheck">true</str>
+ <str name="spellcheck.dictionary">suggest_fst</str>
+ <str name="spellcheck.collate">false</str>
+ </lst>
+ <arr name="components">
+ <str>suggest_fst</str>
+ </arr>
+ </requestHandler>
<queryResponseWriter name="standard" class="solr.XMLResponseWriter"/>
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java?rev=1130612&r1=1130611&r2=1130612&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
(original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
Thu Jun 2 15:51:21 2011
@@ -1,4 +1,4 @@
-/*
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,22 +17,19 @@
package org.apache.solr.spelling.suggest;
-import org.apache.lucene.util.RamUsageEstimator;
+import java.io.File;
+
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.SpellingParams;
-import org.apache.solr.spelling.suggest.Lookup.LookupResult;
-import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
-import org.apache.solr.spelling.suggest.tst.TSTLookup;
-import org.apache.solr.util.TermFreqIterator;
import org.junit.BeforeClass;
import org.junit.Test;
-import java.io.File;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
public class SuggesterTest extends SolrTestCaseJ4 {
+ /**
+ * Expected URI at which the given suggester will live.
+ */
+ protected String requestUri = "/suggest";
+
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-spellchecker.xml","schema-spellchecker.xml");
@@ -53,10 +50,9 @@ public class SuggesterTest extends SolrT
@Test
public void testSuggestions() throws Exception {
addDocs();
-
assertU(commit()); // configured to do a rebuild on commit
- assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR,
"true"),
+ assertQ(req("qt", requestUri, "q", "ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR,
"true"),
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']",
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[1][.='acquire']",
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[2][.='accommodate']"
@@ -76,12 +72,12 @@ public class SuggesterTest extends SolrT
dataDir = data;
configString = config;
initCore();
- assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR,
"true"),
+ assertQ(req("qt", requestUri, "q", "ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR,
"true"),
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']",
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[1][.='acquire']",
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[2][.='accommodate']"
);
-
+
// restore the property
System.setProperty("solr.test.leavedatadir", leaveData);
}
@@ -90,136 +86,13 @@ public class SuggesterTest extends SolrT
public void testRebuild() throws Exception {
addDocs();
assertU(commit());
- assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR,
"true"),
- "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']");
+ assertQ(req("qt", requestUri, "q", "ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR,
"true"),
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']");
assertU(adoc("id", "4",
"text", "actually"
));
assertU(commit());
- assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR,
"true"),
- "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']");
- }
-
-
- private TermFreqIterator getTFIT() {
- final int count = 100000;
- TermFreqIterator tfit = new TermFreqIterator() {
- Random r = new Random(1234567890L);
- Random r1 = new Random(1234567890L);
- int pos;
-
- public float freq() {
- return r1.nextInt(4);
- }
-
- public boolean hasNext() {
- return pos < count;
- }
-
- public String next() {
- pos++;
- return Long.toString(r.nextLong());
- }
-
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
- };
- return tfit;
- }
-
- private void _benchmark(Lookup lookup, Map<String,Integer> ref, boolean estimate,
Bench bench) throws Exception {
- long start = System.currentTimeMillis();
- lookup.build(getTFIT());
- long buildTime = System.currentTimeMillis() - start;
- TermFreqIterator tfit = getTFIT();
- long elapsed = 0;
- while (tfit.hasNext()) {
- String key = tfit.next();
- // take only the first part of the key
- int len = key.length() > 4 ? key.length() / 3 : 2;
- String prefix = key.substring(0, len);
- start = System.nanoTime();
- List<LookupResult> res = lookup.lookup(prefix, true, 10);
- elapsed += System.nanoTime() - start;
- assertTrue(res.size() > 0);
- for (LookupResult lr : res) {
- assertTrue(lr.key.startsWith(prefix));
- }
- if (ref != null) { // verify the counts
- Integer Cnt = ref.get(key);
- if (Cnt == null) { // first pass
- ref.put(key, res.size());
- } else {
- assertEquals(key + ", prefix: " + prefix, Cnt.intValue(), res.size());
- }
- }
- }
- if (estimate) {
- RamUsageEstimator rue = new RamUsageEstimator();
- long size = rue.estimateRamUsage(lookup);
- System.err.println(lookup.getClass().getSimpleName() + " - size=" + size);
- }
- if (bench != null) {
- bench.buildTime += buildTime;
- bench.lookupTime += elapsed;
- }
- }
-
- class Bench {
- long buildTime;
- long lookupTime;
- }
-
- @Test
- public void testBenchmark() throws Exception {
- // this benchmark is very time consuming
- boolean doTest = false;
- if (!doTest) {
- return;
- }
- Map<String,Integer> ref = new HashMap<String,Integer>();
- JaspellLookup jaspell = new JaspellLookup();
- TSTLookup tst = new TSTLookup();
-
- _benchmark(tst, ref, true, null);
- _benchmark(jaspell, ref, true, null);
- jaspell = null;
- tst = null;
- int count = 100;
- Bench b = runBenchmark(JaspellLookup.class, count);
- System.err.println(JaspellLookup.class.getSimpleName() + ": buildTime[ms]=" + (b.buildTime
/ count) +
- " lookupTime[ms]=" + (b.lookupTime / count / 1000000));
- b = runBenchmark(TSTLookup.class, count);
- System.err.println(TSTLookup.class.getSimpleName() + ": buildTime[ms]=" + (b.buildTime
/ count) +
- " lookupTime[ms]=" + (b.lookupTime / count / 1000000));
- }
-
- private Bench runBenchmark(Class<? extends Lookup> cls, int count) throws Exception
{
- System.err.println("* Running " + count + " iterations for " + cls.getSimpleName() +
" ...");
- System.err.println(" - warm-up 10 iterations...");
- for (int i = 0; i < 10; i++) {
- System.runFinalization();
- System.gc();
- Lookup lookup = cls.newInstance();
- _benchmark(lookup, null, false, null);
- lookup = null;
- }
- Bench b = new Bench();
- System.err.print(" - main iterations:"); System.err.flush();
- for (int i = 0; i < count; i++) {
- System.runFinalization();
- System.gc();
- Lookup lookup = cls.newInstance();
- _benchmark(lookup, null, false, b);
- lookup = null;
- if (i > 0 && (i % 10 == 0)) {
- System.err.print(" " + i);
- System.err.flush();
- }
- }
- System.err.println();
- return b;
+ assertQ(req("qt", requestUri, "q", "ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR,
"true"),
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']");
}
}
|