Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 0DA67200B94 for ; Sun, 2 Oct 2016 16:35:25 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 0C235160AC7; Sun, 2 Oct 2016 14:35:25 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id CCA1C160AEA for ; Sun, 2 Oct 2016 16:35:22 +0200 (CEST) Received: (qmail 95025 invoked by uid 500); 2 Oct 2016 14:35:20 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 94228 invoked by uid 99); 2 Oct 2016 14:35:20 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 02 Oct 2016 14:35:20 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 2C4C0DFF56; Sun, 2 Oct 2016 14:35:20 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: synhershko@apache.org To: commits@lucenenet.apache.org Date: Sun, 02 Oct 2016 14:35:43 -0000 Message-Id: <7b1b428835d842f08c755cf8f3cf5973@git.apache.org> In-Reply-To: <6014ffb9c2e748049fcffef430b6e351@git.apache.org> References: <6014ffb9c2e748049fcffef430b6e351@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [25/50] [abbrv] lucenenet git commit: Updated Suggest documentation links to code elements and made corrections. archived-at: Sun, 02 Oct 2016 14:35:25 -0000 Updated Suggest documentation links to code elements and made corrections. Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9a6f9734 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9a6f9734 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9a6f9734 Branch: refs/heads/master Commit: 9a6f9734347b004bd51b3521294ea779ff905d3f Parents: 106a905 Author: Shad Storhaug Authored: Fri Sep 16 23:54:08 2016 +0700 Committer: Shad Storhaug Committed: Sun Oct 2 17:44:41 2016 +0700 ---------------------------------------------------------------------- .../Spell/DirectSpellChecker.cs | 106 ++++++++++---- .../Spell/HighFrequencyDictionary.cs | 2 +- .../Spell/JaroWinklerDistance.cs | 12 +- .../Spell/LevensteinDistance.cs | 3 +- .../Spell/LuceneLevenshteinDistance.cs | 18 +-- src/Lucene.Net.Suggest/Spell/NGramDistance.cs | 3 +- .../Spell/PlainTextDictionary.cs | 8 +- src/Lucene.Net.Suggest/Spell/SpellChecker.cs | 143 ++++++++++--------- src/Lucene.Net.Suggest/Spell/SuggestMode.cs | 2 +- src/Lucene.Net.Suggest/Spell/SuggestWord.cs | 2 +- .../Spell/SuggestWordFrequencyComparator.cs | 4 +- .../Spell/SuggestWordQueue.cs | 5 +- .../Spell/SuggestWordScoreComparator.cs | 4 +- .../Spell/WordBreakSpellChecker.cs | 61 ++++---- .../Analyzing/AnalyzingInfixSuggester.cs | 103 ++++++------- .../Suggest/Analyzing/AnalyzingSuggester.cs | 58 ++++---- .../Suggest/Analyzing/BlendedInfixSuggester.cs | 20 ++- .../Suggest/Analyzing/FSTUtil.cs | 8 +- .../Suggest/Analyzing/FreeTextSuggester.cs | 49 ++++--- .../Suggest/Analyzing/FuzzySuggester.cs | 66 ++++----- .../Suggest/Analyzing/SuggestStopFilter.cs | 21 +-- .../Suggest/BufferedInputIterator.cs | 2 +- .../Suggest/BufferingTermFreqIteratorWrapper.cs | 2 +- .../Suggest/DocumentDictionary.cs | 50 +++---- .../Suggest/DocumentValueSourceDictionary.cs | 49 ++++--- .../Suggest/FileDictionary.cs | 52 +++---- .../Suggest/Fst/BytesRefSorter.cs | 14 +- .../Suggest/Fst/FSTCompletion.cs | 53 +++---- .../Suggest/Fst/FSTCompletionBuilder.cs | 64 ++++----- .../Suggest/Fst/FSTCompletionLookup.cs | 32 ++--- .../Suggest/Fst/WFSTCompletionLookup.cs | 4 +- .../Suggest/InMemorySorter.cs | 2 +- src/Lucene.Net.Suggest/Suggest/InputIterator.cs | 16 +-- .../Suggest/Jaspell/JaspellLookup.cs | 8 +- .../Suggest/Jaspell/JaspellTernarySearchTrie.cs | 124 ++++++++-------- src/Lucene.Net.Suggest/Suggest/Lookup.cs | 30 ++-- .../Suggest/SortedInputIterator.cs | 5 +- .../Suggest/SortedTermFreqIteratorWrapper.cs | 6 +- .../Suggest/Tst/TSTAutocomplete.cs | 4 +- src/Lucene.Net.Suggest/Suggest/Tst/TSTLookup.cs | 7 +- 40 files changed, 636 insertions(+), 586 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs b/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs index 40ddfdf..1835909 100644 --- a/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs +++ b/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs @@ -29,9 +29,9 @@ namespace Lucene.Net.Search.Spell /// Simple automaton-based spellchecker. /// /// Candidates are presented directly from the term dictionary, based on - /// Levenshtein distance. This is an alternative to + /// Levenshtein distance. This is an alternative to /// if you are using an edit-distance-like metric such as Levenshtein - /// or . + /// or . /// /// /// A practical benefit of this spellchecker is that it requires no additional @@ -39,15 +39,15 @@ namespace Lucene.Net.Search.Spell /// /// /// - /// - /// + /// /// - /// @lucene.experimental + /// @lucene.experimental public class DirectSpellChecker { /// /// The default StringDistance, Damerau-Levenshtein distance implemented internally - /// via . + /// via . /// /// Note: this is the fastest distance metric, because Damerau-Levenshtein is used /// to draw candidates from the term dictionary: this just re-uses the scoring. @@ -92,14 +92,19 @@ namespace Lucene.Net.Search.Spell private IStringDistance distance = INTERNAL_LEVENSHTEIN; /// - /// Creates a DirectSpellChecker with default configuration values + /// Creates a DirectSpellChecker with default configuration values + /// public DirectSpellChecker() { } /// - /// Get the maximum number of Levenshtein edit-distances to draw - /// candidate terms from. + /// Gets or sets the maximum number of Levenshtein edit-distances to draw + /// candidate terms from.This value can be 1 or 2. The default is 2. + /// + /// Note: a large number of spelling errors occur with an edit distance + /// of 1, by setting this value to 1 you can increase both performance + /// and precision at the cost of recall. /// public virtual int MaxEdits { @@ -119,7 +124,10 @@ namespace Lucene.Net.Search.Spell /// - /// Get the minimal number of characters that must match exactly + /// Gets or sets the minimal number of characters that must match exactly. + /// + /// This can improve both performance and accuracy of results, + /// as misspellings are commonly not the first character. /// public virtual int MinPrefix { @@ -135,7 +143,10 @@ namespace Lucene.Net.Search.Spell /// - /// Get the maximum number of top-N inspections per suggestion + /// Get the maximum number of top-N inspections per suggestion. + /// + /// Increasing this number can improve the accuracy of results, at the cost + /// of performance. /// public virtual int MaxInspections { @@ -151,7 +162,8 @@ namespace Lucene.Net.Search.Spell /// - /// Get the minimal accuracy from the StringDistance for a match + /// Gets or sets the minimal accuracy required (default: 0.5f) from a StringDistance + /// for a suggestion match. /// public virtual float Accuracy { @@ -167,7 +179,16 @@ namespace Lucene.Net.Search.Spell /// - /// Get the minimal threshold of documents a term must appear for a match + /// Gets or sets the minimal threshold of documents a term must appear for a match. + ///

+ /// This can improve quality by only suggesting high-frequency terms. Note that + /// very high values might decrease performance slightly, by forcing the spellchecker + /// to draw more candidates from the term dictionary, but a practical value such + /// as 1 can be very useful towards improving quality. + ///

+ /// This can be specified as a relative percentage of documents such as 0.5f, + /// or it can be specified as an absolute whole document frequency, such as 4f. + /// Absolute document frequencies may not be fractional. ///

public virtual float ThresholdFrequency { @@ -187,7 +208,11 @@ namespace Lucene.Net.Search.Spell /// - /// Get the minimum length of a query term needed to return suggestions + /// Gets or sets the minimum length of a query term (default: 4) needed to return suggestions. + ///

+ /// Very short query terms will often cause only bad suggestions with any distance + /// metric. + /// public virtual int MinQueryLength { get @@ -202,8 +227,16 @@ namespace Lucene.Net.Search.Spell ///

- /// Get the maximum threshold of documents a query term can appear in order - /// to provide suggestions. + /// Gets or sets the maximum threshold (default: 0.01f) of documents a query term can + /// appear in order to provide suggestions. + ///

+ /// Very high-frequency terms are typically spelled correctly. Additionally, + /// this can increase performance as it will do no work for the common case + /// of correctly-spelled input terms. + ///

+ /// This can be specified as a relative percentage of documents such as 0.5f, + /// or it can be specified as an absolute whole document frequency, such as 4f. + /// Absolute document frequencies may not be fractional. ///

public virtual float MaxQueryFrequency { @@ -223,7 +256,15 @@ namespace Lucene.Net.Search.Spell /// - /// true if the spellchecker should lowercase terms + /// True if the spellchecker should lowercase terms (default: true) + ///

+ /// This is a convenience method, if your index field has more complicated + /// analysis (such as StandardTokenizer removing punctuation), its probably + /// better to turn this off, and instead run your query terms through your + /// Analyzer first. + ///

+ /// If this option is not on, case differences count as an edit! + /// public virtual bool LowerCaseTerms { get @@ -238,7 +279,8 @@ namespace Lucene.Net.Search.Spell ///

- /// Get the current comparator in use. + /// Gets or sets the comparator for sorting suggestions. + /// The default is /// public virtual IComparer Comparator { @@ -254,7 +296,14 @@ namespace Lucene.Net.Search.Spell /// - /// Get the string distance metric in use. + /// Gets or sets the string distance metric. + /// The default is . + ///

+ /// Note: because this spellchecker draws its candidates from the term + /// dictionary using Damerau-Levenshtein, it works best with an edit-distance-like + /// string metric. If you use a different metric than the default, + /// you might want to consider increasing + /// to draw more candidates for your metric to rank. ///

public virtual IStringDistance Distance { @@ -270,8 +319,8 @@ namespace Lucene.Net.Search.Spell /// - /// Calls {@link #suggestSimilar(Term, int, IndexReader, SuggestMode) - /// suggestSimilar(term, numSug, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)} + /// Calls + /// SuggestSimilar(term, numSug, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX) /// public virtual SuggestWord[] SuggestSimilar(Term term, int numSug, IndexReader ir) { @@ -279,9 +328,8 @@ namespace Lucene.Net.Search.Spell } /// - /// Calls {@link #suggestSimilar(Term, int, IndexReader, SuggestMode, float) - /// suggestSimilar(term, numSug, ir, suggestMode, this.accuracy)} - /// + /// Calls + /// SuggestSimilar(term, numSug, ir, suggestMode, this.accuracy) /// public virtual SuggestWord[] SuggestSimilar(Term term, int numSug, IndexReader ir, SuggestMode suggestMode) { @@ -291,10 +339,10 @@ namespace Lucene.Net.Search.Spell /// /// Suggest similar words. /// - /// Unlike , the similarity used to fetch the most + /// + /// Unlike , the similarity used to fetch the most /// relevant terms is an edit distance, therefore typically a low value /// for numSug will work very well. - /// /// /// /// Term you want to spell check on @@ -303,7 +351,7 @@ namespace Lucene.Net.Search.Spell /// specifies when to return suggested words /// return only suggested words that match with this similarity /// sorted list of the suggested words according to the comparator - /// If there is a low-level I/O error. + /// If there is a low-level I/O error. public virtual SuggestWord[] SuggestSimilar(Term term, int numSug, IndexReader ir, SuggestMode suggestMode, float accuracy) { CharsRef spare = new CharsRef(); @@ -402,7 +450,7 @@ namespace Lucene.Net.Search.Spell /// The minimum accuracy a suggested spelling correction needs to have in order to be included /// a chars scratch /// a collection of spelling corrections sorted by ScoreTerm's natural order. - /// If I/O related errors occur + /// If I/O related errors occur protected internal virtual IEnumerable SuggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance, float accuracy, CharsRef spare) { @@ -481,7 +529,7 @@ namespace Lucene.Net.Search.Spell } /// - /// Holds a spelling correction for internal usage inside . + /// Holds a spelling correction for internal usage inside . /// protected internal class ScoreTerm : IComparable { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/HighFrequencyDictionary.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/HighFrequencyDictionary.cs b/src/Lucene.Net.Suggest/Spell/HighFrequencyDictionary.cs index 6b84a47..9a7e243 100644 --- a/src/Lucene.Net.Suggest/Spell/HighFrequencyDictionary.cs +++ b/src/Lucene.Net.Suggest/Spell/HighFrequencyDictionary.cs @@ -29,7 +29,7 @@ namespace Lucene.Net.Search.Spell /// Threshold is a value in [0..1] representing the minimum /// number of documents (of the total) where a term should appear. /// - /// Based on LuceneDictionary. + /// Based on . /// public class HighFrequencyDictionary : IDictionary { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/JaroWinklerDistance.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/JaroWinklerDistance.cs b/src/Lucene.Net.Suggest/Spell/JaroWinklerDistance.cs index e43a64e..c17f7cd 100644 --- a/src/Lucene.Net.Suggest/Spell/JaroWinklerDistance.cs +++ b/src/Lucene.Net.Suggest/Spell/JaroWinklerDistance.cs @@ -22,10 +22,8 @@ namespace Lucene.Net.Search.Spell /// /// Similarity measure for short strings such as person names. - /// - /// + /// See http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance /// - /// http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance public class JaroWinklerDistance : IStringDistance { @@ -34,7 +32,7 @@ namespace Lucene.Net.Search.Spell /// /// Creates a new distance metric with the default threshold /// for the Jaro Winkler bonus (0.7) - /// + /// public JaroWinklerDistance() { } @@ -126,9 +124,9 @@ namespace Lucene.Net.Search.Spell } /// - /// Sets the threshold used to determine when Winkler bonus should be used. - /// Set to a negative value to get the Jaro distance. - /// the new value of the threshold + /// Gets or sets the threshold used to determine when Winkler bonus should be used. + /// The default value is 0.7. Set to a negative value to get the Jaro distance. + /// public virtual float Threshold { set http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/LevensteinDistance.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/LevensteinDistance.cs b/src/Lucene.Net.Suggest/Spell/LevensteinDistance.cs index 4a1e281..65e3b12 100644 --- a/src/Lucene.Net.Suggest/Spell/LevensteinDistance.cs +++ b/src/Lucene.Net.Suggest/Spell/LevensteinDistance.cs @@ -22,11 +22,12 @@ namespace Lucene.Net.Search.Spell /// /// Levenstein edit distance class. /// + // LUCENENET NOTE: This class is misspelled: It should be Levenshtein public sealed class LevensteinDistance : IStringDistance { /// - /// Optimized to run a bit faster than the static getDistance(). + /// Optimized to run a bit faster than the static GetDistance(). /// In one benchmark times were 5.3sec using ctr vs 8.5sec w/ static method, thus 37% faster. /// public LevensteinDistance() http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs index bce0cc2..5ee47ea 100644 --- a/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs +++ b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs @@ -26,15 +26,15 @@ namespace Lucene.Net.Search.Spell /// way as Lucene's FuzzyTermsEnum with the transpositions option enabled. /// /// Notes: - ///
    - ///
  • This metric treats full unicode codepoints as characters - ///
  • This metric scales raw edit distances into a floating point score - /// based upon the shortest of the two terms - ///
  • Transpositions of two adjacent codepoints are treated as primitive - /// edits. - ///
  • Edits are applied in parallel: for example, "ab" and "bca" have - /// distance 3. - ///
+ /// + /// This metric treats full unicode codepoints as characters + /// This metric scales raw edit distances into a floating point score + /// based upon the shortest of the two terms + /// Transpositions of two adjacent codepoints are treated as primitive + /// edits. + /// Edits are applied in parallel: for example, "ab" and "bca" have + /// distance 3. + /// /// /// NOTE: this class is not particularly efficient. It is only intended /// for merging results from multiple DirectSpellCheckers. http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/NGramDistance.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/NGramDistance.cs b/src/Lucene.Net.Suggest/Spell/NGramDistance.cs index b133fa4..7d52afe 100644 --- a/src/Lucene.Net.Suggest/Spell/NGramDistance.cs +++ b/src/Lucene.Net.Suggest/Spell/NGramDistance.cs @@ -24,14 +24,13 @@ namespace Lucene.Net.Search.Spell /// "N-gram similarity and distance". Proceedings of the Twelfth International /// Conference on String Processing and Information Retrieval (SPIRE 2005), pp. 115-126, /// Buenos Aires, Argentina, November 2005. - /// http://www.cs.ualberta.ca/~kondrak/papers/spire05.pdf + /// http://www.cs.ualberta.ca/~kondrak/papers/spire05.pdf /// /// This implementation uses the position-based optimization to compute partial /// matches of n-gram sub-strings and adds a null-character prefix of size n-1 /// so that the first character is contained in the same number of n-grams as /// a middle character. Null-character prefix matches are discounted so that /// strings with no matching characters will return a distance of 0. - /// /// public class NGramDistance : IStringDistance { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs b/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs index 7e85d36..c25679e 100644 --- a/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs +++ b/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs @@ -26,10 +26,10 @@ namespace Lucene.Net.Search.Spell /// /// Dictionary represented by a text file. /// - ///

Format allowed: 1 word per line:
- /// word1
- /// word2
- /// word3
+ /// Format allowed: 1 word per line: + /// word1 + /// word2 + /// word3 ///

public class PlainTextDictionary : IDictionary { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/SpellChecker.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/SpellChecker.cs b/src/Lucene.Net.Suggest/Spell/SpellChecker.cs index b88cef1..524a40b 100644 --- a/src/Lucene.Net.Suggest/Spell/SpellChecker.cs +++ b/src/Lucene.Net.Suggest/Spell/SpellChecker.cs @@ -32,17 +32,16 @@ namespace Lucene.Net.Search.Spell /// (initially inspired by the David Spencer code). /// /// - /// Example Usage: + /// Example Usage (C#): /// - ///
+    /// 
     ///  SpellChecker spellchecker = new SpellChecker(spellIndexDirectory);
     ///  // To index a field of a user index:
-    ///  spellchecker.indexDictionary(new LuceneDictionary(my_lucene_reader, a_field));
+    ///  spellchecker.IndexDictionary(new LuceneDictionary(my_lucene_reader, a_field));
     ///  // To index a file containing words:
-    ///  spellchecker.indexDictionary(new PlainTextDictionary(new File("myfile.txt")));
-    ///  String[] suggestions = spellchecker.suggestSimilar("misspelt", 5);
-    /// 
- /// + /// spellchecker.IndexDictionary(new PlainTextDictionary(new FileInfo("myfile.txt"))); + /// string[] suggestions = spellchecker.SuggestSimilar("misspelt", 5); + /// /// ///
/// @@ -50,7 +49,8 @@ namespace Lucene.Net.Search.Spell { /// - /// The default minimum score to use, if not specified by calling . + /// The default minimum score to use, if not specified by setting + /// or overriding with . /// public const float DEFAULT_ACCURACY = 0.5f; @@ -62,7 +62,7 @@ namespace Lucene.Net.Search.Spell /// /// the spell index /// - // don't modify the directory directly - see #swapSearcher() + // don't modify the directory directly - see SwapSearcher() // TODO: why is this package private? internal Directory spellIndex; /// @@ -71,8 +71,8 @@ namespace Lucene.Net.Search.Spell private float bStart = 2.0f; private float bEnd = 1.0f; - // don't use this searcher directly - see #swapSearcher() + // don't use this searcher directly - see SwapSearcher() private IndexSearcher searcher; /// @@ -100,20 +100,20 @@ namespace Lucene.Net.Search.Spell /// Use the given directory as a spell checker index. The directory /// is created if it doesn't exist yet. /// the spell index directory - /// the measurement to use - /// if Spellchecker can not open the directory + /// the measurement to use + /// if Spellchecker can not open the directory public SpellChecker(Directory spellIndex, IStringDistance sd) : this(spellIndex, sd, SuggestWordQueue.DEFAULT_COMPARATOR) { } /// /// Use the given directory as a spell checker index with a - /// as the default . The + /// as the default . The /// directory is created if it doesn't exist yet. /// /// /// the spell index directory - /// + /// /// if spellchecker can not open the directory public SpellChecker(Directory spellIndex) : this(spellIndex, new LevensteinDistance()) @@ -121,12 +121,12 @@ namespace Lucene.Net.Search.Spell } /// - /// Use the given directory as a spell checker index with the given measure - /// and the given for sorting the results. + /// Use the given directory as a spell checker index with the given measure + /// and the given for sorting the results. /// The spelling index /// The distance /// The comparator - /// if there is a problem opening the index + /// if there is a problem opening the index public SpellChecker(Directory spellIndex, IStringDistance sd, IComparer comparator) { SpellIndex = spellIndex; @@ -135,7 +135,7 @@ namespace Lucene.Net.Search.Spell } /// - /// Use a different index as the spell checker index or re-open + /// Sets a different index as the spell checker index or re-open /// the existing index if spellIndex is the same value /// as given in the constructor. /// the spell directory to use @@ -163,8 +163,8 @@ namespace Lucene.Net.Search.Spell } /// - /// Sets the for the . - /// the comparator + /// Gets or sets the for the . + /// public virtual IComparer Comparator { set @@ -179,11 +179,9 @@ namespace Lucene.Net.Search.Spell /// - /// Sets the implementation for this - /// instance. + /// Gets or sets the implementation for this + /// instance. /// - /// the implementation for this - /// instance public virtual IStringDistance StringDistance { set @@ -197,8 +195,11 @@ namespace Lucene.Net.Search.Spell } /// - /// Sets the accuracy 0 < minScore < 1; default - /// The new accuracy + /// Gets or sets the accuracy (minimum score) to be used, unless overridden in + /// , + /// to decide whether a suggestion is included or not. + /// Sets the accuracy 0 < minScore < 1; default + /// public virtual float Accuracy { set @@ -214,25 +215,25 @@ namespace Lucene.Net.Search.Spell /// /// Suggest similar words. - /// - /// As the Lucene similarity that is used to fetch the most relevant n-grammed terms + /// + /// As the Lucene similarity that is used to fetch the most relevant n-grammed terms /// is not the same as the edit distance strategy used to calculate the best /// matching spell-checked word from the hits that Lucene found, one usually has /// to retrieve a couple of numSug's in order to get the true best match. - /// /// - /// I.e. if numSug == 1, don't count on that suggestion being the best one. + /// + /// I.e. if numSug == 1, don't count on that suggestion being the best one. /// Thus, you should set this value to at least 5 for a good suggestion. - /// /// /// /// the word you want a spell check done on /// the number of suggested words - /// if the underlying index throws an - /// if the Spellchecker is already closed - /// String[] - /// - /// + /// if the underlying index throws an + /// if the Spellchecker is already disposed + /// string[] the sorted list of the suggest words with these 2 criteria: + /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity + /// of the suggest words in the field of the user index + /// public virtual string[] SuggestSimilar(string word, int numSug) { return this.SuggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); @@ -240,34 +241,34 @@ namespace Lucene.Net.Search.Spell /// /// Suggest similar words. - /// - /// As the Lucene similarity that is used to fetch the most relevant n-grammed terms + /// + /// As the Lucene similarity that is used to fetch the most relevant n-grammed terms /// is not the same as the edit distance strategy used to calculate the best /// matching spell-checked word from the hits that Lucene found, one usually has /// to retrieve a couple of numSug's in order to get the true best match. - /// /// - /// I.e. if numSug == 1, don't count on that suggestion being the best one. + /// + /// I.e. if numSug == 1, don't count on that suggestion being the best one. /// Thus, you should set this value to at least 5 for a good suggestion. - /// /// /// /// the word you want a spell check done on /// the number of suggested words /// The minimum score a suggestion must have in order to qualify for inclusion in the results - /// if the underlying index throws an - /// if the Spellchecker is already closed - /// String[] - /// - /// + /// if the underlying index throws an + /// if the Spellchecker is already disposed + /// string[] the sorted list of the suggest words with these 2 criteria: + /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity + /// of the suggest words in the field of the user index + /// public virtual string[] SuggestSimilar(string word, int numSug, float accuracy) { return this.SuggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy); } /// - /// Calls {@link #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) - /// suggestSimilar(word, numSug, ir, suggestMode, field, this.accuracy)} + /// Calls + /// SuggestSimilar(word, numSug, ir, suggestMode, field, this.accuracy) /// /// public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode) @@ -277,16 +278,15 @@ namespace Lucene.Net.Search.Spell /// /// Suggest similar words (optionally restricted to a field of an index). - /// - /// As the Lucene similarity that is used to fetch the most relevant n-grammed terms + /// + /// As the Lucene similarity that is used to fetch the most relevant n-grammed terms /// is not the same as the edit distance strategy used to calculate the best /// matching spell-checked word from the hits that Lucene found, one usually has /// to retrieve a couple of numSug's in order to get the true best match. - /// /// - /// I.e. if numSug == 1, don't count on that suggestion being the best one. + /// + /// I.e. if numSug == 1, don't count on that suggestion being the best one. /// Thus, you should set this value to at least 5 for a good suggestion. - /// /// /// /// the word you want a spell check done on @@ -297,9 +297,9 @@ namespace Lucene.Net.Search.Spell /// /// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) /// The minimum score a suggestion must have in order to qualify for inclusion in the results - /// if the underlying index throws an - /// if the Spellchecker is already closed - /// String[] the sorted list of the suggest words with these 2 criteria: + /// if the underlying index throws an + /// if the is already disposed + /// string[] the sorted list of the suggest words with these 2 criteria: /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity /// of the suggest words in the field of the user index /// @@ -457,7 +457,7 @@ namespace Lucene.Net.Search.Spell /// /// Removes all terms from the spell check index. - /// If there is a low-level I/O error. + /// If there is a low-level I/O error. /// if the Spellchecker is already closed public virtual void ClearIndex() { @@ -474,8 +474,8 @@ namespace Lucene.Net.Search.Spell /// /// Check whether the word exists in the index. /// word to check - /// If there is a low-level I/O error. - /// if the Spellchecker is already closed + /// If there is a low-level I/O error. + /// if the is already disposed /// true if the word exists in the index public virtual bool Exist(string word) { @@ -494,12 +494,13 @@ namespace Lucene.Net.Search.Spell } /// - /// Indexes the data from the given . + /// Indexes the data from the given . /// Dictionary to index - /// to use + /// to use /// whether or not the spellcheck index should be fully merged - /// if the Spellchecker is already closed - /// If there is a low-level I/O error. + /// if the is already disposed + /// If there is a low-level I/O error. + // LUCENENET TODO: Replace all usage of AlreadyClosedException with System.ObjectDisposedException public void IndexDictionary(IDictionary dict, IndexWriterConfig config, bool fullMerge) { lock (modifyCurrentIndexLock) @@ -673,9 +674,9 @@ namespace Lucene.Net.Search.Spell } /// - /// Close the IndexSearcher used by this SpellChecker - /// if the close operation causes an - /// if the is already closed + /// Dispose the underlying IndexSearcher used by this SpellChecker + /// if the close operation causes an + /// if the is already disposed public void Dispose() { lock (searcherLock) @@ -719,7 +720,7 @@ namespace Lucene.Net.Search.Spell /// Creates a new read-only IndexSearcher /// the directory used to open the searcher /// a new read-only IndexSearcher - /// f there is a low-level IO error + /// f there is a low-level IO error // for testing purposes internal virtual IndexSearcher CreateSearcher(Directory dir) { @@ -727,11 +728,11 @@ namespace Lucene.Net.Search.Spell } /// - /// Returns true if and only if the is - /// disposed, otherwise false. + /// Returns true if and only if the is + /// disposed, otherwise false. /// - /// true if and only if the is - /// disposed, otherwise false. + /// true if and only if the is + /// disposed, otherwise false. internal virtual bool IsDisposed { get http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/SuggestMode.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/SuggestMode.cs b/src/Lucene.Net.Suggest/Spell/SuggestMode.cs index fdda563..a782b41 100644 --- a/src/Lucene.Net.Suggest/Spell/SuggestMode.cs +++ b/src/Lucene.Net.Suggest/Spell/SuggestMode.cs @@ -37,7 +37,7 @@ /// /// Always attempt to offer suggestions (however, other parameters may limit /// suggestions. For example, see - /// ). + /// ). /// SUGGEST_ALWAYS } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/SuggestWord.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/SuggestWord.cs b/src/Lucene.Net.Suggest/Spell/SuggestWord.cs index 1576a7b..ef5de3e 100644 --- a/src/Lucene.Net.Suggest/Spell/SuggestWord.cs +++ b/src/Lucene.Net.Suggest/Spell/SuggestWord.cs @@ -18,7 +18,7 @@ */ /// - /// SuggestWord, used in suggestSimilar method in SpellChecker class. + /// SuggestWord, used in suggestSimilar method in class. ///

/// Default sort is first by score, then by frequency. ///

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/SuggestWordFrequencyComparator.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/SuggestWordFrequencyComparator.cs b/src/Lucene.Net.Suggest/Spell/SuggestWordFrequencyComparator.cs index 252233f..3ddf3e9 100644 --- a/src/Lucene.Net.Suggest/Spell/SuggestWordFrequencyComparator.cs +++ b/src/Lucene.Net.Suggest/Spell/SuggestWordFrequencyComparator.cs @@ -27,8 +27,8 @@ namespace Lucene.Net.Search.Spell { /// - /// Creates a new comparator that will compare by , - /// then by , then by . + /// Creates a new comparator that will compare by , + /// then by , then by . /// public SuggestWordFrequencyComparator() { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/SuggestWordQueue.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/SuggestWordQueue.cs b/src/Lucene.Net.Suggest/Spell/SuggestWordQueue.cs index 3a0ebc9..ea9311f 100644 --- a/src/Lucene.Net.Suggest/Spell/SuggestWordQueue.cs +++ b/src/Lucene.Net.Suggest/Spell/SuggestWordQueue.cs @@ -25,19 +25,18 @@ namespace Lucene.Net.Search.Spell /// /// /// - /// public sealed class SuggestWordQueue : PriorityQueue { /// /// Default comparator: score then frequency. - /// + /// public static readonly IComparer DEFAULT_COMPARATOR = new SuggestWordScoreComparator(); private readonly IComparer comparator; /// - /// Use the + /// Use the /// The size of the queue public SuggestWordQueue(int size) : base(size) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/SuggestWordScoreComparator.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/SuggestWordScoreComparator.cs b/src/Lucene.Net.Suggest/Spell/SuggestWordScoreComparator.cs index 3d875db..b444e8f 100644 --- a/src/Lucene.Net.Suggest/Spell/SuggestWordScoreComparator.cs +++ b/src/Lucene.Net.Suggest/Spell/SuggestWordScoreComparator.cs @@ -27,8 +27,8 @@ namespace Lucene.Net.Search.Spell { /// - /// Creates a new comparator that will compare by , - /// then by , then by . + /// Creates a new comparator that will compare by , + /// then by , then by . /// public SuggestWordScoreComparator() { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Spell/WordBreakSpellChecker.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/WordBreakSpellChecker.cs b/src/Lucene.Net.Suggest/Spell/WordBreakSpellChecker.cs index fb79975..d1d1e0e 100644 --- a/src/Lucene.Net.Suggest/Spell/WordBreakSpellChecker.cs +++ b/src/Lucene.Net.Suggest/Spell/WordBreakSpellChecker.cs @@ -42,19 +42,17 @@ namespace Lucene.Net.Search.Spell /// /// Creates a new spellchecker with default configuration values - /// - /// - /// - /// - /// + /// + /// + /// + /// + /// public WordBreakSpellChecker() { } /// - /// /// Determines the order to list word break suggestions - /// /// public enum BreakSuggestionSortMethod { @@ -75,19 +73,16 @@ namespace Lucene.Net.Search.Spell } /// - /// /// Generate suggestions by breaking the passed-in term into multiple words. /// The scores returned are equal to the number of word breaks needed so a /// lower score is generally preferred over a higher score. - /// /// /// - /// - default = + /// - default = /// - /// - default = - /// + /// - default = /// one or more arrays of words formed by breaking up the original term - /// If there is a low-level I/O error. + /// If there is a low-level I/O error. public virtual SuggestWord[][] SuggestWordBreaks(Term term, int maxSuggestions, IndexReader ir, SuggestMode suggestMode, BreakSuggestionSortMethod sortMethod) { if (maxSuggestions < 1) @@ -135,30 +130,30 @@ namespace Lucene.Net.Search.Spell /// /// /// Generate suggestions by combining one or more of the passed-in terms into - /// single words. The returned contains both a - /// and also an array detailing which passed-in terms were + /// single words. The returned contains both a + /// and also an array detailing which passed-in terms were /// involved in creating this combination. The scores returned are equal to the /// number of word combinations needed, also one less than the length of the - /// array . Generally, a + /// array . Generally, a /// suggestion with a lower score is preferred over a higher score. /// /// /// To prevent two adjacent terms from being combined (for instance, if one is /// mandatory and the other is prohibited), separate the two terms with - /// + /// /// /// - /// When suggestMode equals , each + /// When suggestMode equals , each /// suggestion will include at least one term not in the index. /// /// - /// When suggestMode equals , each + /// When suggestMode equals , each /// suggestion will have the same, or better frequency than the most-popular /// included term. /// /// /// an array of words generated by combining original terms - /// If there is a low-level I/O error. + /// If there is a low-level I/O error. public virtual CombineSuggestion[] SuggestWordCombinations(Term[] terms, int maxSuggestions, IndexReader ir, SuggestMode suggestMode) { if (maxSuggestions < 1) @@ -363,9 +358,10 @@ namespace Lucene.Net.Search.Spell } /// - /// Returns the minimum frequency a term must have - /// to be part of a suggestion. - /// + /// Gets or sets the minimum frequency a term must have to be + /// included as part of a suggestion. Default=1 Not applicable when used with + /// + /// public virtual int MinSuggestionFrequency { get @@ -379,8 +375,9 @@ namespace Lucene.Net.Search.Spell } /// - /// Returns the maximum length of a combined suggestion - /// + /// Gets or sets the maximum length of a suggestion made + /// by combining 1 or more original terms. Default=20. + /// public virtual int MaxCombineWordLength { get @@ -394,8 +391,8 @@ namespace Lucene.Net.Search.Spell } /// - /// Returns the minimum size of a broken word - /// + /// Gets or sets the minimum length to break words down to. Default=1. + /// public virtual int MinBreakWordLength { get @@ -409,8 +406,9 @@ namespace Lucene.Net.Search.Spell } /// - /// Returns the maximum number of changes to perform on the input - /// + /// Gets or sets the maximum numbers of changes (word breaks or combinations) to make + /// on the original term(s). Default=1. + /// public virtual int MaxChanges { get @@ -424,8 +422,9 @@ namespace Lucene.Net.Search.Spell } /// - /// Returns the maximum number of word combinations to evaluate. - /// + /// Gets or sets the maximum number of word combinations to evaluate. Default=1000. A higher + /// value might improve result quality. A lower value might improve performance. + /// public virtual int MaxEvaluations { get http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs index 23a38eb..4ce7e27 100644 --- a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs +++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs @@ -70,13 +70,13 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Field name used for the indexed text, as a - /// StringField, for exact lookup. + /// , for exact lookup. /// protected internal const string EXACT_TEXT_FIELD_NAME = "exacttext"; /// /// Field name used for the indexed context, as a - /// StringField and a SortedSetDVField, for filtering. + /// and a , for filtering. /// protected internal const string CONTEXTS_FIELD_NAME = "contexts"; @@ -95,7 +95,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing private IndexWriter writer; /// - /// used for lookups. + /// used for lookups. protected internal SearcherManager searcherMgr; /// @@ -110,10 +110,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Create a new instance, loading from a previously built - /// AnalyzingInfixSuggester directory, if it exists. This directory must be - /// private to the infix suggester (i.e., not an external - /// Lucene index). Note that - /// will also close the provided directory. + /// directory, if it exists. + /// This directory must be + /// private to the infix suggester (i.e., not an external + /// Lucene index). Note that + /// will also dispose the provided directory. /// public AnalyzingInfixSuggester(LuceneVersion matchVersion, Directory dir, Analyzer analyzer) : this(matchVersion, dir, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS) @@ -122,13 +123,13 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Create a new instance, loading from a previously built - /// AnalyzingInfixSuggester directory, if it exists. This directory must be - /// private to the infix suggester (i.e., not an external - /// Lucene index). Note that - /// will also close the provided directory. + /// directory, if it exists. This directory must be + /// private to the infix suggester (i.e., not an external + /// Lucene index). Note that + /// will also dispose the provided directory. /// /// Minimum number of leading characters - /// before PrefixQuery is used (default 4). + /// before is used (default 4). /// Prefixes shorter than this are indexed as character /// ngrams (increasing index size but making lookups /// faster). @@ -156,7 +157,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Override this to customize index settings, e.g. which - /// codec to use. + /// codec to use. /// protected internal virtual IndexWriterConfig GetIndexWriterConfig(LuceneVersion matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode_e openMode) { @@ -173,8 +174,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing } /// - /// Subclass can override to choose a specific {@link - /// Directory} implementation. + /// Subclass can override to choose a specific + /// implementation. /// protected internal virtual Directory GetDirectory(DirectoryInfo path) { @@ -283,11 +284,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing } /// - /// Adds a new suggestion. Be sure to use - /// instead if you want to replace a previous suggestion. - /// After adding or updating a batch of new suggestions, - /// you must call in the end in order to - /// see the suggestions in + /// Adds a new suggestion. Be sure to use + /// instead if you want to replace a previous suggestion. + /// After adding or updating a batch of new suggestions, + /// you must call in the end in order to + /// see the suggestions in /// public virtual void Add(BytesRef text, IEnumerable contexts, long weight, BytesRef payload) { @@ -296,12 +297,12 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Updates a previous suggestion, matching the exact same - /// text as before. Use this to change the weight or - /// payload of an already added suggstion. If you know - /// this text is not already present you can use {@link - /// #add} instead. After adding or updating a batch of - /// new suggestions, you must call in the - /// end in order to see the suggestions in + /// text as before. Use this to change the weight or + /// payload of an already added suggstion. If you know + /// this text is not already present you can use + /// instead. After adding or updating a batch of + /// new suggestions, you must call in the + /// end in order to see the suggestions in /// public virtual void Update(BytesRef text, IEnumerable contexts, long weight, BytesRef payload) { @@ -339,8 +340,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Reopens the underlying searcher; it's best to "batch - /// up" many additions/updates, and then call refresh - /// once in the end. + /// up" many additions/updates, and then call refresh + /// once in the end. /// public virtual void Refresh() { @@ -378,8 +379,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// This is called if the last token isn't ended - /// (e.g. user did not type a space after it). Return an - /// appropriate Query clause to add to the BooleanQuery. + /// (e.g. user did not type a space after it). Return an + /// appropriate clause to add to the . /// protected internal virtual Query GetLastTokenQuery(string token) { @@ -394,8 +395,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Retrieve suggestions, specifying whether all terms - /// must match ({@code allTermsRequired}) and whether the hits - /// should be highlighted ({@code doHighlight}). + /// must match () and whether the hits + /// should be highlighted (). /// public virtual IList DoLookup(string key, IEnumerable contexts, int num, bool allTermsRequired, bool doHighlight) { @@ -541,7 +542,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Create the results based on the search hits. /// Can be overridden by subclass to add particular behavior (e.g. weight transformation) - /// If there are problems reading fields from the underlying Lucene index. + /// If there are problems reading fields from the underlying Lucene index. protected internal virtual IList CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string charSequence, bool doHighlight, IEnumerable matchedTokens, string prefixToken) { @@ -612,7 +613,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Subclass can override this to tweak the Query before - /// searching. + /// searching. /// protected internal virtual Query FinishQuery(BooleanQuery bq, bool allTermsRequired) { @@ -621,9 +622,9 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Override this method to customize the Object - /// representing a single highlighted suggestions; the - /// result is set on each {@link - /// LookupResult#highlightKey} member. + /// representing a single highlighted suggestions; the + /// result is set on each + /// member. /// protected internal virtual object Highlight(string text, IEnumerable matchedTokens, string prefixToken) { @@ -678,10 +679,10 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Called while highlighting a single result, to append a - /// non-matching chunk of text from the suggestion to the - /// provided fragments list. - /// The {@code StringBuilder} to append to - /// The text chunk to add + /// non-matching chunk of text from the suggestion to the + /// provided fragments list. + /// The to append to + /// The text chunk to add protected internal virtual void AddNonMatch(StringBuilder sb, string text) { sb.Append(text); @@ -689,10 +690,10 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Called while highlighting a single result, to append - /// the whole matched token to the provided fragments list. - /// The {@code StringBuilder} to append to - /// The surface form (original) text - /// The analyzed token corresponding to the surface form text + /// the whole matched token to the provided fragments list. + /// The to append to + /// The surface form (original) text + /// The analyzed token corresponding to the surface form text protected internal virtual void AddWholeMatch(StringBuilder sb, string surface, string analyzed) { sb.Append(""); @@ -702,13 +703,13 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Called while highlighting a single result, to append a - /// matched prefix token, to the provided fragments list. - /// The {@code StringBuilder} to append to - /// The fragment of the surface form - /// (indexed during , corresponding to + /// matched prefix token, to the provided fragments list. + /// The to append to + /// The fragment of the surface form + /// (indexed during , corresponding to /// this match - /// The analyzed token that matched - /// The prefix of the token that matched + /// The analyzed token that matched + /// The prefix of the token that matched protected internal virtual void AddPrefixMatch(StringBuilder sb, string surface, string analyzed, string prefixToken) { // TODO: apps can try to invert their analysis logic http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingSuggester.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingSuggester.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingSuggester.cs index d0f4a0d..95d4ff6 100644 --- a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingSuggester.cs +++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingSuggester.cs @@ -43,7 +43,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// suggestion "The Ghost of Christmas Past". Note that /// position increments MUST NOT be preserved for this example /// to work, so you should call the constructor with - /// preservePositionIncrements parameter set to + /// parameter set to /// false /// ///
@@ -63,24 +63,24 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// /// There are some limitations: - ///
    + /// /// - ///
  • A lookup from a query like "net" in English won't + /// A lookup from a query like "net" in English won't /// be any different than "net " (ie, user added a /// trailing space) because analyzers don't reflect /// when they've seen a token separator and when they - /// haven't. + /// haven't. /// - ///
  • If you're using {@code StopFilter}, and the user will + /// If you're using , and the user will /// type "fast apple", but so far all they've typed is /// "fast a", again because the analyzer doesn't convey whether /// it's seen a token separator after the "a", - /// {@code StopFilter} will remove that "a" causing - /// far more matches than you'd expect. + /// will remove that "a" causing + /// far more matches than you'd expect. /// - ///
  • Lookups with the empty string return no results - /// instead of all results. - ///
+ /// Lookups with the empty string return no results + /// instead of all results. + /// /// /// @lucene.experimental ///
@@ -89,10 +89,10 @@ namespace Lucene.Net.Search.Suggest.Analyzing { /// - /// FST: - /// input is the analyzed form, with a null byte between terms - /// weights are encoded as costs: (Integer.MAX_VALUE-weight) - /// surface is the original, unanalyzed form. + /// FST(Weight,Surface): + /// input is the analyzed form, with a null byte between terms + /// weights are encoded as costs: ( - weight) + /// surface is the original, unanalyzed form. /// private FST.Pair> fst = null; @@ -119,18 +119,18 @@ namespace Lucene.Net.Search.Suggest.Analyzing private readonly bool preserveSep; /// - /// Include this flag in the options parameter to {@link - /// #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)} to always - /// return the exact match first, regardless of score. This - /// has no performance impact but could result in - /// low-quality suggestions. + /// Include this flag in the options parameter to + /// to always + /// return the exact match first, regardless of score. This + /// has no performance impact but could result in + /// low-quality suggestions. /// public const int EXACT_FIRST = 1; /// - /// Include this flag in the options parameter to {@link - /// #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)} to preserve - /// token separators when matching. + /// Include this flag in the options parameter to + /// to preserve + /// token separators when matching. /// public const int PRESERVE_SEP = 2; @@ -180,9 +180,9 @@ namespace Lucene.Net.Search.Suggest.Analyzing private long count = 0; /// - /// Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean) - /// AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST | - /// PRESERVE_SEP, 256, -1, true)} + /// Calls + /// AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true) + /// /// public AnalyzingSuggester(Analyzer analyzer) : this(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true) @@ -190,9 +190,9 @@ namespace Lucene.Net.Search.Suggest.Analyzing } /// - /// Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean) - /// AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST | - /// PRESERVE_SEP, 256, -1, true)} + /// Calls + /// AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true) + /// /// public AnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) : this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true) @@ -206,7 +206,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// analyzing suggestions while building the index. /// Analyzer that will be used for /// analyzing query text during lookup - /// see , + /// see , /// Maximum number of /// surface forms to keep for a single analyzed form. /// When there are too many surface forms we discard the http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Suggest/Analyzing/BlendedInfixSuggester.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/BlendedInfixSuggester.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/BlendedInfixSuggester.cs index 7c82ef1..fb3ba65 100644 --- a/src/Lucene.Net.Suggest/Suggest/Analyzing/BlendedInfixSuggester.cs +++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/BlendedInfixSuggester.cs @@ -31,7 +31,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing // - allow to use the search score /// - /// Extension of the AnalyzingInfixSuggester which transforms the weight + /// Extension of the which transforms the weight /// after search to take into account the position of the searched term into /// the indexed text. /// Please note that it increases the number of elements searched and applies the @@ -68,20 +68,26 @@ namespace Lucene.Net.Search.Suggest.Analyzing public enum BlenderType { /// - /// Application dependent; override {@link - /// #calculateCoefficient} to compute it. + /// Application dependent; override + /// to compute it. /// CUSTOM, /// - /// weight*(1 - 0.10*position) + /// weight*(1 - 0.10*position) + /// POSITION_LINEAR, /// - /// weight/(1+position) + /// weight/(1+position) + /// POSITION_RECIPROCAL, // TODO: //SCORE } + /// + /// LUCENENET specific to ensure our Queue is only altered by a single + /// thread at a time. + /// private static readonly object syncLock = new object(); /// @@ -101,7 +107,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Type of blending strategy, see BlenderType for more precisions /// Factor to multiply the number of searched elements before ponderate - /// If there are problems opening the underlying Lucene index. + /// If there are problems opening the underlying Lucene index. public BlendedInfixSuggester(LuceneVersion matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars, BlenderType blenderType, int numFactor) : base(matchVersion, dir, indexAnalyzer, queryAnalyzer, minPrefixChars) { @@ -247,7 +253,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// tokens found in the query /// unfinished token in the query /// the coefficient - /// If there are problems reading term vectors from the underlying Lucene index. + /// If there are problems reading term vectors from the underlying Lucene index. private double CreateCoefficient(IndexSearcher searcher, int doc, IEnumerable matchedTokens, string prefixToken) { Terms tv = searcher.IndexReader.GetTermVector(doc, TEXT_FIELD_NAME); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Suggest/Analyzing/FSTUtil.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/FSTUtil.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/FSTUtil.cs index 3255a36..3aed4d4 100644 --- a/src/Lucene.Net.Suggest/Suggest/Analyzing/FSTUtil.cs +++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/FSTUtil.cs @@ -28,7 +28,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Exposes a utility method to enumerate all paths - /// intersecting an with an . + /// intersecting an with an . /// public class FSTUtil { @@ -45,7 +45,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing public readonly State state; /// - /// Node in the FST where path ends: + /// Node in the where path ends: public readonly FST.Arc fstNode; /// @@ -68,8 +68,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing } /// - /// Enumerates all minimal prefix paths in the automaton that also intersect the FST, - /// accumulating the FST end node and output for each path. + /// Enumerates all minimal prefix paths in the automaton that also intersect the , + /// accumulating the end node and output for each path. /// public static IList> IntersectPrefixPaths(Automaton a, FST fst) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9a6f9734/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs index 0a52947..3bd5f4d 100644 --- a/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs +++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs @@ -39,9 +39,9 @@ namespace Lucene.Net.Search.Suggest.Analyzing // - add pruning of low-freq ngrams? /// - /// Builds an ngram model from the text sent to {@link - /// #build} and predicts based on the last grams-1 tokens in - /// the request sent to . This tries to + /// Builds an ngram model from the text sent to + /// and predicts based on the last grams-1 tokens in + /// the request sent to . This tries to /// handle the "long tail" of suggestions for when the /// incoming query is a never before seen query string. /// @@ -56,18 +56,17 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// /// This uses the stupid backoff language model to smooth - /// scores across ngram models; see - /// "Large language models in machine translation", - /// http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.76.1126 - /// for details. + /// scores across ngram models; see + /// + /// "Large language models in machine translation" for details. /// /// - /// From , the key of each result is the - /// ngram token; the value is Long.MAX_VALUE * score (fixed - /// point, cast to long). Divide by Long.MAX_VALUE to get + /// From , the key of each result is the + /// ngram token; the value is * score (fixed + /// point, cast to long). Divide by to get /// the score back, which ranges from 0.0 to 1.0. /// - /// onlyMorePopular is unused. + /// is unused. /// /// @lucene.experimental /// @@ -131,14 +130,14 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// The default character used to join multiple tokens - /// into a single ngram token. The input tokens produced - /// by the analyzer must not contain this character. + /// into a single ngram token. The input tokens produced + /// by the analyzer must not contain this character. /// public const byte DEFAULT_SEPARATOR = 0x1e; /// /// Instantiate, using the provided analyzer for both - /// indexing and lookup, using bigram model by default. + /// indexing and lookup, using bigram model by default. /// public FreeTextSuggester(Analyzer analyzer) : this(analyzer, analyzer, DEFAULT_GRAMS) @@ -147,7 +146,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Instantiate, using the provided indexing and lookup - /// analyzers, using bigram model by default. + /// analyzers, using bigram model by default. /// public FreeTextSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) : this(indexAnalyzer, queryAnalyzer, DEFAULT_GRAMS) @@ -156,8 +155,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Instantiate, using the provided indexing and lookup - /// analyzers, with the specified model (2 - /// = bigram, 3 = trigram, etc.). + /// analyzers, with the specified model (2 + /// = bigram, 3 = trigram, etc.). /// public FreeTextSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int grams) : this(indexAnalyzer, queryAnalyzer, grams, DEFAULT_SEPARATOR) @@ -166,13 +165,13 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// Instantiate, using the provided indexing and lookup - /// analyzers, and specified model (2 = bigram, 3 = - /// trigram ,etc.). The separator is passed to {@link - /// ShingleFilter#setTokenSeparator} to join multiple - /// tokens into a single ngram token; it must be an ascii - /// (7-bit-clean) byte. No input tokens should have this - /// byte, otherwise {@code IllegalArgumentException} is - /// thrown. + /// analyzers, and specified model (2 = bigram, 3 = + /// trigram ,etc.). The separator is passed to + /// to join multiple + /// tokens into a single ngram token; it must be an ascii + /// (7-bit-clean) byte. No input tokens should have this + /// byte, otherwise is + /// thrown. /// public FreeTextSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int grams, byte separator) { @@ -886,7 +885,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing } // NOTE: copied from WFSTCompletionLookup & tweaked - private long? LookupPrefix(FST fst, FST.BytesReader bytesReader, BytesRef scratch, FST.Arc arc) //Bogus + private long? LookupPrefix(FST fst, FST.BytesReader bytesReader, BytesRef scratch, FST.Arc arc) { long? output = fst.Outputs.NoOutput;