Return-Path: X-Original-To: apmail-lucenenet-commits-archive@www.apache.org Delivered-To: apmail-lucenenet-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 646D21746F for ; Fri, 7 Nov 2014 20:35:14 +0000 (UTC) Received: (qmail 59007 invoked by uid 500); 7 Nov 2014 20:35:14 -0000 Delivered-To: apmail-lucenenet-commits-archive@lucenenet.apache.org Received: (qmail 58897 invoked by uid 500); 7 Nov 2014 20:35:14 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 58867 invoked by uid 99); 7 Nov 2014 20:35:14 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Nov 2014 20:35:14 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id C593199D550; Fri, 7 Nov 2014 20:35:13 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: synhershko@apache.org To: commits@lucenenet.apache.org Date: Fri, 07 Nov 2014 20:35:14 -0000 Message-Id: <98d4717dea6947da923283a035a0aed8@git.apache.org> In-Reply-To: <3bdf45fd7742468989f4f86ecde3b744@git.apache.org> References: <3bdf45fd7742468989f4f86ecde3b744@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [2/2] lucenenet git commit: Some more work on Lucene.Net.Suggest Some more work on Lucene.Net.Suggest Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6d26b3c7 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6d26b3c7 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6d26b3c7 Branch: refs/heads/master Commit: 6d26b3c7ec96d8a683616c0117dc6d0241e0074a Parents: 709445e Author: Itamar Syn-Hershko Authored: Fri Nov 7 22:34:22 2014 +0200 Committer: Itamar Syn-Hershko Committed: Fri Nov 7 22:34:22 2014 +0200 ---------------------------------------------------------------------- src/Lucene.Net.Core/Util/Fst/Util.cs | 2 +- .../Spell/LuceneLevenshteinDistance.cs | 21 ++--- .../Suggest/Analyzing/FreeTextSuggester.cs | 93 +++++++++----------- 3 files changed, 52 insertions(+), 64 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d26b3c7/src/Lucene.Net.Core/Util/Fst/Util.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Util/Fst/Util.cs b/src/Lucene.Net.Core/Util/Fst/Util.cs index 01ee25a..4c15642 100644 --- a/src/Lucene.Net.Core/Util/Fst/Util.cs +++ b/src/Lucene.Net.Core/Util/Fst/Util.cs @@ -385,7 +385,7 @@ namespace Lucene.Net.Util.Fst } // If back plus this arc is competitive then add to queue: - protected internal virtual void AddIfCompetitive(FSTPath path) + protected virtual void AddIfCompetitive(FSTPath path) { Debug.Assert(Queue != null); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d26b3c7/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs index ebf0738..818ab64 100644 --- a/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs +++ b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs @@ -1,4 +1,5 @@ using System; +using Lucene.Net.Support; using Lucene.Net.Util; namespace Lucene.Net.Search.Spell @@ -50,7 +51,7 @@ namespace Lucene.Net.Search.Spell { } - public float getDistance(string target, string other) + public float GetDistance(string target, string other) { IntsRef targetPoints; IntsRef otherPoints; @@ -64,8 +65,8 @@ namespace Lucene.Net.Search.Spell // in "distributed spellcheck", and its inefficient in other ways too... // cheaper to do this up front once - targetPoints = toIntsRef(target); - otherPoints = toIntsRef(other); + targetPoints = ToIntsRef(target); + otherPoints = ToIntsRef(other); n = targetPoints.Length; int m = otherPoints.Length; @@ -104,15 +105,15 @@ namespace Lucene.Net.Search.Spell for (j = 1; j <= m; j++) { - t_j = otherPoints.ints[j - 1]; + t_j = otherPoints.Ints[j - 1]; for (i = 1; i <= n; i++) { - cost = targetPoints.ints[i - 1] == t_j ? 0 : 1; + cost = targetPoints.Ints[i - 1] == t_j ? 0 : 1; // minimum of cell to the left+1, to the top+1, diagonally left and up +cost d[i][j] = Math.Min(Math.Min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost); // transposition - if (i > 1 && j > 1 && targetPoints.ints[i - 1] == otherPoints.ints[j - 2] && targetPoints.ints[i - 2] == otherPoints.ints[j - 1]) + if (i > 1 && j > 1 && targetPoints.Ints[i - 1] == otherPoints.Ints[j - 2] && targetPoints.Ints[i - 2] == otherPoints.Ints[j - 1]) { d[i][j] = Math.Min(d[i][j], d[i - 2][j - 2] + cost); } @@ -122,13 +123,13 @@ namespace Lucene.Net.Search.Spell return 1.0f - ((float)d[n][m] / Math.Min(m, n)); } - private static IntsRef toIntsRef(string s) + private static IntsRef ToIntsRef(string s) { - IntsRef @ref = new IntsRef(s.Length); // worst case + var @ref = new IntsRef(s.Length); // worst case int utf16Len = s.Length; - for (int i = 0, cp = 0; i < utf16Len; i += char.charCount(cp)) + for (int i = 0, cp = 0; i < utf16Len; i += Character.CharCount(cp)) { - cp = @ref.ints[@ref.length++] = char.codePointAt(s, i); + cp = @ref.Ints[@ref.Length++] = Character.CodePointAt(s, i); } return @ref; } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d26b3c7/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs index b430381..7a7aa40 100644 --- a/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs +++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs @@ -13,6 +13,7 @@ using Lucene.Net.Util; using Lucene.Net.Util.Fst; using Directory = Lucene.Net.Store.Directory; using Version = Lucene.Net.Util.Version; +using Util = Lucene.Net.Util.Fst.Util; namespace Lucene.Net.Search.Suggest.Analyzing { @@ -254,7 +255,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing { // TODO: use ShingleAnalyzerWrapper? // Tack on ShingleFilter to the end, to generate token ngrams: - return new AnalyzerWrapperAnonymousInnerClassHelper(this, other.ReuseStrategy, other); + return new AnalyzerWrapperAnonymousInnerClassHelper(this, other.Strategy, other); } } @@ -263,7 +264,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing private readonly FreeTextSuggester outerInstance; private readonly Analyzer other; - public AnalyzerWrapperAnonymousInnerClassHelper(FreeTextSuggester outerInstance, UnknownType getReuseStrategy, Analyzer other) : base(getReuseStrategy) + public AnalyzerWrapperAnonymousInnerClassHelper(FreeTextSuggester outerInstance, ReuseStrategy reuseStrategy, Analyzer other) + : base(reuseStrategy) { this.outerInstance = outerInstance; this.other = other; @@ -321,11 +323,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing Directory dir = FSDirectory.Open(tempIndexPath); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, indexAnalyzer); - iwc.OpenMode = IndexWriterConfig.OpenMode.CREATE; + iwc.OpenMode = IndexWriterConfig.OpenMode_e.CREATE; iwc.RAMBufferSizeMB = ramBufferSizeMB; IndexWriter writer = new IndexWriter(dir, iwc); - FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + var ft = new FieldType(TextField.TYPE_NOT_STORED); // TODO: if only we had IndexOptions.TERMS_ONLY... ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; ft.OmitNorms = true; @@ -385,7 +387,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing totTokens += termsEnum.TotalTermFreq(); } - builder.Add(Util.ToIntsRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq())); + builder.Add(Lucene.Net.Util.Fst.Util.ToIntsRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq())); } fst = builder.Finish(); @@ -452,7 +454,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing { CodecUtil.CheckHeader(input, CODEC_NAME, VERSION_START, VERSION_START); count = input.ReadVLong(); - sbyte separatorOrig = input.ReadByte(); + var separatorOrig = (sbyte)input.ReadByte(); if (separatorOrig != separator) { throw new InvalidOperationException("separator=" + separator + " is incorrect: original model was built with separator=" + separatorOrig); @@ -464,37 +466,29 @@ namespace Lucene.Net.Search.Suggest.Analyzing } totTokens = input.ReadVLong(); - fst = new FST<>(input, PositiveIntOutputs.Singleton); + fst = new FST(input, PositiveIntOutputs.Singleton); return true; } - public override IList Lookup(string key, bool onlyMorePopular, int num) // ignored + public override IList DoLookup(string key, bool onlyMorePopular, int num) // ignored { - return Lookup(key, null, onlyMorePopular, num); + return DoLookup(key, null, onlyMorePopular, num); } /// /// Lookup, without any context. - public virtual IList Lookup(string key, int num) + public virtual IList DoLookup(string key, int num) { - return Lookup(key, null, true, num); + return DoLookup(key, null, true, num); } - public override IList Lookup(string key, HashSet contexts, bool onlyMorePopular, int num) // ignored + public override IList DoLookup(string key, HashSet contexts, bool onlyMorePopular, int num) // ignored { - try - { - return Lookup(key, contexts, num); - } - catch (IOException ioe) - { - // bogus: - throw new Exception(ioe); - } + return Lookup(key, contexts, num); } - public override long Count + public override long Count { get { @@ -604,9 +598,9 @@ namespace Lucene.Net.Search.Suggest.Analyzing lastTokens[0] = new BytesRef(); } - FST.Arc arc = new FST.Arc(); + var arc = new FST.Arc(); - FST.BytesReader bytesReader = fst.BytesReader; + var bytesReader = fst.BytesReader; // Try highest order models first, and if they return // results, return that; else, fallback: @@ -645,14 +639,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing // match the prefix portion exactly //Pair prefixOutput = null; long? prefixOutput = null; - try - { - prefixOutput = LookupPrefix(fst, bytesReader, token, arc); - } - catch (IOException bogus) - { - throw new Exception(bogus); - } + prefixOutput = LookupPrefix(fst, bytesReader, token, arc); //System.out.println(" prefixOutput=" + prefixOutput); if (prefixOutput == null) @@ -677,7 +664,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing if (token.Bytes[token.Offset + i] == separator) { BytesRef context = new BytesRef(token.Bytes, token.Offset, i); - long? output = Util.Get(fst, Util.ToIntsRef(context, new IntsRef())); + long? output = Util.Get(fst, Lucene.Net.Util.Fst.Util.ToIntsRef(context, new IntsRef())); Debug.Assert(output != null); contextCount = DecodeWeight(output); lastTokenFragment = new BytesRef(token.Bytes, token.Offset + i + 1, token.Length - i - 1); @@ -700,7 +687,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing CharsRef spare = new CharsRef(); // complete top-N - Util.TopResults completions = null; + Util.Fst.Util.TopResults completions = null; try { @@ -715,7 +702,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing // Must do num+seen.size() for queue depth because we may // reject up to seen.size() paths in acceptResult(): - Util.TopNSearcher searcher = new TopNSearcherAnonymousInnerClassHelper(this, fst, num, num + seen.Count, weightComparator, seen, finalLastToken); + Util.Fst.Util.TopNSearcher searcher = new TopNSearcherAnonymousInnerClassHelper(this, fst, num, num + seen.Count, weightComparator, seen, finalLastToken); // since this search is initialized with a single start node // it is okay to start with an empty input path here @@ -734,11 +721,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing BytesRef suffix = new BytesRef(8); //System.out.println(" " + completions.length + " completions"); - foreach (Util.Result completion in completions) + foreach (Util.Fst.Util.Result completion in completions) { token.Length = prefixLength; // append suffix - Util.ToBytesRef(completion.Input, suffix); + Util.Fst.Util.ToBytesRef(completion.Input, suffix); token.Append(suffix); //System.out.println(" completion " + token.utf8ToString()); @@ -763,7 +750,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing seen.Add(BytesRef.DeepCopyOf(lastToken)); spare.Grow(token.Length); UnicodeUtil.UTF8toUTF16(token, spare); - LookupResult result = new LookupResult(spare.ToString(), (long)(long.MaxValue * backoff * ((double) decodeWeight(completion.Output)) / contextCount)); + LookupResult result = new LookupResult(spare.ToString(), (long)(long.MaxValue * backoff * ((double) DecodeWeight(completion.Output)) / contextCount)); results.Add(result); Debug.Assert(results.Count == seen.Count); //System.out.println(" add result=" + result); @@ -788,14 +775,14 @@ namespace Lucene.Net.Search.Suggest.Analyzing } } - private class TopNSearcherAnonymousInnerClassHelper : Util.TopNSearcher + private class TopNSearcherAnonymousInnerClassHelper : Util.Fst.Util.TopNSearcher { private readonly FreeTextSuggester outerInstance; private HashSet seen; private BytesRef finalLastToken; - public TopNSearcherAnonymousInnerClassHelper(FreeTextSuggester outerInstance, FST org.apache.lucene.search.suggest.fst, int num, UnknownType size, UnknownType weightComparator, HashSet seen, BytesRef finalLastToken) : base(org.apache.lucene.search.suggest.fst, num, size, weightComparator) + public TopNSearcherAnonymousInnerClassHelper(FreeTextSuggester outerInstance, FST fst, int num, UnknownType size, UnknownType weightComparator, HashSet seen, BytesRef finalLastToken) : base(org.apache.lucene.search.suggest.fst, num, size, weightComparator) { this.outerInstance = outerInstance; this.seen = seen; @@ -804,11 +791,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing } - internal BytesRef scratchBytes; + private BytesRef scratchBytes; - protected internal override void addIfCompetitive(Util.FSTPath path) + protected override void AddIfCompetitive(Util.Fst.Util.FSTPath path) { - if (path.Arc.label != outerInstance.separator) + if (path.Arc.Label != outerInstance.separator) { //System.out.println(" keep path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc); base.AddIfCompetitive(path); @@ -819,16 +806,16 @@ namespace Lucene.Net.Search.Suggest.Analyzing } } - protected internal override bool AcceptResult(IntsRef input, long? output) + protected override bool AcceptResult(IntsRef input, long? output) { - Util.ToBytesRef(input, scratchBytes); + Util.Fst.Util.ToBytesRef(input, scratchBytes); finalLastToken.Grow(finalLastToken.length + scratchBytes.length); int lenSav = finalLastToken.length; - finalLastToken.append(scratchBytes); + finalLastToken.Append(scratchBytes); //System.out.println(" accept? input='" + scratchBytes.utf8ToString() + "'; lastToken='" + finalLastToken.utf8ToString() + "'; return " + (seen.contains(finalLastToken) == false)); bool ret = seen.Contains(finalLastToken) == false; - finalLastToken.length = lenSav; + finalLastToken.Length = lenSav; return ret; } } @@ -870,7 +857,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// /// cost -> weight //private long decodeWeight(Pair output) { - private long DecodeWeight(long? output) + private static long DecodeWeight(long? output) { Debug.Assert(output != null); return (int)(long.MaxValue - output); @@ -880,7 +867,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing private long? LookupPrefix(FST fst, FST.BytesReader bytesReader, BytesRef scratch, FST.Arc arc) //Bogus { - long? output = fst.outputs.NoOutput; + long? output = fst.Outputs.NoOutput; fst.GetFirstArc(arc); @@ -895,7 +882,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing } else { - output = fst.outputs.add(output, arc.output); + output = fst.Outputs.Add(output, arc.Output); } } @@ -904,13 +891,13 @@ namespace Lucene.Net.Search.Suggest.Analyzing internal static readonly IComparer weightComparator = new ComparatorAnonymousInnerClassHelper2(); - private class ComparatorAnonymousInnerClassHelper2 : IComparer + private sealed class ComparatorAnonymousInnerClassHelper2 : IComparer { public ComparatorAnonymousInnerClassHelper2() { } - public virtual int Compare(long? left, long? right) + public int Compare(long? left, long? right) { return left.CompareTo(right); } @@ -922,7 +909,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing /// public virtual object Get(string key) { - throw new System.NotSupportedException(); + throw new NotSupportedException(); } } } \ No newline at end of file