lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [14/50] [abbrv] lucenenet git commit: Lucene.Net.Core.Support.StringBuilderExtensions: Reverted Reverse() method to original Java implementation (faster), and added CodePointCount and GetChars() methods
Date Tue, 31 Jan 2017 17:55:47 GMT
Lucene.Net.Core.Support.StringBuilderExtensions: Reverted Reverse() method to original Java
implementation (faster), and added CodePointCount and GetChars() methods


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/8bb1cc92
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/8bb1cc92
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/8bb1cc92

Branch: refs/heads/api-work
Commit: 8bb1cc9209b39ab97b938f2c491a803d9f66b882
Parents: 6032fd7
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Tue Jan 31 12:48:02 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Tue Jan 31 12:48:02 2017 +0700

----------------------------------------------------------------------
 .../Support/StringBuilderExtensions.cs          | 111 +++++++++++++++----
 .../Suggest/Jaspell/JaspellTernarySearchTrie.cs |   5 +-
 2 files changed, 92 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8bb1cc92/src/Lucene.Net.Core/Support/StringBuilderExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/StringBuilderExtensions.cs b/src/Lucene.Net.Core/Support/StringBuilderExtensions.cs
index 1eb18f1..5292e0c 100644
--- a/src/Lucene.Net.Core/Support/StringBuilderExtensions.cs
+++ b/src/Lucene.Net.Core/Support/StringBuilderExtensions.cs
@@ -1,34 +1,63 @@
-using System.Globalization;
+using System;
 using System.Text;
 
 namespace Lucene.Net.Support
 {
     public static class StringBuilderExtensions
     {
-        public static StringBuilder Reverse(this StringBuilder text) // LUCENENET TODO: The reverse is in-place. Returning the StringBuilder makes this confusing.
+        /// <summary>
+        /// Causes this character sequence to be replaced by the reverse of
+        /// the sequence. If there are any surrogate pairs included in the
+        /// sequence, these are treated as single characters for the
+        /// reverse operation. Thus, the order of the high-low surrogates
+        /// is never reversed.
+        /// <para/>
+        /// Let <c>n</c> be the character length of this character sequence
+        /// (not the length in <see cref="char"/> values) just prior to
+        /// execution of the <see cref="Reverse"/> method. Then the
+        /// character at index <c>k</c> in the new character sequence is
+        /// equal to the character at index <c>n-k-1</c> in the old
+        /// character sequence.
+        /// <para/>
+        /// Note that the reverse operation may result in producing
+        /// surrogate pairs that were unpaired low-surrogates and
+        /// high-surrogates before the operation. For example, reversing
+        /// "&#92;uDC00&#92;uD800" produces "&#92;uD800&#92;uDC00" which is
+        /// a valid surrogate pair.
+        /// </summary>
+        /// <param name="text">this <see cref="StringBuilder"/></param>
+        /// <returns>a reference to this <see cref="StringBuilder"/>.</returns>
+        public static StringBuilder Reverse(this StringBuilder text)
         {
-            int textLength = text.Length;
-            if (textLength > 1)
+            bool hasSurrogate = false;
+            int codePointCount = text.Length;
+            int n = text.Length - 1;
+            for (int j = (n - 1) >> 1; j >= 0; --j)
             {
-                // Pull the string out of the StringBuilder so we
-                // can work with the various text elements (chars, glyphs, graphemes, etc)
-                // and reverse the order of the string without reversing chars that need to be
-                // in a specific order to represent the same text as the forward string.
-                // Reference: http://stackoverflow.com/a/36310993/181087
-                int offset = textLength;
-                var enumerator = StringInfo.GetTextElementEnumerator(text.ToString());
-                while (enumerator.MoveNext())
+                char temp = text[j];
+                char temp2 = text[n - j];
+                if (!hasSurrogate)
                 {
-                    string element = enumerator.GetTextElement();
-
-                    // Back up the current offset by the length of the element
-                    offset -= element.Length;
-
-                    for (int i = 0; i < element.Length; i++)
+                    hasSurrogate = (temp >= Character.MIN_SURROGATE && temp <= Character.MAX_SURROGATE)
+                        || (temp2 >= Character.MIN_SURROGATE && temp2 <= Character.MAX_SURROGATE);
+                }
+                text[j] = temp2;
+                text[n - j] = temp;
+            }
+            if (hasSurrogate)
+            {
+                // Reverse back all valid surrogate pairs
+                for (int i = 0; i < text.Length - 1; i++)
+                {
+                    char c2 = text[i];
+                    if (char.IsLowSurrogate(c2))
                     {
-                        // Write the chars in forward order from the element
-                        // to the StringBuilder based on the offset.
-                        text[i + offset] = element[i];
+                        char c1 = text[i + 1];
+                        if (char.IsHighSurrogate(c1))
+                        {
+                            text[i++] = c1;
+                            text[i] = c2;
+                        }
                     }
                 }
             }
@@ -37,6 +66,46 @@ namespace Lucene.Net.Support
         }
 
         /// <summary>
+        /// Returns the number of Unicode code points in the specified text
+        /// range of this <see cref="StringBuilder"/>. The text range begins at the specified
+        /// <paramref name="beginIndex"/> and extends to the <see cref="char"/> at
+        /// index <c>endIndex - 1</c>. Thus the length (in
+        /// <see cref="char"/>s) of the text range is
+        /// <c>endIndex-beginIndex</c>. Unpaired surrogates within
+        /// this sequence count as one code point each.
+        /// </summary>
+        /// <param name="text">this <see cref="StringBuilder"/></param>
+        /// <param name="beginIndex">the index to the first <see cref="char"/> of the text range.</param>
+        /// <param name="endIndex">the index after the last <see cref="char"/> of the text range.</param>
+        /// <returns>the number of Unicode code points in the specified text range.</returns>
+        /// <exception cref="IndexOutOfRangeException">
+        /// if the <paramref name="beginIndex"/> is negative, or <paramref name="endIndex"/>
+        /// is larger than the length of this sequence, or
+        /// <paramref name="beginIndex"/> is larger than <paramref name="endIndex"/>.
+        /// </exception>
+        public static int CodePointCount(this StringBuilder text, int beginIndex, int endIndex)
+        {
+            if (beginIndex < 0 || endIndex > text.Length || beginIndex > endIndex)
+            {
+                throw new IndexOutOfRangeException();
+            }
+            return Character.CodePointCountImpl(text.GetChars(), beginIndex, endIndex - beginIndex);
+        }
+
+        /// <summary>
+        /// Copies the array from the <see cref="StringBuilder"/> into a new array
+        /// and returns it.
+        /// </summary>
+        /// <param name="text">this <see cref="StringBuilder"/></param>
+        /// <returns></returns>
+        public static char[] GetChars(this StringBuilder text)
+        {
+            char[] chars = new char[text.Length];
+            text.CopyTo(0, chars, 0, text.Length);
+            return chars;
+        }
+
+        /// <summary>
         /// Appends the string representation of the <paramref name="codePoint"/>
         /// argument to this sequence.
         /// 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8bb1cc92/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs b/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs
index 11fb8ca..98a91fb 100644
--- a/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs
+++ b/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs
@@ -549,9 +549,8 @@ namespace Lucene.Net.Search.Suggest.Jaspell
                 currentNode = currentNode.relatives[TSTNode.PARENT];
             }
 
-            // LUCENENET NOTE: Reverse doesn't happen in place in a .NET StringBuilder,
-            // so we need to return the reversed result.
-            return getKeyBuffer.Reverse().ToString();
+            getKeyBuffer.Reverse();
+            return getKeyBuffer.ToString();
         }
 
         /// <summary>


Mime
View raw message