lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [09/15] lucenenet git commit: PERFORMANCE: Lucene.Net.Core.Util.Unicode.Util.ToCharArray(): Optimized method by removing reliance on exceptions and instead proactively resize the array to accommodate any surrogate pairs. Solution provided by Vincent Van
Date Fri, 24 Mar 2017 02:02:41 GMT
PERFORMANCE: Lucene.Net.Core.Util.Unicode.Util.ToCharArray(): Optimized method by removing
reliance on exceptions and instead proactively resize the array to accommodate any surrogate
pairs. Solution provided by Vincent Van Den Berghe.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f5d02d6d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f5d02d6d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f5d02d6d

Branch: refs/heads/api-work
Commit: f5d02d6df1913835b077336120b9dbd4f0f4fb56
Parents: 4ed5d33
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Fri Mar 24 01:51:37 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Fri Mar 24 01:51:37 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Util/UnicodeUtil.cs | 46 +++++++++++++---------------
 1 file changed, 22 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f5d02d6d/src/Lucene.Net.Core/Util/UnicodeUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/UnicodeUtil.cs b/src/Lucene.Net.Core/Util/UnicodeUtil.cs
index 2f9833c..c3bb61e 100644
--- a/src/Lucene.Net.Core/Util/UnicodeUtil.cs
+++ b/src/Lucene.Net.Core/Util/UnicodeUtil.cs
@@ -120,7 +120,7 @@ namespace Lucene.Net.Util
         private const long HALF_SHIFT = 10;
         private const long HALF_MASK = 0x3FFL;
 
-        private const int SURROGATE_OFFSET = Character.MIN_SUPPLEMENTARY_CODE_POINT - (UNI_SUR_HIGH_START
<< (int) HALF_SHIFT) - UNI_SUR_LOW_START;
+        private const int SURROGATE_OFFSET = Character.MIN_SUPPLEMENTARY_CODE_POINT - (UNI_SUR_HIGH_START
<< (int)HALF_SHIFT) - UNI_SUR_LOW_START;
 
         /// <summary>
         /// Encode characters from a char[] source, starting at
@@ -495,7 +495,7 @@ namespace Lucene.Net.Util
             return true;
         }
 
-        public static bool ValidUTF16String(char[] s, int size) 
+        public static bool ValidUTF16String(char[] s, int size)
         {
             for (int i = 0; i < size; i++)
             {
@@ -700,7 +700,7 @@ namespace Lucene.Net.Util
         /// <exception cref="IndexOutOfBoundsException"> If the offset or count are
out of bounds. </exception>
         public static string NewString(int[] codePoints, int offset, int count)
         {
-            var chars = ToCharArray(codePoints, offset, count);
+            char[] chars = ToCharArray(codePoints, offset, count);
             return new string(chars);
         }
 
@@ -713,12 +713,16 @@ namespace Lucene.Net.Util
         /// <param name="offset"> The start of the text in the code point array </param>
         /// <param name="count"> The number of code points </param>
         /// <returns> a char array representing the code points between offset and
count </returns>
+        // LUCENENET NOTE: This code was originally in the NewString() method (above).
+        // It has been refactored from the original to remove the exception throw/catch and
+        // instead proactively resizes the array instead of relying on excpetions.
         public static char[] ToCharArray(int[] codePoints, int offset, int count)
         {
             if (count < 0)
             {
                 throw new System.ArgumentException();
             }
+            // LUCENENET: as a first approximation, assume each codepoint is 1 character
             char[] chars = new char[count];
             int w = 0;
             for (int r = offset, e = offset + count; r < e; ++r)
@@ -728,30 +732,24 @@ namespace Lucene.Net.Util
                 {
                     throw new System.ArgumentException();
                 }
-                while (true)
+                if (cp < 0x010000)
                 {
-                    try
-                    {
-                        if (cp < 0x010000)
-                        {
-                            chars[w] = (char)cp;
-                            w++;
-                        }
-                        else
-                        {
-                            chars[w] = (char)(LEAD_SURROGATE_OFFSET_ + (cp >> LEAD_SURROGATE_SHIFT_));
-                            chars[w + 1] = (char)(TRAIL_SURROGATE_MIN_VALUE + (cp & TRAIL_SURROGATE_MASK_));
-                            w += 2;
-                        }
-                        break;
-                    }
-                    catch (System.IndexOutOfRangeException)
+                    chars[w++] = (char)cp;
+                }
+                else
+                {
+                    chars[w++] = (char)(LEAD_SURROGATE_OFFSET_ + (cp >> LEAD_SURROGATE_SHIFT_));
+                    // LUCENENET: resize to the exact length: it's slightly faster to check
if the resize is needed
+                    if (w >= chars.Length)
                     {
-                        int newlen = (int)(Math.Ceiling((double)codePoints.Length * (w +
2) / (r - offset + 1)));
-                        char[] temp = new char[newlen];
-                        Array.Copy(chars, 0, temp, 0, w);
-                        chars = temp;
+                        Array.Resize(ref chars, chars.Length + (e - r) * 2 - 1);
                     }
+                    chars[w++] = (char)(TRAIL_SURROGATE_MIN_VALUE + (cp & TRAIL_SURROGATE_MASK_));
+                }
+                // LUCENENET: resize to the exact length: it's slightly faster to check if
the resize is needed
+                if (w != chars.Length)
+                {
+                    Array.Resize(ref chars, w);
                 }
             }
 


Mime
View raw message