lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [10/15] lucenenet git commit: BUG: Lucene.Net.Core.Util.UnicodeUtil: counter for char array size not accurate and causing Lucene.Net.Misc.Util.Fst.TestFSTsMisc.TestRandomWords() to fail. Changed initial array size to count * 2 and removed resize logic, s
Date Fri, 24 Mar 2017 02:02:42 GMT
BUG: Lucene.Net.Core.Util.UnicodeUtil: the counter for the char array size was not accurate, causing
Lucene.Net.Misc.Util.Fst.TestFSTsMisc.TestRandomWords() to fail. Changed the initial array size
to count * 2 and removed the resize logic, since we trim off the excess anyway at the end. Also
added a threshold of 1024 - if the count is greater than this, we do a pre-loop to determine
the exact amount of memory to allocate.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/631cfa7a
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/631cfa7a
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/631cfa7a

Branch: refs/heads/api-work
Commit: 631cfa7ad37187c918b068fef77797fffd0bfff8
Parents: f5d02d6
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Fri Mar 24 06:24:41 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Fri Mar 24 06:28:21 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Util/UnicodeUtil.cs | 35 +++++++++++++++++-----------
 1 file changed, 22 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/631cfa7a/src/Lucene.Net.Core/Util/UnicodeUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/UnicodeUtil.cs b/src/Lucene.Net.Core/Util/UnicodeUtil.cs
index c3bb61e..31a998c 100644
--- a/src/Lucene.Net.Core/Util/UnicodeUtil.cs
+++ b/src/Lucene.Net.Core/Util/UnicodeUtil.cs
@@ -715,15 +715,34 @@ namespace Lucene.Net.Util
         /// <returns> a char array representing the code points between offset and
count </returns>
         // LUCENENET NOTE: This code was originally in the NewString() method (above).
         // It has been refactored from the original to remove the exception throw/catch and
-        // instead proactively resizes the array instead of relying on excpetions.
+        // instead proactively resizes the array instead of relying on excpetions + copy
operations
         public static char[] ToCharArray(int[] codePoints, int offset, int count)
         {
             if (count < 0)
             {
                 throw new System.ArgumentException();
             }
-            // LUCENENET: as a first approximation, assume each codepoint is 1 character
-            char[] chars = new char[count];
+            int countThreashhold = 1024; // If the number of chars exceeds this, we count
them instead of allocating count * 2
+            // LUCENENET: as a first approximation, assume each codepoint 
+            // is 2 characters (since it cannot be longer than this)
+            int arrayLength = count * 2;
+            // LUCENENET: if we go over the threashhold, count the number of 
+            // chars we will need so we can allocate the precise amount of memory
+            if (count > countThreashhold)
+            {
+                arrayLength = 0;
+                for (int r = offset, e = offset + count; r < e; ++r)
+                {
+                    arrayLength += codePoints[r] < 0x010000 ? 1 : 2;
+                }
+                if (arrayLength < 1)
+                {
+                    arrayLength = count * 2;
+                }
+            }
+            // Initialize our array to our exact or oversized length.
+            // It is now safe to assume we have enough space for all of the characters.
+            char[] chars = new char[arrayLength];
             int w = 0;
             for (int r = offset, e = offset + count; r < e; ++r)
             {
@@ -739,18 +758,8 @@ namespace Lucene.Net.Util
                 else
                 {
                     chars[w++] = (char)(LEAD_SURROGATE_OFFSET_ + (cp >> LEAD_SURROGATE_SHIFT_));
-                    // LUCENENET: resize to the exact length: it's slightly faster to check
if the resize is needed
-                    if (w >= chars.Length)
-                    {
-                        Array.Resize(ref chars, chars.Length + (e - r) * 2 - 1);
-                    }
                     chars[w++] = (char)(TRAIL_SURROGATE_MIN_VALUE + (cp & TRAIL_SURROGATE_MASK_));
                 }
-                // LUCENENET: resize to the exact length: it's slightly faster to check if
the resize is needed
-                if (w != chars.Length)
-                {
-                    Array.Resize(ref chars, w);
-                }
             }
 
             var result = new char[w];


Mime
View raw message