lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [3/5] lucenenet git commit: fixes for handling surrogate chars properly
Date Wed, 31 Dec 2014 08:39:46 GMT
fixes for handling surrogate chars properly


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/645b21d1
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/645b21d1
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/645b21d1

Branch: refs/heads/master
Commit: 645b21d198523f984c71da307beab445285dd879
Parents: aa1f8b4
Author: Laimonas Simutis <laimis@gmail.com>
Authored: Tue Dec 30 22:03:10 2014 -0500
Committer: Laimonas Simutis <laimis@gmail.com>
Committed: Tue Dec 30 22:03:10 2014 -0500

----------------------------------------------------------------------
 src/Lucene.Net.Core/Support/Character.cs | 18 +++++++++++-------
 src/Lucene.Net.Core/Util/UnicodeUtil.cs  | 18 +++++++++++++++++-
 2 files changed, 28 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/645b21d1/src/Lucene.Net.Core/Support/Character.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/Character.cs b/src/Lucene.Net.Core/Support/Character.cs
index 1bf1257..fc90896 100644
--- a/src/Lucene.Net.Core/Support/Character.cs
+++ b/src/Lucene.Net.Core/Support/Character.cs
@@ -80,15 +80,16 @@ namespace Lucene.Net.Support
 
         public static int ToChars(int codePoint, char[] dst, int dstIndex)
         {
-            // .NET Port: we don't have to do anything funky with surrogates here. chars are always UTF-16.
-            dst[dstIndex] = (char)codePoint;
-            return 1; // always 1 char written in .NET
+            var converted = UnicodeUtil.ToCharArray(new[] {codePoint}, 0, 1);
+
+            Array.Copy(converted, 0, dst, dstIndex, converted.Length);
+
+            return converted.Length;
         }
 
         public static char[] ToChars(int codePoint)
         {
-            // .NET Port: we don't have to do anything funky with surrogates here. chars are always UTF-16.
-            return new[] { (char)codePoint };
+            return UnicodeUtil.ToCharArray(new[] {codePoint}, 0, 1);
         }
 
         public static int ToCodePoint(char high, char low)
@@ -104,8 +105,11 @@ namespace Lucene.Net.Support
 
         public static int ToLowerCase(int codePoint)
         {
-            // .NET Port: chars are always UTF-16 in .NET
-            return (int)char.ToLower((char)codePoint);
+            var str = UnicodeUtil.NewString(new[] {codePoint}, 0, 1);
+
+            str = str.ToLower();
+
+            return CodePointAt(str, 0);
         }
 
         public static int CharCount(int codePoint)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/645b21d1/src/Lucene.Net.Core/Util/UnicodeUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/UnicodeUtil.cs b/src/Lucene.Net.Core/Util/UnicodeUtil.cs
index 931142f..8623ca4 100644
--- a/src/Lucene.Net.Core/Util/UnicodeUtil.cs
+++ b/src/Lucene.Net.Core/Util/UnicodeUtil.cs
@@ -538,6 +538,19 @@ namespace Lucene.Net.Util
         /// <exception cref="IndexOutOfBoundsException"> If the offset or count are out of bounds. </exception>
         public static string NewString(int[] codePoints, int offset, int count)
         {
+            var chars = ToCharArray(codePoints, offset, count);
+            return new string(chars);
+        }
+
+        /// <summary>
+        /// Generates char array that represents the provided input code points
+        /// </summary>
+        /// <param name="codePoints"> The code array </param>
+        /// <param name="offset"> The start of the text in the code point array </param>
+        /// <param name="count"> The number of code points </param>
+        /// <returns> a char array representing the code points between offset and count </returns>
+        public static char[] ToCharArray(int[] codePoints, int offset, int count)
+        {
             if (count < 0)
             {
                 throw new System.ArgumentException();
@@ -577,7 +590,10 @@ namespace Lucene.Net.Util
                     }
                 }
             }
-            return new string(chars, 0, w);
+
+            var result = new char[w];
+            Array.Copy(chars, result, w);
+            return result;
         }
 
         // for debugging


Mime
View raw message