lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [4/6] lucenenet git commit: BUG: Lucene.Net.Core.Util.Automaton.RegExp.Peek(): Method not taking into account surrogate pairs. Created an IndexOf extension method overload for string that accepts a codePoint, similar to Java's String class.
Date Sun, 26 Mar 2017 03:52:46 GMT
BUG: Lucene.Net.Core.Util.Automaton.RegExp.Peek(): Method not taking into account surrogate
pairs. Created an IndexOf extension method overload for string that accepts a codePoint, similar
to Java's String class.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b3940f2e
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b3940f2e
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b3940f2e

Branch: refs/heads/api-work
Commit: b3940f2e41076a67b9588170be9a0017a328b9e4
Parents: fbfcb81
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Sun Mar 26 06:33:27 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Sun Mar 26 08:54:09 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Support/StringExtensions.cs | 34 ++++++++++++++++++++
 src/Lucene.Net.Core/Util/Automaton/RegExp.cs    | 10 +++---
 .../Search/TestDocTermOrdsRewriteMethod.cs      |  8 ++---
 3 files changed, 41 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b3940f2e/src/Lucene.Net.Core/Support/StringExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/StringExtensions.cs b/src/Lucene.Net.Core/Support/StringExtensions.cs
index 8405a90..41aa6a3 100644
--- a/src/Lucene.Net.Core/Support/StringExtensions.cs
+++ b/src/Lucene.Net.Core/Support/StringExtensions.cs
@@ -85,5 +85,39 @@ namespace Lucene.Net.Support
         {
             return new StringCharSequenceWrapper(str);
         }
+
+        /// <summary>
+        /// Returns the index within this string of the first occurrence of the
+        /// specified <paramref name="codePoint"/>.
+        /// </summary>
+        /// <param name="str">this string</param>
+        /// <param name="codePoint">a codePoint representing a single character or surrogate pair</param>
+        /// <returns>the index of the first occurrence of the character (or surrogate pair) in the string, 
+        /// or <c>-1</c> if the character (or surrogate pair) doesn't occur.</returns>
+        public static int IndexOf(this string str, int codePoint)
+        {
+            if (codePoint >= 0 && codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT)
+            {
+                // handle most cases here (codePoint is a BMP code point)
+                return str.IndexOf((char)codePoint);
+            }
+            else if (codePoint >= Character.MIN_CODE_POINT && codePoint <= Character.MAX_CODE_POINT)
+            {
+                // codePoint is a surrogate pair
+                char[] pair = Character.ToChars(codePoint);
+                char hi = pair[0];
+                char lo = pair[1];
+                for (int i = 0; i < str.Length - 1; i++)
+                {
+                    if (str[i] == hi && str[i + 1] == lo)
+                    {
+                        return i;
+                    }
+                }
+            }
+
+            // codePoint is negative or not found in string
+            return -1;
+        }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b3940f2e/src/Lucene.Net.Core/Util/Automaton/RegExp.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Automaton/RegExp.cs b/src/Lucene.Net.Core/Util/Automaton/RegExp.cs
index 868f8f0..fc07d45 100644
--- a/src/Lucene.Net.Core/Util/Automaton/RegExp.cs
+++ b/src/Lucene.Net.Core/Util/Automaton/RegExp.cs
@@ -701,11 +701,11 @@ namespace Lucene.Net.Util.Automaton
                     break;
 
                 case Kind.REGEXP_CHAR:
-                    b.Append("\\").Append(Character.ToChars(c));
+                    b.Append("\\").AppendCodePoint(c);
                     break;
 
                 case Kind.REGEXP_CHAR_RANGE:
-                    b.Append("[\\").Append(Character.ToChars(from)).Append("-\\").Append(Character.ToChars(to)).Append("]");
+                    b.Append("[\\").AppendCodePoint(from).Append("-\\").AppendCodePoint(to).Append("]");
                     break;
 
                 case Kind.REGEXP_ANYCHAR:
@@ -835,7 +835,7 @@ namespace Lucene.Net.Util.Automaton
             }
             else
             {
-                b.Append(Character.ToChars(exp1.c));
+                b.AppendCodePoint(exp1.c);
             }
             if (exp2.kind == Kind.REGEXP_STRING)
             {
@@ -843,7 +843,7 @@ namespace Lucene.Net.Util.Automaton
             }
             else
             {
-                b.Append(Character.ToChars(exp2.c));
+                b.AppendCodePoint(exp2.c);
             }
             return MakeString(b.ToString());
         }
@@ -970,7 +970,7 @@ namespace Lucene.Net.Util.Automaton
 
         private bool Peek(string s)
         {
-            return More() && s.IndexOf((char)Character.CodePointAt(b, pos)) != -1;
+            return More() && s.IndexOf(b.CodePointAt(pos)) != -1;
         }
 
         private bool Match(int c)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b3940f2e/src/Lucene.Net.Tests/Search/TestDocTermOrdsRewriteMethod.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Search/TestDocTermOrdsRewriteMethod.cs b/src/Lucene.Net.Tests/Search/TestDocTermOrdsRewriteMethod.cs
index 2923d51..9273365 100644
--- a/src/Lucene.Net.Tests/Search/TestDocTermOrdsRewriteMethod.cs
+++ b/src/Lucene.Net.Tests/Search/TestDocTermOrdsRewriteMethod.cs
@@ -1,4 +1,4 @@
-using System;
+﻿using System;
 using System.Collections.Generic;
 using Lucene.Net.Attributes;
 using Lucene.Net.Documents;
@@ -116,11 +116,7 @@ namespace Lucene.Net.Search
 
         /// <summary>
         /// test a bunch of random regular expressions </summary>
-#if !NETSTANDARD
-        // LUCENENET: There is no Timeout on NUnit for .NET Core.
-        [Timeout(60000)]
-#endif
-        [Test, HasTimeout]
+        [Test]
         public virtual void TestRegexps()
         {
             int num = AtLeast(1000);


Mime
View raw message