lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [33/50] [abbrv] lucenenet git commit: BUG: Lucene.Net.Analysis.Common.Analysis.Util.CharacterUtils: Fixed backward compatibility support (broken Unicode) for Lucene 3.0. Fixes the TestCharArraySet.TestSupplementaryCharsBWCompat() and TestCharArraySet.TestSingleHighSurrogateBWComapt() tests.
Date Wed, 15 Mar 2017 23:50:15 GMT
BUG: Lucene.Net.Analysis.Common.Analysis.Util.CharacterUtils: Fixed backward compatibility
support (broken Unicode) for Lucene 3.0.  Fixes the TestCharArraySet.TestSupplementaryCharsBWCompat()
and TestCharArraySet.TestSingleHighSurrogateBWComapt() tests.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7c1f7523
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7c1f7523
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7c1f7523

Branch: refs/heads/api-work
Commit: 7c1f7523613b410de4dc2ff67b1c287374efe553
Parents: 331fb47
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Wed Mar 15 18:47:10 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Wed Mar 15 18:47:10 2017 +0700

----------------------------------------------------------------------
 .../Analysis/NGram/EdgeNGramTokenFilter.cs      |  2 +-
 .../Analysis/NGram/NGramTokenFilter.cs          |  2 +-
 .../Analysis/NGram/NGramTokenizer.cs            |  2 +-
 .../Analysis/Util/CharacterUtils.cs             | 60 +++++++++++++++++---
 .../Analysis/Util/TestCharArraySet.cs           |  4 +-
 .../Analysis/Util/TestCharacterUtils.cs         |  6 +-
 6 files changed, 59 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c1f7523/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilter.cs
index 8cf8172..bc5d4cc 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilter.cs
@@ -125,7 +125,7 @@ namespace Lucene.Net.Analysis.NGram
             }
 
             this.version = version;
-            this.charUtils = version.OnOrAfter(LuceneVersion.LUCENE_44) ? CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
+            this.charUtils = version.OnOrAfter(LuceneVersion.LUCENE_44) ? CharacterUtils.GetInstance(version) : CharacterUtils.GetJava4Instance(version);
             this.minGram = minGram;
             this.maxGram = maxGram;
             this.side = side;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c1f7523/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs
index f1c82c5..2b0af35 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs
@@ -85,7 +85,7 @@ namespace Lucene.Net.Analysis.NGram
 #pragma warning disable 612, 618
                 LuceneVersion.LUCENE_44) ?
 #pragma warning restore 612, 618
-                CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
+                CharacterUtils.GetInstance(version) : CharacterUtils.GetJava4Instance(version);
             if (minGram < 1)
             {
                 throw new System.ArgumentException("minGram must be greater than zero");

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c1f7523/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizer.cs
index b1845c8..0fe3792 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizer.cs
@@ -180,7 +180,7 @@ namespace Lucene.Net.Analysis.NGram
 #pragma warning disable 612, 618
             charUtils = version.OnOrAfter(LuceneVersion.LUCENE_44) ?
 #pragma warning restore 612, 618
-                CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
+                CharacterUtils.GetInstance(version) : CharacterUtils.GetJava4Instance(version);
             if (minGram < 1)
             {
                 throw new System.ArgumentException("minGram must be greater than zero");

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c1f7523/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
index 19097e0..9802ba3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
@@ -33,6 +33,11 @@ namespace Lucene.Net.Analysis.Util
     /// </summary>
     public abstract class CharacterUtils
     {
+        // LUCENENET specific class for supporting broken Unicode support in Lucene 3.0.
+        // See the TestCharArraySet.TestSupplementaryCharsBWCompat()
+        // and TestCharArraySet.TestSingleHighSurrogateBWComapt() tests.
+        private static readonly CharacterUtils JAVA_4_BW_COMPAT = new Java4CharacterUtilsBWCompatibility();
+
         private static readonly CharacterUtils JAVA_4 = new Java4CharacterUtils();
         private static readonly CharacterUtils JAVA_5 = new Java5CharacterUtils();
 
@@ -47,18 +52,17 @@ namespace Lucene.Net.Analysis.Util
         public static CharacterUtils GetInstance(LuceneVersion matchVersion)
         {
 #pragma warning disable 612, 618
-            return matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) ? JAVA_5 : JAVA_4;
+            return matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) 
+                ? JAVA_5 
+                : JAVA_4_BW_COMPAT;
 #pragma warning restore 612, 618
         }
 
         /// <summary>
         /// Return a <see cref="CharacterUtils"/> instance compatible with Java 1.4. </summary>
-        public static CharacterUtils Java4Instance
+        public static CharacterUtils GetJava4Instance(LuceneVersion matchVersion) // LUCENENET specific - added matchVersion parameter so we can support backward compatible Unicode support
         {
-            get
-            {
-                return JAVA_4;
-            }
+            return matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) ? JAVA_4 : JAVA_4_BW_COMPAT;
         }
 
         /// <summary>
@@ -133,7 +137,7 @@ namespace Lucene.Net.Analysis.Util
         /// <param name="buffer"> the char buffer to lowercase </param>
         /// <param name="offset"> the offset to start at </param>
         /// <param name="limit"> the max char in the buffer to lower case </param>
-        public void ToLower(char[] buffer, int offset, int limit)
+        public virtual void ToLower(char[] buffer, int offset, int limit) // LUCENENET specific - marked virtual so we can override the default
         {
             Debug.Assert(buffer.Length >= limit);
             Debug.Assert(offset <= 0 && offset <= buffer.Length);
@@ -144,6 +148,7 @@ namespace Lucene.Net.Analysis.Util
                 .ToLowerInvariant()
                 .CopyTo(0, buffer, offset, limit);
 
+            // Original (slow) Lucene implementation:
             //for (int i = offset; i < limit; )
             //{
             //    i += Character.ToChars(
@@ -158,7 +163,7 @@ namespace Lucene.Net.Analysis.Util
         /// <param name="buffer"> the char buffer to UPPERCASE </param>
         /// <param name="offset"> the offset to start at </param>
         /// <param name="limit"> the max char in the buffer to lower case </param>
-        public void ToUpper(char[] buffer, int offset, int limit)
+        public virtual void ToUpper(char[] buffer, int offset, int limit) // LUCENENET specific - marked virtual so we can override the default
         {
             Debug.Assert(buffer.Length >= limit);
             Debug.Assert(offset <= 0 && offset <= buffer.Length);
@@ -169,6 +174,7 @@ namespace Lucene.Net.Analysis.Util
                 .ToUpperInvariant()
                 .CopyTo(0, buffer, offset, limit);
 
+            // Original (slow) Lucene implementation:
             //for (int i = offset; i < limit; )
             //{
             //    i += Character.ToChars(
@@ -346,7 +352,10 @@ namespace Lucene.Net.Analysis.Util
             }
         }
 
-        private sealed class Java4CharacterUtils : CharacterUtils
+        // LUCENENET specific - not sealed so we can make another override to handle BW compatibility
+        // with broken unicode support (Lucene 3.0). See the TestCharArraySet.TestSupplementaryCharsBWCompat()
+        // and TestCharArraySet.TestSingleHighSurrogateBWComapt() tests.
+        private class Java4CharacterUtils : CharacterUtils
         {
             public override int CodePointAt(string seq, int offset)
             {
@@ -397,6 +406,39 @@ namespace Lucene.Net.Analysis.Util
             }
         }
 
+        // LUCENENET specific class to handle BW compatibility
+        // with broken unicode support (Lucene 3.0). See the TestCharArraySet.TestSupplementaryCharsBWCompat()
+        // and TestCharArraySet.TestSingleHighSurrogateBWComapt() tests. This just provides the old (slower)
+        // implementation that represents the original Lucene toUpperCase and toLowerCase methods.
+        private class Java4CharacterUtilsBWCompatibility : Java4CharacterUtils
+        {
+            public override void ToLower(char[] buffer, int offset, int limit)
+            {
+                Debug.Assert(buffer.Length >= limit);
+                Debug.Assert(offset <= 0 && offset <= buffer.Length);
+
+                for (int i = offset; i < limit;)
+                {
+                    i += Character.ToChars(
+                        Character.ToLowerCase(
+                            CodePointAt(buffer, i, limit)), buffer, i);
+                }
+            }
+
+            public override void ToUpper(char[] buffer, int offset, int limit)
+            {
+                Debug.Assert(buffer.Length >= limit);
+                Debug.Assert(offset <= 0 && offset <= buffer.Length);
+
+                for (int i = offset; i < limit;)
+                {
+                    i += Character.ToChars(
+                        Character.ToUpperCase(
+                            CodePointAt(buffer, i, limit)), buffer, i);
+                }
+            }
+        }
+
         /// <summary>
         /// A simple IO buffer to use with
         /// <see cref="CharacterUtils.Fill(CharacterBuffer, TextReader)"/>.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c1f7523/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
index 2026446..5e80536 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
@@ -337,7 +337,7 @@ namespace Lucene.Net.Analysis.Util
         /// @deprecated (3.1) remove this test when lucene 3.0 "broken unicode 4" support is
         ///             no longer needed. 
         [Test]
-        [Obsolete("(3.1) remove this test when lucene 3.0 'broken unicode 4' support is")]
+        [Obsolete("(3.1) remove this test when lucene 3.0 'broken unicode 4' support is no longer needed.")]
         public virtual void TestSupplementaryCharsBWCompat()
         {
             string missing = "Term {0} is missing in the set";
@@ -371,7 +371,7 @@ namespace Lucene.Net.Analysis.Util
         /// @deprecated (3.1) remove this test when lucene 3.0 "broken unicode 4" support is
         ///             no longer needed. 
         [Test]
-        [Obsolete("(3.1) remove this test when lucene 3.0 'broken unicode 4' support is")]
+        [Obsolete("(3.1) remove this test when lucene 3.0 'broken unicode 4' support is no longer needed.")]
         public virtual void TestSingleHighSurrogateBWComapt()
         {
             string missing = "Term {0} is missing in the set";

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c1f7523/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
index 2a842c9..1f1b22f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
@@ -86,7 +86,7 @@ namespace Lucene.Net.Analysis.Util
         [Test]
         public virtual void TestCodePointCount()
         {
-            var java4 = CharacterUtils.Java4Instance;
+            var java4 = CharacterUtils.GetJava4Instance(TEST_VERSION_CURRENT);
             var java5 = CharacterUtils.GetInstance(TEST_VERSION_CURRENT);
             
             var s = TestUtil.RandomUnicodeString(Random());
@@ -97,7 +97,7 @@ namespace Lucene.Net.Analysis.Util
         [Test]
         public virtual void TestOffsetByCodePoint()
         {
-            var java4 = CharacterUtils.Java4Instance;
+            var java4 = CharacterUtils.GetJava4Instance(TEST_VERSION_CURRENT);
             var java5 = CharacterUtils.GetInstance(TEST_VERSION_CURRENT);
             for (int i = 0; i < 10; ++i)
             {
@@ -142,7 +142,7 @@ namespace Lucene.Net.Analysis.Util
         [Test]
         public virtual void TestConversions()
         {
-            var java4 = CharacterUtils.Java4Instance;
+            var java4 = CharacterUtils.GetJava4Instance(TEST_VERSION_CURRENT);
             var java5 = CharacterUtils.GetInstance(TEST_VERSION_CURRENT);
             TestConversions(java4);
             TestConversions(java5);


Mime
View raw message