lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From paulir...@apache.org
Subject [22/53] [abbrv] git commit: Bugfixes for BreakIterator
Date Thu, 07 Nov 2013 13:53:37 GMT
Bugfixes for BreakIterator


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fe26d1e9
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fe26d1e9
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fe26d1e9

Branch: refs/heads/branch_4x
Commit: fe26d1e91cc75babb0438d3f44e61a6b5d1e0dab
Parents: d2d763c
Author: Paul Irwin <paulirwin@gmail.com>
Authored: Tue Oct 29 11:58:28 2013 -0400
Committer: Paul Irwin <paulirwin@gmail.com>
Committed: Tue Oct 29 11:58:28 2013 -0400

----------------------------------------------------------------------
 .../BreakIteratorBoundaryScanner.cs             | 50 ++++++++++----------
 .../Support/BreakIterators/BreakIteratorBase.cs |  2 +-
 .../BreakIterators/EnglishWordBreakIterator.cs  |  8 +++-
 3 files changed, 32 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fe26d1e9/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs b/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs
index b023452..b2da99b 100644
--- a/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs
+++ b/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs
@@ -1,35 +1,35 @@
-using System;
+using Lucene.Net.Support;
+using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 
 namespace Lucene.Net.Search.VectorHighlight
 {
-    // .NET: without re-implementing BreakIterator from scratch, we can't use this type.
-    //public class BreakIteratorBoundaryScanner : IBoundaryScanner
-    //{
-    //    readonly BreakIterator bi;
+    public class BreakIteratorBoundaryScanner : IBoundaryScanner
+    {
+        readonly BreakIterator bi;
 
-    //    public BreakIteratorBoundaryScanner(BreakIterator bi)
-    //    {
-    //        this.bi = bi;
-    //    }
+        public BreakIteratorBoundaryScanner(BreakIterator bi)
+        {
+            this.bi = bi;
+        }
 
-    //    public override int FindStartOffset(StringBuilder buffer, int start)
-    //    {
-    //        if (start > buffer.Length || start < 1)
-    //            return start;
-    //        bi.SetText(buffer.ToString().Substring(0, start));
-    //        bi.Last();
-    //        return bi.Previous();
-    //    }
+        public override int FindStartOffset(StringBuilder buffer, int start)
+        {
+            if (start > buffer.Length || start < 1)
+                return start;
+            bi.Text = buffer.ToString().Substring(0, start);
+            bi.Last();
+            return bi.Previous();
+        }
 
-    //    public override int FindEndOffset(StringBuilder buffer, int start)
-    //    {
-    //        if (start > buffer.Length || start < 0)
-    //            return start;
-    //        bi.SetText(buffer.ToString().Substring(start));
-    //        return bi.Next() + start;
-    //    }
-    //}
+        public override int FindEndOffset(StringBuilder buffer, int start)
+        {
+            if (start > buffer.Length || start < 0)
+                return start;
+            bi.Text = buffer.ToString().Substring(start);
+            return bi.Next() + start;
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fe26d1e9/src/core/Support/BreakIterators/BreakIteratorBase.cs
----------------------------------------------------------------------
diff --git a/src/core/Support/BreakIterators/BreakIteratorBase.cs b/src/core/Support/BreakIterators/BreakIteratorBase.cs
index a149907..09a08a3 100644
--- a/src/core/Support/BreakIterators/BreakIteratorBase.cs
+++ b/src/core/Support/BreakIterators/BreakIteratorBase.cs
@@ -75,7 +75,7 @@ namespace Lucene.Net.Support.BreakIterators
             if (_position == _text.Length - 1)
                 return DONE;
 
-            return Following(++_position);
+            return Following(_position);
         }
 
         public override int Next(int n)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fe26d1e9/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs b/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs
index 0fbb39f..3b26a19 100644
--- a/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs
+++ b/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs
@@ -11,11 +11,15 @@ namespace Lucene.Net.Support.BreakIterators
         public override bool IsBoundary(int offset)
         {
             char c = Peek(offset);
-            char cplus = Peek(offset + 1);
-
+            
             if (char.IsLetterOrDigit(c))
                 return false;
 
+            if (char.IsWhiteSpace(c))
+                return true;
+
+            char cplus = Peek(offset + 1);
+
             if (cplus != ENDINPUT && char.IsLetterOrDigit(cplus))
                 return false;
 


Mime
View raw message