lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [04/52] [abbrv] lucenenet git commit: SWEEP: Moved BreakIterator-dependent functionality to a common Lucene.Net.Icu library so we can manage the icu.net dependency from one place and not make the majority of the users deal with it when they don't need to
Date Tue, 25 Apr 2017 11:50:45 GMT
SWEEP: Moved BreakIterator-dependent functionality to a common Lucene.Net.Icu library so we can manage the icu.net dependency from one place and not make the majority of the users deal with it when they don't need to


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b1fdcca3
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b1fdcca3
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b1fdcca3

Branch: refs/heads/master
Commit: b1fdcca3b3c3f418dfe37aafeda6f4dab75fb6d4
Parents: 63c599e
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Mon Apr 17 01:38:10 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Mon Apr 17 01:38:10 2017 +0700

----------------------------------------------------------------------
 Lucene.Net.Portable.sln                         |  20 +
 Lucene.Net.sln                                  |  52 +++
 NuGet.config                                    |   1 +
 src/IcuBreakIterator.cs                         | 394 -----------------
 .../Analysis/Th/ThaiAnalyzer.cs                 |   2 +-
 .../Lucene.Net.Analysis.Common.csproj           |   3 -
 src/Lucene.Net.Analysis.Common/project.json     |   6 +-
 .../Lucene.Net.Highlighter.csproj               |   5 +-
 .../DefaultPassageFormatter.cs                  |   4 +-
 .../PostingsHighlight/MultiTermHighlighting.cs  |   4 +-
 .../PostingsHighlight/Passage.cs                |   4 +-
 .../PostingsHighlight/PassageFormatter.cs       |   4 +-
 .../PostingsHighlight/PassageScorer.cs          |   4 +-
 .../Properties/AssemblyInfo.cs                  |   2 +
 src/Lucene.Net.Highlighter/project.json         |   6 +-
 src/Lucene.Net.Icu/Analysis/Th/stopwords.txt    | 119 ++++++
 src/Lucene.Net.Icu/Lucene.Net.Icu.csproj        | 124 ++++++
 src/Lucene.Net.Icu/Lucene.Net.Icu.project.json  |  11 +
 src/Lucene.Net.Icu/Lucene.Net.Icu.xproj         |  19 +
 src/Lucene.Net.Icu/Properties/AssemblyInfo.cs   |  31 ++
 src/Lucene.Net.Icu/Support/BreakIterator.cs     | 231 ++++++++++
 src/Lucene.Net.Icu/Support/CharacterIterator.cs |  50 +++
 src/Lucene.Net.Icu/Support/IcuBreakIterator.cs  | 394 +++++++++++++++++
 .../Support/StringCharacterIterator.cs          | 232 ++++++++++
 src/Lucene.Net.Icu/project.json                 |  63 +++
 .../Lucene.Net.Tests.Highlighter.csproj         |   3 +-
 .../TestBreakIterator.cs                        | 421 -------------------
 src/Lucene.Net.Tests.Highlighter/project.json   |   4 +-
 .../Lucene.Net.Tests.Icu.csproj                 | 121 ++++++
 .../Lucene.Net.Tests.Icu.project.json           |  12 +
 .../Lucene.Net.Tests.Icu.xproj                  |  22 +
 .../Properties/AssemblyInfo.cs                  |  21 +
 .../Search/PostingsHighlight/CambridgeMA.utf8   |   1 +
 .../Support/TestApiConsistency.cs               | 126 ++++++
 .../Support/TestExceptionSerialization.cs       |  54 +++
 .../Support/TestIcuBreakIterator.cs             | 421 +++++++++++++++++++
 src/Lucene.Net.Tests.Icu/project.json           |  67 +++
 src/Lucene.Net/Lucene.Net.csproj                |   3 -
 src/Lucene.Net/Properties/AssemblyInfo.cs       |   2 +
 src/Lucene.Net/Support/BreakIterator.cs         | 231 ----------
 src/Lucene.Net/Support/CharacterIterator.cs     |  50 ---
 .../Support/StringCharacterIterator.cs          | 232 ----------
 42 files changed, 2220 insertions(+), 1356 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/Lucene.Net.Portable.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.Portable.sln b/Lucene.Net.Portable.sln
index 8044aed..7f4edad 100644
--- a/Lucene.Net.Portable.sln
+++ b/Lucene.Net.Portable.sln
@@ -79,6 +79,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{EFA10A77
 		build\build.ps1 = build\build.ps1
 	EndProjectSection
 EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Icu", "src\Lucene.Net.Icu\Lucene.Net.Icu.xproj", "{44A5341B-0F52-429D-977A-C35E10ECCADF}"
+EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Icu", "src\Lucene.Net.Tests.Icu\Lucene.Net.Tests.Icu.xproj", "{32FD3471-E862-4055-B969-79C12A656366}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -367,6 +371,22 @@ Global
 		{C708701D-4318-469F-9822-49A80386CFEA}.Release|Any CPU.Build.0 = Release|Any CPU
 		{C708701D-4318-469F-9822-49A80386CFEA}.Release|x86.ActiveCfg = Release|Any CPU
 		{C708701D-4318-469F-9822-49A80386CFEA}.Release|x86.Build.0 = Release|Any CPU
+		{44A5341B-0F52-429D-977A-C35E10ECCADF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{44A5341B-0F52-429D-977A-C35E10ECCADF}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{44A5341B-0F52-429D-977A-C35E10ECCADF}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{44A5341B-0F52-429D-977A-C35E10ECCADF}.Debug|x86.Build.0 = Debug|Any CPU
+		{44A5341B-0F52-429D-977A-C35E10ECCADF}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{44A5341B-0F52-429D-977A-C35E10ECCADF}.Release|Any CPU.Build.0 = Release|Any CPU
+		{44A5341B-0F52-429D-977A-C35E10ECCADF}.Release|x86.ActiveCfg = Release|Any CPU
+		{44A5341B-0F52-429D-977A-C35E10ECCADF}.Release|x86.Build.0 = Release|Any CPU
+		{32FD3471-E862-4055-B969-79C12A656366}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{32FD3471-E862-4055-B969-79C12A656366}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{32FD3471-E862-4055-B969-79C12A656366}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{32FD3471-E862-4055-B969-79C12A656366}.Debug|x86.Build.0 = Debug|Any CPU
+		{32FD3471-E862-4055-B969-79C12A656366}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{32FD3471-E862-4055-B969-79C12A656366}.Release|Any CPU.Build.0 = Release|Any CPU
+		{32FD3471-E862-4055-B969-79C12A656366}.Release|x86.ActiveCfg = Release|Any CPU
+		{32FD3471-E862-4055-B969-79C12A656366}.Release|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index b218f0d..66e91a6 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -88,6 +88,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{9811D53E
 		build\build.ps1 = build\build.ps1
 	EndProjectSection
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Icu", "src\Lucene.Net.Icu\Lucene.Net.Icu.csproj", "{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Icu", "src\Lucene.Net.Tests.Icu\Lucene.Net.Tests.Icu.csproj", "{D5AA1A22-1B28-4DF6-BFDA-02519A189839}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -849,6 +853,54 @@ Global
 		{FBCD6AFE-0A5C-4399-8044-99C58D2912D1}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
 		{FBCD6AFE-0A5C-4399-8044-99C58D2912D1}.Release35|x86.ActiveCfg = Release|Any CPU
 		{FBCD6AFE-0A5C-4399-8044-99C58D2912D1}.Release35|x86.Build.0 = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|x86.Build.0 = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|Any CPU.ActiveCfg = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|Any CPU.Build.0 = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|Mixed Platforms.Build.0 = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|x86.ActiveCfg = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|x86.Build.0 = Debug|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|Any CPU.Build.0 = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|x86.ActiveCfg = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|x86.Build.0 = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|Any CPU.ActiveCfg = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|Any CPU.Build.0 = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|x86.ActiveCfg = Release|Any CPU
+		{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|x86.Build.0 = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|x86.Build.0 = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|Any CPU.ActiveCfg = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|Any CPU.Build.0 = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|Mixed Platforms.Build.0 = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|x86.ActiveCfg = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|x86.Build.0 = Debug|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|Any CPU.Build.0 = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|x86.ActiveCfg = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|x86.Build.0 = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|Any CPU.ActiveCfg = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|Any CPU.Build.0 = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|x86.ActiveCfg = Release|Any CPU
+		{D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/NuGet.config
----------------------------------------------------------------------
diff --git a/NuGet.config b/NuGet.config
index 8df6c0f..e0c6211 100644
--- a/NuGet.config
+++ b/NuGet.config
@@ -2,6 +2,7 @@
 <configuration>
   <packageSources>
     <clear />
+	<add key="icunet" value="https://www.myget.org/F/icu-dotnet/api/v2" />
     <add key="dotnet-cat" value="https://www.myget.org/F/dotnetcat/api/v2" />
 	<add key="spatial4n" value="https://www.myget.org/F/spatial4n/api/v2" />
     <add key="nugetorg" value="https://www.nuget.org/api/v2" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
deleted file mode 100644
index cc0f7cd..0000000
--- a/src/IcuBreakIterator.cs
+++ /dev/null
@@ -1,394 +0,0 @@
-#if FEATURE_BREAKITERATOR
-using Lucene.Net.Support;
-using System;
-using System.Collections.Generic;
-using System.Globalization;
-using System.Linq;
-using System.Text;
-
-namespace Lucene.Net
-{
-    /*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-    /// <summary>
-    /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
-    /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
-    /// provides methods to move forward, reverse, and randomly through a set of text breaks
-    /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
-    /// </summary>
-    // LUCENENET specific type
-    internal class IcuBreakIterator : BreakIterator
-    {
-        private readonly Icu.Locale locale;
-        private readonly Icu.BreakIterator.UBreakIteratorType type;
-
-        private List<int> boundaries = new List<int>();
-        private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries
-        private string text;
-
-        /// <summary>
-        /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/>
-        /// </summary>
-        protected int m_start;
-
-        /// <summary>
-        /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/>
-        /// </summary>
-        protected int m_end;
-
-        private bool enableHacks = false;
-
-        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
-            : this(type, CultureInfo.CurrentCulture)
-        {
-        }
-
-        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale)
-        {
-            if (locale == null)
-                throw new ArgumentNullException("locale");
-            this.locale = new Icu.Locale(locale.Name);
-            this.type = type;
-        }
-
-        
-        public virtual bool EnableHacks
-        {
-            get { return enableHacks; }
-            set { enableHacks = value; }
-        }
-
-        /// <summary>
-        /// Sets the current iteration position to the beginning of the text.
-        /// </summary>
-        /// <returns>The offset of the beginning of the text.</returns>
-        public override int First()
-        {
-            currentBoundaryIndex = 0;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Sets the current iteration position to the end of the text.
-        /// </summary>
-        /// <returns>The text's past-the-end offset.</returns>
-        public override int Last()
-        {
-            currentBoundaryIndex = boundaries.Count - 1;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Advances the iterator either forward or backward the specified number of steps.
-        /// Negative values move backward, and positive values move forward.  This is
-        /// equivalent to repeatedly calling <see cref="Next()"/> or <see cref="Previous()"/>.
-        /// </summary>
-        /// <param name="n">The number of steps to move.  The sign indicates the direction
-        /// (negative is backwards, and positive is forwards).</param>
-        /// <returns>The character offset of the boundary position n boundaries away from
-        /// the current one.</returns>
-        public override int Next(int n)
-        {
-            int result = Current;
-            while (n > 0)
-            {
-                result = Next();
-                --n;
-            }
-            while (n < 0)
-            {
-                result = Previous();
-                ++n;
-            }
-            return result;
-        }
-
-        /// <summary>
-        /// Advances the iterator to the next boundary position.
-        /// </summary>
-        /// <returns>The position of the first boundary after this one.</returns>
-        public override int Next()
-        {
-            if (currentBoundaryIndex >= boundaries.Count - 1 || boundaries.Count == 0)
-            {
-                return DONE;
-            }
-            currentBoundaryIndex++;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Advances the iterator backwards, to the last boundary preceding this one.
-        /// </summary>
-        /// <returns>The position of the last boundary position preceding this one.</returns>
-        public override int Previous()
-        {
-            if (currentBoundaryIndex == 0 || boundaries.Count == 0)
-            {
-                return DONE;
-            }
-            currentBoundaryIndex--;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Throw <see cref="ArgumentException"/> unless begin &lt;= offset &lt; end.
-        /// </summary>
-        /// <param name="offset"></param>
-        private void CheckOffset(int offset)
-        {
-            if (offset < m_start || offset > m_end)
-            {
-                throw new ArgumentException("offset out of bounds");
-            }
-        }
-
-        /// <summary>
-        /// Sets the iterator to refer to the first boundary position following
-        /// the specified position.
-        /// </summary>
-        /// <param name="offset">The position from which to begin searching for a break position.</param>
-        /// <returns>The position of the first break after the current position.</returns>
-        public override int Following(int offset)
-        {
-            CheckOffset(offset);
-
-            if (boundaries.Count == 0)
-            {
-                return DONE;
-            }
-
-            int following = GetLowestIndexGreaterThan(offset);
-            if (following == -1)
-            {
-                currentBoundaryIndex = boundaries.Count - 1;
-                return DONE;
-            }
-            else
-            {
-                currentBoundaryIndex = following;
-            }
-            return ReturnCurrent();
-        }
-
-        private int GetLowestIndexGreaterThan(int offset)
-        {
-            int index = boundaries.BinarySearch(offset);
-            if (index < 0)
-            {
-                return ~index;
-            }
-            else if (index + 1 < boundaries.Count)
-            {
-                return index + 1;
-            }
-
-            return -1;
-        }
-
-        /// <summary>
-        /// Sets the iterator to refer to the last boundary position before the
-        /// specified position.
-        /// </summary>
-        /// <param name="offset">The position to begin searching for a break from.</param>
-        /// <returns>The position of the last boundary before the starting position.</returns>
-        public override int Preceding(int offset)
-        {
-            CheckOffset(offset);
-
-            if (boundaries.Count == 0)
-            {
-                return DONE;
-            }
-
-            int preceeding = GetHighestIndexLessThan(offset);
-            if (preceeding == -1)
-            {
-                currentBoundaryIndex = 0;
-                return DONE;
-            }
-            else
-            {
-                currentBoundaryIndex = preceeding;
-            }
-            return ReturnCurrent();
-        }
-
-        private int GetHighestIndexLessThan(int offset)
-        {
-            int index = boundaries.BinarySearch(offset);
-            if (index < 0)
-            {
-                return ~index - 1;
-            }
-            else
-            {
-                // NOTE: This is intentionally allowed to return -1 in the case
-                // where index == 0. This state indicates we are before the first boundary.
-                return index - 1;
-            }
-        }
-
-        /// <summary>
-        /// Returns the current iteration position.
-        /// </summary>
-        public override int Current
-        {
-            get { return ReturnCurrent(); }
-        }
-
-        /// <summary>
-        /// Gets the text being analyzed.
-        /// </summary>
-        public override string Text
-        {
-            get
-            {
-                return text;
-            }
-        }
-
-        /// <summary>
-        /// Set the iterator to analyze a new piece of text.  This function resets
-        /// the current iteration position to the beginning of the text.
-        /// </summary>
-        /// <param name="newText">The text to analyze.</param>
-        public override void SetText(string newText)
-        {
-            text = newText;
-            currentBoundaryIndex = 0;
-            m_start = 0;
-            m_end = newText.Length;
-
-            LoadBoundaries(m_start, m_end);
-        }
-
-        public override void SetText(CharacterIterator newText)
-        {
-            text = newText.GetTextAsString();
-            currentBoundaryIndex = 0;
-            m_start = newText.BeginIndex;
-            m_end = newText.EndIndex;
-
-            LoadBoundaries(m_start, m_end);
-        }
-
-        private void LoadBoundaries(int start, int end)
-        {
-            IEnumerable<Icu.Boundary> icuBoundaries;
-            string offsetText = text.Substring(start, end - start);
-
-#if !NETSTANDARD
-            try
-            {
-#endif
-                if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
-                {
-                    if (enableHacks)
-                    {
-                        // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
-                        offsetText = offsetText.Replace("-", "a");
-                    }
-
-                    icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
-                }
-                else
-                {
-                    if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
-                    {
-                        // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
-                        offsetText = offsetText.Replace("\n", " ");
-                        // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
-                        // begin with capital letters.
-                        offsetText = CapitalizeFirst(offsetText);
-                    }
-
-                    icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
-                }
-#if !NETSTANDARD
-            }
-            catch (AccessViolationException ace)
-            {
-                // LUCENENET TODO: Find a reliable way to reproduce and report the 
-                // AccessViolationException that happens here to the icu-dotnet project team
-                throw new Exception("Hit AccessViolationException: " + ace.ToString(), ace);
-            }
-#endif
-
-            boundaries = icuBoundaries
-                .Select(t => new[] { t.Start + start, t.End + start })
-                .SelectMany(b => b)
-                .Distinct()
-                .ToList();
-        }
-
-        /// <summary>
-        /// Returns true if the specified character offset is a text boundary.
-        /// </summary>
-        /// <param name="offset">the character offset to check.</param>
-        /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
-        public override bool IsBoundary(int offset)
-        {
-            CheckOffset(offset);
-            return boundaries.Contains(offset);
-        }
-
-        private int ReturnCurrent()
-        {
-            if (boundaries.Count > 0)
-            {
-                return currentBoundaryIndex < boundaries.Count && currentBoundaryIndex > -1
-                    ? boundaries[currentBoundaryIndex]
-                    : DONE;
-            }
-
-            // If there are no boundaries, we must return the start offset
-            return m_start;
-        }
-
-        /// <summary>
-        /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet
-        /// where it doesn't correctly break sentences unless they begin with a capital letter.
-        /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator 
-        /// code changed to remove calls to this method.
-        /// </summary>
-        public static string CapitalizeFirst(string s)
-        {
-            bool isNewSentence = true;
-            var result = new StringBuilder(s.Length);
-            for (int i = 0; i < s.Length; i++)
-            {
-                if (isNewSentence && char.IsLetter(s[i]))
-                {
-                    result.Append(char.ToUpper(s[i]));
-                    isNewSentence = false;
-                }
-                else
-                    result.Append(s[i]);
-
-                if (s[i] == '!' || s[i] == '?' || s[i] == '.')
-                {
-                    isNewSentence = true;
-                }
-            }
-
-            return result.ToString();
-        }
-    }
-}
-#endif

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
index aa6e1d7..0885069 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
@@ -111,7 +111,7 @@ namespace Lucene.Net.Analysis.Th
         ///         built from a <see cref="StandardTokenizer"/> filtered with
         ///         <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="ThaiWordFilter"/>, and
         ///         <see cref="StopFilter"/> </returns>
-        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
         {
             if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index fb403aa..02545b2 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -41,9 +41,6 @@
     <Reference Include="System.XML" />
   </ItemGroup>
   <ItemGroup>
-    <Compile Include="..\IcuBreakIterator.cs">
-      <Link>IcuBreakIterator.cs</Link>
-    </Compile>
     <Compile Include="Analysis\Bg\BulgarianAnalyzer.cs" />
     <Compile Include="Analysis\Bg\BulgarianStemFilter.cs" />
     <Compile Include="Analysis\Bg\BulgarianStemFilterFactory.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Analysis.Common/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/project.json b/src/Lucene.Net.Analysis.Common/project.json
index ca771a3..556a89e 100644
--- a/src/Lucene.Net.Analysis.Common/project.json
+++ b/src/Lucene.Net.Analysis.Common/project.json
@@ -26,8 +26,7 @@
         "define": [ "NETSTANDARD" ],
         "compile": {
           "includeFiles": [
-            "../CommonAssemblyInfo.cs",
-            "../IcuBreakIterator.cs"
+            "../CommonAssemblyInfo.cs"
           ]
         },
         "embed": {
@@ -52,8 +51,7 @@
         "define": [ "FEATURE_CLONEABLE", "FEATURE_DTD_PROCESSING", "FEATURE_SERIALIZABLE" ],
         "compile": {
           "includeFiles": [
-            "../CommonAssemblyInfo.cs",
-            "../IcuBreakIterator.cs"
+            "../CommonAssemblyInfo.cs"
           ]
         },
         "embed": {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
index 31ac251..9c885d4 100644
--- a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
+++ b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
@@ -44,9 +44,6 @@
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
-    <Compile Include="..\IcuBreakIterator.cs">
-      <Link>IcuBreakIterator.cs</Link>
-    </Compile>
     <Compile Include="Highlight\DefaultEncoder.cs" />
     <Compile Include="Highlight\GradientFormatter.cs" />
     <Compile Include="Highlight\Highlighter.cs" />
@@ -101,7 +98,7 @@
     <Compile Include="VectorHighlight\SingleFragListBuilder.cs" />
     <Compile Include="VectorHighlight\WeightedFieldFragList.cs" />
     <Compile Include="VectorHighlight\WeightedFragListBuilder.cs" />
-	<Compile Include="..\CommonAssemblyInfo.cs">
+    <Compile Include="..\CommonAssemblyInfo.cs">
       <Link>Properties\CommonAssemblyInfo.cs</Link>
     </Compile>
   </ItemGroup>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs
index 4538d46..6a38bec 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs
@@ -1,4 +1,5 @@
-using System;
+#if FEATURE_BREAKITERATOR
+using System;
 using System.Text;
 
 namespace Lucene.Net.Search.PostingsHighlight
@@ -161,3 +162,4 @@ namespace Lucene.Net.Search.PostingsHighlight
         }
     }
 }
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs
index e5a5bcd..bd79c80 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis;
+#if FEATURE_BREAKITERATOR
+using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Index;
 using Lucene.Net.Search.Spans;
@@ -344,3 +345,4 @@ namespace Lucene.Net.Search.PostingsHighlight
         }
     }
 }
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs
index 54a2446..b9a664f 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Util;
+#if FEATURE_BREAKITERATOR
+using Lucene.Net.Util;
 using System.Collections.Generic;
 using System.Diagnostics;
 
@@ -183,3 +184,4 @@ namespace Lucene.Net.Search.PostingsHighlight
         }
     }
 }
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs
index ce367a6..770a6fa 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs
@@ -1,4 +1,5 @@
-namespace Lucene.Net.Search.PostingsHighlight
+#if FEATURE_BREAKITERATOR
+namespace Lucene.Net.Search.PostingsHighlight
 {
     /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -42,3 +43,4 @@
         public abstract object Format(Passage[] passages, string content); // LUCENENET TODO: Make return type generic?
     }
 }
+#endif

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs
index af398da..de0fd45 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs
@@ -1,4 +1,5 @@
-using System;
+#if FEATURE_BREAKITERATOR
+using System;
 
 namespace Lucene.Net.Search.PostingsHighlight
 {
@@ -110,3 +111,4 @@ namespace Lucene.Net.Search.PostingsHighlight
         }
     }
 }
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs b/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
index 6d2eedf..8969ff6 100644
--- a/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
+++ b/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
@@ -24,7 +24,9 @@ using System.Runtime.InteropServices;
 // The following GUID is for the ID of the typelib if this project is exposed to COM
 [assembly: Guid("e9e769ea-8504-44bc-8dc9-ccf958765f8f")]
 
+[assembly: InternalsVisibleTo("Lucene.Net.Icu")]
 // for testing
 [assembly: InternalsVisibleTo("Lucene.Net.Tests.Highlighter")]
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Icu")]
 
 // NOTE: Version information is in CommonAssemblyInfo.cs

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/project.json b/src/Lucene.Net.Highlighter/project.json
index 5016f93..ce4b726 100644
--- a/src/Lucene.Net.Highlighter/project.json
+++ b/src/Lucene.Net.Highlighter/project.json
@@ -25,8 +25,7 @@
         "define": [ "NETSTANDARD" ],
         "compile": {
           "includeFiles": [
-            "../CommonAssemblyInfo.cs",
-            "../IcuBreakIterator.cs"
+            "../CommonAssemblyInfo.cs"
           ]
         }
       },
@@ -40,8 +39,7 @@
         "define": [ "FEATURE_SERIALIZABLE" ],
         "compile": {
           "includeFiles": [
-            "../CommonAssemblyInfo.cs",
-            "../IcuBreakIterator.cs"
+            "../CommonAssemblyInfo.cs"
           ]
         }
       }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt b/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt
new file mode 100644
index 0000000..07f0fab
--- /dev/null
+++ b/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+อีก
+อาจ
+อะไร
+ออก
+อย่าง
+อยู่
+อยาก
+หาก
+หลาย
+หลังจาก
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj b/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj
new file mode 100644
index 0000000..267132e
--- /dev/null
+++ b/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net</RootNamespace>
+    <AssemblyName>Lucene.Net.Icu</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DefineConstants>$(DefineConstants);FEATURE_BREAKITERATOR;FEATURE_SERIALIZABLE</DefineConstants>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiAnalyzer.cs">
+      <Link>Analysis\Th\ThaiAnalyzer.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizer.cs">
+      <Link>Analysis\Th\ThaiTokenizer.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizerFactory.cs">
+      <Link>Analysis\Th\ThaiTokenizerFactory.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilter.cs">
+      <Link>Analysis\Th\ThaiWordFilter.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilterFactory.cs">
+      <Link>Analysis\Th\ThaiWordFilterFactory.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\CharArrayIterator.cs">
+      <Link>Analysis\Util\CharArrayIterator.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\SegmentingTokenizerBase.cs">
+      <Link>Analysis\Util\SegmentingTokenizerBase.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\DefaultPassageFormatter.cs">
+      <Link>Search\PostingsHighlight\DefaultPassageFormatter.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\MultiTermHighlighting.cs">
+      <Link>Search\PostingsHighlight\MultiTermHighlighting.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\Passage.cs">
+      <Link>Search\PostingsHighlight\Passage.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageFormatter.cs">
+      <Link>Search\PostingsHighlight\PassageFormatter.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageScorer.cs">
+      <Link>Search\PostingsHighlight\PassageScorer.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PostingsHighlighter.cs">
+      <Link>Search\PostingsHighlight\PostingsHighlighter.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\WholeBreakIterator.cs">
+      <Link>Search\PostingsHighlight\WholeBreakIterator.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Highlighter\VectorHighlight\BreakIteratorBoundaryScanner.cs">
+      <Link>Search\VectorHighlight\BreakIteratorBoundaryScanner.cs</Link>
+    </Compile>
+    <Compile Include="Support\BreakIterator.cs" />
+    <Compile Include="Support\CharacterIterator.cs" />
+    <Compile Include="Support\IcuBreakIterator.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="..\CommonAssemblyInfo.cs">
+      <Link>Properties\CommonAssemblyInfo.cs</Link>
+    </Compile>
+    <Compile Include="Support\StringCharacterIterator.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+      <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project>
+      <Name>Lucene.Net.Analysis.Common</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.Highlighter\Lucene.Net.Highlighter.csproj">
+      <Project>{e9e769ea-8504-44bc-8dc9-ccf958765f8f}</Project>
+      <Name>Lucene.Net.Highlighter</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+      <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="Lucene.Net.Icu.project.json" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="Analysis\Th\stopwords.txt" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json b/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json
new file mode 100644
index 0000000..af28fc8
--- /dev/null
+++ b/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json
@@ -0,0 +1,11 @@
+{
+  "runtimes": {
+    "win": {}
+  },
+  "dependencies": {
+    "icu.net": "54.1.1-alpha"
+  },
+  "frameworks": {
+    "net451": {}
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj b/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj
new file mode 100644
index 0000000..dd48901
--- /dev/null
+++ b/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0</VisualStudioVersion>
+    <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>44a5341b-0f52-429d-977a-c35e10eccadf</ProjectGuid>
+    <RootNamespace>Lucene.Net.Search</RootNamespace>
+    <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
+    <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+  </PropertyGroup>
+  <PropertyGroup>
+    <SchemaVersion>2.0</SchemaVersion>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs b/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..3cdd9b2
--- /dev/null
+++ b/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs
@@ -0,0 +1,31 @@
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Icu")]
+[assembly: AssemblyDescription(
+    "International Components for Unicode-based features including Thai analyzer support, " +
+    "an international postings highlighter, and BreakIterator support for the vector highlighter in Lucene.Net.Highlighter " +
+    "for the Lucene.Net full-text search engine library from The Apache Software Foundation.")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyDefaultAlias("Lucene.Net.Icu")]
+[assembly: AssemblyCulture("")]
+
+[assembly: CLSCompliant(true)]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("349cb7c9-7534-4e1d-9b0a-5521441af0ae")]
+
+// for testing
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Icu")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Support/BreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Support/BreakIterator.cs b/src/Lucene.Net.Icu/Support/BreakIterator.cs
new file mode 100644
index 0000000..ded1c9c
--- /dev/null
+++ b/src/Lucene.Net.Icu/Support/BreakIterator.cs
@@ -0,0 +1,231 @@
+#if FEATURE_BREAKITERATOR
+using System;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// The <see cref="BreakIterator"/> class implements methods for finding
+    /// the location of boundaries in text. Instances of <see cref="BreakIterator"/>
+    /// maintain a current position and scan over text
+    /// returning the index of characters where boundaries occur.
+    /// </summary>
+    public abstract class BreakIterator
+#if FEATURE_CLONEABLE
+        : ICloneable
+#endif
+    {
+        /// <summary>
+        /// Constructor. The abstract BreakIterator base class declares no instance state and has no default behavior.
+        /// </summary>
+        protected BreakIterator()
+        {
+        }
+
+        /// <summary>
+        /// Create a copy of this iterator
+        /// </summary>
+        /// <returns>A member-wise copy of this</returns>
+        public object Clone()
+        {
+            return MemberwiseClone();
+        }
+
+        /// <summary>
+        /// DONE is returned by Previous(), Next(), Next(int), Preceding(int)
+        /// and Following(int) when either the first or last text boundary has been
+        /// reached.
+        /// </summary>
+        public static readonly int DONE = -1;
+
+        /// <summary>
+        /// Returns the first boundary. The iterator's current position is set
+        /// to the first text boundary.
+        /// </summary>
+        /// <returns>The character index of the first text boundary</returns>
+        public abstract int First();
+
+        /// <summary>
+        /// Returns the last boundary. The iterator's current position is set
+        /// to the last text boundary.
+        /// </summary>
+        /// <returns>The character index of the last text boundary.</returns>
+        public abstract int Last();
+
+        /// <summary>
+        /// Returns the nth boundary from the current boundary. If either
+        /// the first or last text boundary has been reached, it returns
+        /// <see cref="BreakIterator.DONE"/> and the current position is set to either
+        /// the first or last text boundary depending on which one is reached. Otherwise,
+        /// the iterator's current position is set to the new boundary.
+        /// For example, if the iterator's current position is the mth text boundary
+        /// and three more boundaries exist from the current boundary to the last text
+        /// boundary, a Next(2) call returns the character index of the (m + 2)th boundary. The new text position is set
+        /// to the (m + 2)th text boundary. A Next(4) call would return
+        /// <see cref="BreakIterator.DONE"/> and the last text boundary would become the
+        /// new text position.
+        /// </summary>
+        /// <param name="n">
+        /// which boundary to return.  A value of 0
+        /// does nothing.  Negative values move to previous boundaries
+        /// and positive values move to later boundaries.
+        /// </param>
+        /// <returns>
+        /// The character index of the nth boundary from the current position
+        /// or <see cref="BreakIterator.DONE"/> if either first or last text boundary
+        /// has been reached.
+        /// </returns>
+        public abstract int Next(int n);
+
+        /// <summary>
+        /// Returns the boundary following the current boundary. If the current boundary
+        /// is the last text boundary, it returns <see cref="BreakIterator.DONE"/> and
+        /// the iterator's current position is unchanged. Otherwise, the iterator's
+        /// current position is set to the boundary following the current boundary.
+        /// </summary>
+        /// <returns>
+        /// The character index of the next text boundary or
+        /// <see cref="BreakIterator.DONE"/> if the current boundary is the last text
+        /// boundary.
+        /// Equivalent to Next(1).
+        /// </returns>
+        /// <seealso cref="Next(int)"/>
+        public abstract int Next();
+
+        /// <summary>
+        /// Returns the boundary preceding the current boundary. If the current boundary
+        /// is the first text boundary, it returns <see cref="BreakIterator.DONE"/> and
+        /// the iterator's current position is unchanged. Otherwise, the iterator's
+        /// current position is set to the boundary preceding the current boundary.
+        /// </summary>
+        /// <returns>
+        /// The character index of the previous text boundary or
+        /// <see cref="BreakIterator.DONE"/> if the current boundary is the first text
+        /// boundary.
+        /// </returns>
+        public abstract int Previous();
+
+        /// <summary>
+        /// Returns the first boundary following the specified character offset. If the
+        /// specified offset is equal to the last text boundary, it returns
+        /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged.
+        /// Otherwise, the iterator's current position is set to the returned boundary.
+        /// The value returned is always greater than the offset or the value
+        /// <see cref="BreakIterator.DONE"/>.
+        /// </summary>
+        /// <param name="offset">the character offset to begin scanning.</param>
+        /// <returns>
+        /// The first boundary after the specified offset or
+        /// <see cref="BreakIterator.DONE"/> if the last text boundary is passed in
+        /// as the offset.
+        /// </returns>
+        /// <exception cref="ArgumentException">
+        /// if the specified offset is less than
+        /// the first text boundary or greater than the last text boundary.
+        /// </exception>
+        public abstract int Following(int offset);
+
+        /// <summary>
+        /// Returns the last boundary preceding the specified character offset. If the
+        /// specified offset is equal to the first text boundary, it returns
+        /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged.
+        /// Otherwise, the iterator's current position is set to the returned boundary.
+        /// The value returned is always less than the offset or the value
+        /// <see cref="BreakIterator.DONE"/>.
+        /// </summary>
+        /// <param name="offset">the character offset to begin scanning.</param>
+        /// <returns>
+        /// The last boundary before the specified offset or
+        /// <see cref="BreakIterator.DONE"/> if the first text boundary is passed in
+        /// as the offset.
+        /// </returns>
+        public abstract int Preceding(int offset);
+        //{
+        //    // NOTE:  This implementation is here solely because we can't add new
+        //    // abstract methods to an existing class.  There is almost ALWAYS a
+        //    // better, faster way to do this.
+        //    int pos = Following(offset);
+        //    while (pos >= offset && pos != DONE)
+        //    {
+        //        pos = Previous();
+        //    }
+        //    return pos;
+        //}
+
+        /// <summary>
+        /// Returns true if the specified character offset is a text boundary.
+        /// </summary>
+        /// <param name="offset">the character offset to check.</param>
+        /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentException">
+        /// if the specified offset is less than
+        /// the first text boundary or greater than the last text boundary.
+        /// </exception>
+        public abstract bool IsBoundary(int offset);
+        //{
+        //    // NOTE: This implementation probably is wrong for most situations
+        //    // because it fails to take into account the possibility that a
+        //    // CharacterIterator passed to setText() may not have a begin offset
+        //    // of 0.  But since the abstract BreakIterator doesn't have that
+        //    // knowledge, it assumes the begin offset is 0.  If you subclass
+        //    // BreakIterator, copy the SimpleTextBoundary implementation of this
+        //    // function into your subclass.  [This should have been abstract at
+        //    // this level, but it's too late to fix that now.]
+        //    if (offset == 0)
+        //    {
+        //        return true;
+        //    }
+        //    int boundary = Following(offset - 1);
+        //    if (boundary == DONE)
+        //    {
+        //        throw new ArgumentException();
+        //    }
+        //    return boundary == offset;
+        //}
+
+        /// <summary>
+        /// Returns character index of the text boundary that was most
+        /// recently returned by Next(), Next(int), Previous(), First(), Last(),
+        /// Following(int) or Preceding(int). If any of these methods returns
+        /// <see cref="BreakIterator.DONE"/> because either first or last text boundary
+        /// has been reached, it returns the first or last text boundary depending on
+        /// which one is reached.
+        /// </summary>
+        /// <returns>
+        /// The text boundary returned from the above methods, first or last
+        /// text boundary.
+        /// </returns>
+        /// <seealso cref="Next()"/>
+        /// <seealso cref="Next(int)"/>
+        /// <seealso cref="Previous()"/>
+        /// <seealso cref="First()"/>
+        /// <seealso cref="Last()"/>
+        /// <seealso cref="Following(int)"/>
+        /// <seealso cref="Preceding(int)"/>
+        public abstract int Current { get; }
+
+        /// <summary>
+        /// Get the text being scanned
+        /// </summary>
+        /// <returns>the text being scanned</returns>
+        //public abstract CharacterIterator GetText();
+        public abstract string Text { get; }
+
+        /// <summary>
+        /// Set a new text string to be scanned.  The current scan
+        /// position is reset to First().
+        /// </summary>
+        /// <param name="newText">new text to scan.</param>
+        public virtual void SetText(string newText)
+        {
+            SetText(new StringCharacterIterator(newText));
+        }
+
+        /// <summary>
+        /// Set new text to be scanned, supplied as a <see cref="CharacterIterator"/>.  The current scan
+        /// position is reset to First().
+        /// </summary>
+        /// <param name="newText">new text to scan.</param>
+        public abstract void SetText(CharacterIterator newText);
+    }
+}
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Support/CharacterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Support/CharacterIterator.cs b/src/Lucene.Net.Icu/Support/CharacterIterator.cs
new file mode 100644
index 0000000..0c81629
--- /dev/null
+++ b/src/Lucene.Net.Icu/Support/CharacterIterator.cs
@@ -0,0 +1,50 @@
+#if FEATURE_BREAKITERATOR
+using System;
+
+namespace Lucene.Net.Support
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public abstract class CharacterIterator
+    {
+        public static readonly char DONE = '\uFFFF';
+
+        public abstract char Current { get; }
+
+        public abstract char First();
+
+        public abstract char Last();
+
+        public abstract char Next();
+
+        public abstract char Previous();
+
+        public abstract char SetIndex(int position);
+
+        public abstract int BeginIndex { get; }
+
+        public abstract int EndIndex { get; }
+
+        public abstract int Index { get; }
+
+        public abstract object Clone();
+
+        public abstract string GetTextAsString();
+    }
+}
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Support/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Support/IcuBreakIterator.cs b/src/Lucene.Net.Icu/Support/IcuBreakIterator.cs
new file mode 100644
index 0000000..79819ed
--- /dev/null
+++ b/src/Lucene.Net.Icu/Support/IcuBreakIterator.cs
@@ -0,0 +1,394 @@
+#if FEATURE_BREAKITERATOR
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
+    /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
+    /// provides methods to move forward, reverse, and randomly through a set of text breaks
+    /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
+    /// </summary>
+    /// <remarks>
+    /// All boundaries are computed once, when the text is set, and cached in a list.
+    /// The navigation methods only move an index through that cached list.
+    /// </remarks>
+    // LUCENENET specific type
+    public class IcuBreakIterator : BreakIterator
+    {
+        private readonly Icu.Locale locale;
+        private readonly Icu.BreakIterator.UBreakIteratorType type;
+
+        // Cached boundary offsets for the current text. Following/Preceding/IsBoundary
+        // binary-search this list, so it must be in ascending order.
+        private List<int> boundaries = new List<int>();
+        private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries
+        private string text;
+
+        /// <summary>
+        /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/>
+        /// </summary>
+        protected int m_start;
+
+        /// <summary>
+        /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/>
+        /// </summary>
+        protected int m_end;
+
+        private bool enableHacks = false;
+
+        /// <summary>
+        /// Creates a break iterator of the given <paramref name="type"/> that uses
+        /// <see cref="CultureInfo.CurrentCulture"/> as its locale.
+        /// </summary>
+        /// <param name="type">The kind of boundary to detect.</param>
+        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
+            : this(type, CultureInfo.CurrentCulture)
+        {
+        }
+
+        /// <summary>
+        /// Creates a break iterator of the given <paramref name="type"/> for the given culture.
+        /// </summary>
+        /// <param name="type">The kind of boundary to detect.</param>
+        /// <param name="locale">The culture whose name is used to create the ICU locale.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="locale"/> is <c>null</c>.</exception>
+        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale)
+        {
+            if (locale == null)
+                throw new ArgumentNullException("locale");
+            this.locale = new Icu.Locale(locale.Name);
+            this.type = type;
+        }
+
+        /// <summary>
+        /// Gets or sets whether the text is pre-processed with the workarounds applied when
+        /// boundaries are loaded (hyphens replaced for word breaking; newlines replaced and
+        /// sentence starts capitalized for sentence breaking).
+        /// The flag is only read while boundaries are being computed, so changing it
+        /// takes effect on the next call to <c>SetText</c>.
+        /// </summary>
+        public virtual bool EnableHacks
+        {
+            get { return enableHacks; }
+            set { enableHacks = value; }
+        }
+
+        /// <summary>
+        /// Sets the current iteration position to the beginning of the text.
+        /// </summary>
+        /// <returns>The offset of the beginning of the text.</returns>
+        public override int First()
+        {
+            currentBoundaryIndex = 0;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Sets the current iteration position to the end of the text.
+        /// </summary>
+        /// <returns>The text's past-the-end offset.</returns>
+        public override int Last()
+        {
+            currentBoundaryIndex = boundaries.Count - 1;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Advances the iterator either forward or backward the specified number of steps.
+        /// Negative values move backward, and positive values move forward.  This is
+        /// equivalent to repeatedly calling <see cref="Next()"/> or <see cref="Previous()"/>.
+        /// </summary>
+        /// <param name="n">The number of steps to move.  The sign indicates the direction
+        /// (negative is backwards, and positive is forwards).</param>
+        /// <returns>The character offset of the boundary position n boundaries away from
+        /// the current one.</returns>
+        public override int Next(int n)
+        {
+            int result = Current;
+            while (n > 0)
+            {
+                result = Next();
+                --n;
+            }
+            while (n < 0)
+            {
+                result = Previous();
+                ++n;
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Advances the iterator to the next boundary position.
+        /// </summary>
+        /// <returns>The position of the first boundary after this one, or DONE when the
+        /// iterator is already at the last boundary (the position is then left unchanged).</returns>
+        public override int Next()
+        {
+            if (currentBoundaryIndex >= boundaries.Count - 1 || boundaries.Count == 0)
+            {
+                return DONE;
+            }
+            currentBoundaryIndex++;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Advances the iterator backwards, to the last boundary preceding this one.
+        /// </summary>
+        /// <returns>The position of the last boundary position preceding this one, or DONE when
+        /// the iterator is already at the first boundary (the position is then left unchanged).</returns>
+        public override int Previous()
+        {
+            if (currentBoundaryIndex == 0 || boundaries.Count == 0)
+            {
+                return DONE;
+            }
+            currentBoundaryIndex--;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Throw <see cref="ArgumentException"/> unless start &lt;= offset &lt;= end.
+        /// The end offset itself is accepted.
+        /// </summary>
+        /// <param name="offset">The character offset to validate.</param>
+        private void CheckOffset(int offset)
+        {
+            if (offset < m_start || offset > m_end)
+            {
+                throw new ArgumentException("offset out of bounds");
+            }
+        }
+
+        /// <summary>
+        /// Sets the iterator to refer to the first boundary position following
+        /// the specified position.
+        /// </summary>
+        /// <param name="offset">The position from which to begin searching for a break position.</param>
+        /// <returns>The position of the first break after the specified position, or DONE when
+        /// there is none (the iterator is then positioned on the last boundary).</returns>
+        /// <exception cref="ArgumentException"><paramref name="offset"/> is outside the text range.</exception>
+        public override int Following(int offset)
+        {
+            CheckOffset(offset);
+
+            if (boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            int following = GetLowestIndexGreaterThan(offset);
+            if (following == -1)
+            {
+                currentBoundaryIndex = boundaries.Count - 1;
+                return DONE;
+            }
+
+            currentBoundaryIndex = following;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Finds the index (into the cached boundary list) of the first boundary whose
+        /// value is strictly greater than <paramref name="offset"/>.
+        /// </summary>
+        /// <returns>The list index, or -1 when no boundary lies after <paramref name="offset"/>.</returns>
+        private int GetLowestIndexGreaterThan(int offset)
+        {
+            int index = boundaries.BinarySearch(offset);
+
+            // On a miss, BinarySearch returns the bitwise complement of the index of the
+            // first element larger than the value (or of Count when there is none).
+            // On a hit, the next element is the one we want.
+            int candidate = index < 0 ? ~index : index + 1;
+
+            // Never hand back an index past the end of the list: the caller stores it
+            // as the current position.
+            return candidate < boundaries.Count ? candidate : -1;
+        }
+
+        /// <summary>
+        /// Sets the iterator to refer to the last boundary position before the
+        /// specified position.
+        /// </summary>
+        /// <param name="offset">The position to begin searching for a break from.</param>
+        /// <returns>The position of the last boundary before the starting position, or DONE when
+        /// there is none (the iterator is then positioned on the first boundary).</returns>
+        /// <exception cref="ArgumentException"><paramref name="offset"/> is outside the text range.</exception>
+        public override int Preceding(int offset)
+        {
+            CheckOffset(offset);
+
+            if (boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            int preceding = GetHighestIndexLessThan(offset);
+            if (preceding == -1)
+            {
+                currentBoundaryIndex = 0;
+                return DONE;
+            }
+
+            currentBoundaryIndex = preceding;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Finds the index (into the cached boundary list) of the last boundary whose
+        /// value is strictly less than <paramref name="offset"/>.
+        /// </summary>
+        /// <returns>The list index, or -1 when no boundary lies before <paramref name="offset"/>.</returns>
+        private int GetHighestIndexLessThan(int offset)
+        {
+            int index = boundaries.BinarySearch(offset);
+            if (index < 0)
+            {
+                // ~index is the first element larger than offset; the one before it is ours.
+                return ~index - 1;
+            }
+            else
+            {
+                // NOTE: This is intentionally allowed to return -1 in the case
+                // where index == 0. This state indicates we are before the first boundary.
+                return index - 1;
+            }
+        }
+
+        /// <summary>
+        /// Returns the current iteration position.
+        /// </summary>
+        public override int Current
+        {
+            get { return ReturnCurrent(); }
+        }
+
+        /// <summary>
+        /// Gets the text being analyzed.
+        /// </summary>
+        public override string Text
+        {
+            get
+            {
+                return text;
+            }
+        }
+
+        /// <summary>
+        /// Set the iterator to analyze a new piece of text.  This function resets
+        /// the current iteration position to the beginning of the text.
+        /// </summary>
+        /// <param name="newText">The text to analyze.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="newText"/> is <c>null</c>.</exception>
+        public override void SetText(string newText)
+        {
+            // Validate before touching any state so a bad call leaves the iterator usable.
+            if (newText == null)
+                throw new ArgumentNullException("newText");
+
+            text = newText;
+            currentBoundaryIndex = 0;
+            m_start = 0;
+            m_end = newText.Length;
+
+            LoadBoundaries(m_start, m_end);
+        }
+
+        /// <summary>
+        /// Set the iterator to analyze the range of text exposed by a <see cref="CharacterIterator"/>.
+        /// The iterator's full text is kept, and only the part between its begin and end
+        /// indexes is analyzed. The current iteration position is reset to the first boundary.
+        /// </summary>
+        /// <param name="newText">The character iterator supplying the text and its range.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="newText"/> is <c>null</c>.</exception>
+        public override void SetText(CharacterIterator newText)
+        {
+            // Validate before touching any state so a bad call leaves the iterator usable.
+            if (newText == null)
+                throw new ArgumentNullException("newText");
+
+            text = newText.GetTextAsString();
+            currentBoundaryIndex = 0;
+            m_start = newText.BeginIndex;
+            m_end = newText.EndIndex;
+
+            LoadBoundaries(m_start, m_end);
+        }
+
+        /// <summary>
+        /// Asks ICU for the boundaries of the text between <paramref name="start"/> and
+        /// <paramref name="end"/> and caches them as offsets into the full text.
+        /// </summary>
+        /// <param name="start">Start of the range to analyze (inclusive).</param>
+        /// <param name="end">End of the range to analyze (exclusive).</param>
+        private void LoadBoundaries(int start, int end)
+        {
+            IEnumerable<Icu.Boundary> icuBoundaries;
+            string offsetText = text.Substring(start, end - start);
+
+#if !NETSTANDARD
+            try
+            {
+#endif
+                if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
+                {
+                    if (enableHacks)
+                    {
+                        // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+                        offsetText = offsetText.Replace("-", "a");
+                    }
+
+                    icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
+                }
+                else
+                {
+                    if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+                    {
+                        // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
+                        offsetText = offsetText.Replace("\n", " ");
+                        // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
+                        // begin with capital letters.
+                        offsetText = CapitalizeFirst(offsetText);
+                    }
+
+                    icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
+                }
+#if !NETSTANDARD
+            }
+            catch (AccessViolationException ace)
+            {
+                // LUCENENET TODO: Find a reliable way to reproduce and report the 
+                // AccessViolationException that happens here to the icu-dotnet project team
+                throw new Exception("Hit AccessViolationException: " + ace.ToString(), ace);
+            }
+#endif
+
+            // Flatten each (Start, End) pair into the list, shifting by "start" so the
+            // stored values are offsets into the full text, and drop the duplicates
+            // produced where one boundary's end is the next one's start.
+            boundaries = icuBoundaries
+                .Select(t => new[] { t.Start + start, t.End + start })
+                .SelectMany(b => b)
+                .Distinct()
+                .ToList();
+        }
+
+        /// <summary>
+        /// Returns true if the specified character offset is a text boundary.
+        /// </summary>
+        /// <param name="offset">the character offset to check.</param>
+        /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentException"><paramref name="offset"/> is outside the text range.</exception>
+        public override bool IsBoundary(int offset)
+        {
+            CheckOffset(offset);
+
+            // The list is searched with BinarySearch by Following/Preceding as well,
+            // so rely on the same ordering here instead of scanning linearly.
+            return boundaries.BinarySearch(offset) >= 0;
+        }
+
+        /// <summary>
+        /// Returns the boundary at the current position, DONE when the position is outside
+        /// the cached list, or the start offset when there are no boundaries at all.
+        /// </summary>
+        private int ReturnCurrent()
+        {
+            if (boundaries.Count > 0)
+            {
+                return currentBoundaryIndex < boundaries.Count && currentBoundaryIndex > -1
+                    ? boundaries[currentBoundaryIndex]
+                    : DONE;
+            }
+
+            // If there are no boundaries, we must return the start offset
+            return m_start;
+        }
+
+        /// <summary>
+        /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet
+        /// where it doesn't correctly break sentences unless they begin with a capital letter.
+        /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator 
+        /// code changed to remove calls to this method.
+        /// </summary>
+        /// <param name="s">The text to process.</param>
+        /// <returns>A copy of <paramref name="s"/> in which the first letter of the text, and the
+        /// first letter after every '!', '?' or '.', is upper-cased. The length is unchanged.</returns>
+        public static string CapitalizeFirst(string s)
+        {
+            bool isNewSentence = true;
+            var result = new StringBuilder(s.Length);
+            for (int i = 0; i < s.Length; i++)
+            {
+                if (isNewSentence && char.IsLetter(s[i]))
+                {
+                    result.Append(char.ToUpper(s[i]));
+                    isNewSentence = false;
+                }
+                else
+                {
+                    result.Append(s[i]);
+                }
+
+                if (s[i] == '!' || s[i] == '?' || s[i] == '.')
+                {
+                    isNewSentence = true;
+                }
+            }
+
+            return result.ToString();
+        }
+    }
+}
+#endif

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Support/StringCharacterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Support/StringCharacterIterator.cs b/src/Lucene.Net.Icu/Support/StringCharacterIterator.cs
new file mode 100644
index 0000000..a91e49a
--- /dev/null
+++ b/src/Lucene.Net.Icu/Support/StringCharacterIterator.cs
@@ -0,0 +1,232 @@
+#if FEATURE_BREAKITERATOR
+/*
+ * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
+ * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
+ *
+ * The original version of this source code and documentation
+ * is copyrighted and owned by Taligent, Inc., a wholly-owned
+ * subsidiary of IBM. These materials are provided under terms
+ * of a License Agreement between Taligent and Sun. This technology
+ * is protected by multiple US and International patents.
+ *
+ * This notice and attribution to Taligent may not be removed.
+ * Taligent is a registered trademark of Taligent, Inc.
+ *
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// <see cref="StringCharacterIterator"/> implements the
+    /// <see cref="CharacterIterator"/> protocol for a <see cref="string"/>.
+    /// It iterates over the entire <see cref="string"/>, or over the sub-range
+    /// given to the four-argument constructor.
+    /// </summary>
+    /// <seealso cref="CharacterIterator"/>
+    public class StringCharacterIterator : CharacterIterator
+    {
+        private string text;
+        private int begin;
+        private int end;
+        // invariant: begin <= pos <= end
+        private int pos;
+
+
+        /// <summary>
+        /// Creates an iterator over all of <paramref name="text"/>, positioned at index 0.
+        /// </summary>
+        /// <param name="text">The text to iterate over.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        public StringCharacterIterator(string text)
+            : this(text, 0)
+        {
+        }
+
+        /// <summary>
+        /// Creates an iterator over all of <paramref name="text"/>, positioned at <paramref name="pos"/>.
+        /// </summary>
+        /// <param name="text">The text to iterate over.</param>
+        /// <param name="pos">The initial position.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException"><paramref name="pos"/> is outside the text.</exception>
+        public StringCharacterIterator(string text, int pos)
+            // Guard the Length access: the initializer runs before the target constructor's
+            // null check, so a null text must reach that check instead of failing here.
+            : this(text, 0, text != null ? text.Length : 0, pos)
+        {
+        }
+
+        /// <summary>
+        /// Creates an iterator over the range [<paramref name="begin"/>, <paramref name="end"/>)
+        /// of <paramref name="text"/>, positioned at <paramref name="pos"/>.
+        /// </summary>
+        /// <param name="text">The text to iterate over.</param>
+        /// <param name="begin">Index of the first character of the range.</param>
+        /// <param name="end">Index one past the last character of the range.</param>
+        /// <param name="pos">The initial position; must satisfy begin &lt;= pos &lt;= end.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException">The range or the position is invalid.</exception>
+        public StringCharacterIterator(string text, int begin, int end, int pos)
+        {
+            if (text == null)
+                throw new ArgumentNullException("text");
+            this.text = text;
+
+            if (begin < 0 || begin > end || end > text.Length)
+                throw new ArgumentException("Invalid substring range");
+
+            if (pos < begin || pos > end)
+                throw new ArgumentException("Invalid position");
+
+            this.begin = begin;
+            this.end = end;
+            this.pos = pos;
+        }
+
+        /// <summary>
+        /// Replaces the text and resets the iterator to cover all of it, positioned at index 0.
+        /// </summary>
+        /// <param name="text">The new text to iterate over.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        public void SetText(string text)
+        {
+            if (text == null)
+                throw new ArgumentNullException("text");
+            this.text = text;
+            this.begin = 0;
+            this.end = text.Length;
+            this.pos = 0;
+        }
+
+        /// <summary>
+        /// Moves to the beginning of the range and returns the character there
+        /// (<see cref="CharacterIterator.DONE"/> when the range is empty).
+        /// </summary>
+        public override char First()
+        {
+            pos = begin;
+            return Current;
+        }
+
+        /// <summary>
+        /// Moves to the last character of the range and returns it
+        /// (<see cref="CharacterIterator.DONE"/> when the range is empty).
+        /// </summary>
+        public override char Last()
+        {
+            if (end != begin)
+            {
+                pos = end - 1;
+            }
+            else
+            {
+                // Empty range: there is no last character, so sit on the end index.
+                pos = end;
+            }
+            return Current;
+        }
+
+        /// <summary>
+        /// Moves to <paramref name="position"/> and returns the character there
+        /// (<see cref="CharacterIterator.DONE"/> when the position equals the end index).
+        /// </summary>
+        /// <param name="position">The position to move to; must satisfy begin &lt;= position &lt;= end.</param>
+        /// <exception cref="ArgumentException"><paramref name="position"/> is outside the range.</exception>
+        public override char SetIndex(int position)
+        {
+            if (position < begin || position > end)
+                throw new ArgumentException("Invalid index");
+            pos = position;
+            return Current;
+        }
+
+        /// <summary>
+        /// Gets the character at the current position, or
+        /// <see cref="CharacterIterator.DONE"/> when the position is not inside the range.
+        /// </summary>
+        public override char Current
+        {
+            get
+            {
+                if (pos >= begin && pos < end)
+                {
+                    return text[pos];
+                }
+                else
+                {
+                    return DONE;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Advances one position and returns the character there. When that would step past
+        /// the last character, the position is set to the end index and
+        /// <see cref="CharacterIterator.DONE"/> is returned.
+        /// </summary>
+        public override char Next()
+        {
+            if (pos < end - 1)
+            {
+                pos++;
+                return text[pos];
+            }
+            else
+            {
+                pos = end;
+                return DONE;
+            }
+        }
+
+        /// <summary>
+        /// Steps back one position and returns the character there. When already at the
+        /// beginning, the position is left unchanged and
+        /// <see cref="CharacterIterator.DONE"/> is returned.
+        /// </summary>
+        public override char Previous()
+        {
+            if (pos > begin)
+            {
+                pos--;
+                return text[pos];
+            }
+            else
+            {
+                return DONE;
+            }
+        }
+
+
+        /// <summary>
+        /// Gets the index of the first character of the range.
+        /// </summary>
+        public override int BeginIndex
+        {
+            get
+            {
+                return begin;
+            }
+        }
+
+        /// <summary>
+        /// Gets the index one past the last character of the range.
+        /// </summary>
+        public override int EndIndex
+        {
+            get
+            {
+                return end;
+            }
+        }
+
+        /// <summary>
+        /// Gets the current position.
+        /// </summary>
+        public override int Index
+        {
+            get
+            {
+                return pos;
+            }
+        }
+
+        /// <summary>
+        /// Returns the whole backing string (not only the begin..end range).
+        /// </summary>
+        public override string GetTextAsString()
+        {
+            return text;
+        }
+
+        /// <summary>
+        /// Two iterators are equal when they have the same text, range and current position.
+        /// </summary>
+        /// <param name="obj">The object to compare with.</param>
+        /// <returns><c>true</c> if <paramref name="obj"/> is a <see cref="StringCharacterIterator"/>
+        /// in the same state; otherwise <c>false</c>.</returns>
+        public override bool Equals(object obj)
+        {
+            if (ReferenceEquals(this, obj))
+                return true;
+            if (!(obj is StringCharacterIterator))
+                return false;
+
+            StringCharacterIterator that = (StringCharacterIterator)obj;
+
+            // Compare the cheap integer state first, then the text itself.
+            if (pos != that.pos || begin != that.begin || end != that.end)
+                return false;
+            return text.Equals(that.text, StringComparison.Ordinal);
+        }
+
+        /// <summary>
+        /// Computes a hash code from the text, range and current position, i.e. from exactly
+        /// the state compared by <see cref="Equals(object)"/>. The value changes as the
+        /// iterator moves.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            // Must be value-based: the identity hash from object.GetHashCode() differs for
+            // every instance and would break the Equals/GetHashCode contract.
+            return text.GetHashCode() ^ pos ^ begin ^ end;
+        }
+
+        /// <summary>
+        /// Creates a copy of this iterator with the same text, range and position.
+        /// </summary>
+        public override object Clone()
+        {
+            return MemberwiseClone();
+        }
+    }
+}
+#endif

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/project.json b/src/Lucene.Net.Icu/project.json
new file mode 100644
index 0000000..2e8f212
--- /dev/null
+++ b/src/Lucene.Net.Icu/project.json
@@ -0,0 +1,63 @@
+{
+  "version": "4.8.0",
+  "dependencies": {
+    "icu.net": "54.1.1-alpha",
+    "Lucene.Net": "4.8.0",
+    "Lucene.Net.Analysis.Common": "4.8.0",
+    "Lucene.Net.Highlighter": "4.8.0"
+  },
+  "buildOptions": {
+    "debugType": "portable",
+    "compile": {
+      "includeFiles": [
+        "../CommonAssemblyInfo.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/Passage.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/WholeBreakIterator.cs",
+        "../Lucene.Net.Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs"
+      ]
+    },
+    "embed": {
+      "includeFiles": [ "Analysis/Th/stopwords.txt" ]
+    }
+  },
+  "packOptions": {
+    "summary": "<Added from AssemblyDescriptionAttribute by the build script - do not remove this>",
+    "licenseUrl": "https://github.com/apache/lucenenet/blob/master/LICENSE.txt",
+    "iconUrl": "https://github.com/apache/lucenenet/blob/master/branding/logo/lucene-net-icon-128x128.png?raw=true",
+    "owners": [
+      "The Apache Software Foundation"
+    ],
+    "repository": {
+      "url": "https://github.com/apache/lucenenet"
+    },
+    "tags": [ "lucene.net", "core", "text", "search", "information", "retrieval", "lucene", "apache", "analysis", "index", "query" ]
+  },
+  "frameworks": {
+    "netstandard1.5": {
+      "imports": "dnxcore50",
+      "buildOptions": {
+        "define": [ "NETSTANDARD", "FEATURE_BREAKITERATOR" ]
+      },
+      "dependencies": {
+        "NETStandard.Library": "1.6.0"
+      }
+    },
+    "net451": {
+      "buildOptions": {
+        "define": [ "FEATURE_BREAKITERATOR", "FEATURE_SERIALIZABLE" ]
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj b/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj
index 3ed7239..d87e43d 100644
--- a/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj
+++ b/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj
@@ -56,7 +56,6 @@
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Support\TestExceptionSerialization.cs" />
     <Compile Include="Support\TestApiConsistency.cs" />
-    <Compile Include="TestBreakIterator.cs" />
     <Compile Include="VectorHighlight\AbstractTestCase.cs" />
     <Compile Include="VectorHighlight\BreakIteratorBoundaryScannerTest.cs" />
     <Compile Include="VectorHighlight\FastVectorHighlighterTest.cs" />
@@ -70,7 +69,7 @@
     <Compile Include="VectorHighlight\SimpleFragmentsBuilderTest.cs" />
     <Compile Include="VectorHighlight\SingleFragListBuilderTest.cs" />
     <Compile Include="VectorHighlight\WeightedFragListBuilderTest.cs" />
-	<Compile Include="..\CommonAssemblyInfo.cs">
+    <Compile Include="..\CommonAssemblyInfo.cs">
       <Link>Properties\CommonAssemblyInfo.cs</Link>
     </Compile>
   </ItemGroup>


Mime
View raw message