lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [lucenenet] 13/13: Ported Lucene.Net.Analysis.OpenNLP + tests
Date Sat, 26 Oct 2019 18:52:08 GMT
This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git

commit 5c434545b31e8131485e00657b37685610358677
Author: Shad Storhaug <shad@shadstorhaug.com>
AuthorDate: Tue Oct 22 11:36:01 2019 +0700

    Ported Lucene.Net.Analysis.OpenNLP + tests
---
 Lucene.Net.sln                                     |  12 +
 README.md                                          |   5 +-
 build/Dependencies.props                           |   1 +
 .../publish-test-results-for-test-projects.yml     |  10 +
 build/build.ps1                                    |  14 +-
 .../Lucene.Net.Analysis.OpenNLP.csproj             |  55 +++
 .../OpenNLPChunkerFilter.cs                        | 115 +++++++
 .../OpenNLPChunkerFilterFactory.cs                 |  90 +++++
 .../OpenNLPLemmatizerFilter.cs                     | 129 +++++++
 .../OpenNLPLemmatizerFilterFactory.cs              |  95 ++++++
 .../OpenNLPPOSFilter.cs                            | 102 ++++++
 .../OpenNLPPOSFilterFactory.cs                     |  79 +++++
 .../OpenNLPSentenceBreakIterator.cs                | 277 +++++++++++++++
 .../OpenNLPTokenizer.cs                            | 112 +++++++
 .../OpenNLPTokenizerFactory.cs                     |  85 +++++
 .../Tools/NLPChunkerOp.cs                          |  47 +++
 .../Tools/NLPLemmatizerOp.cs                       |  93 ++++++
 .../Tools/NLPNERTaggerOp.cs                        |  61 ++++
 .../Tools/NLPPOSTaggerOp.cs                        |  43 +++
 .../Tools/NLPSentenceDetectorOp.cs                 |  58 ++++
 .../Tools/NLPTokenizerOp.cs                        |  55 +++
 .../Tools/OpenNLPOpsFactory.cs                     | 225 +++++++++++++
 .../Analysis/BaseTokenStreamTestCase.cs            |  80 ++++-
 .../Analysis/Core/TestStopFilter.cs                |   2 +-
 .../Lucene.Net.Tests.Analysis.OpenNLP.csproj       |  71 ++++
 .../TestOpenNLPChunkerFilterFactory.cs             | 122 +++++++
 .../TestOpenNLPLemmatizerFilterFactory.cs          | 372 +++++++++++++++++++++
 .../TestOpenNLPPOSFilterFactory.cs                 | 160 +++++++++
 .../TestOpenNLPSentenceBreakIterator.cs            | 252 ++++++++++++++
 .../TestOpenNLPTokenizerFactory.cs                 | 140 ++++++++
 .../en-test-chunker.bin                            | Bin 0 -> 89914 bytes
 .../en-test-lemmas.dict                            |  12 +
 .../en-test-lemmatizer.bin                         | Bin 0 -> 7370 bytes
 .../en-test-ner.bin                                | Bin 0 -> 2049 bytes
 .../en-test-pos-maxent.bin                         | Bin 0 -> 18494 bytes
 .../en-test-sent.bin                               | Bin 0 -> 1051 bytes
 .../en-test-tokenizer.bin                          | Bin 0 -> 15100 bytes
 .../TestOpenNLPChunkerFilterFactory.cs             |  86 +++++
 38 files changed, 3055 insertions(+), 5 deletions(-)

diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index 70c88dc..192f434 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -193,6 +193,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lucene.Net.Tests.TestFramew
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lucene.Net.TestFramework.MSTest", "src\Lucene.Net.TestFramework.MSTest\Lucene.Net.TestFramework.MSTest.csproj", "{48520313-3B78-40D9-AE34-4864BFADF747}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analysis.OpenNLP", "src\Lucene.Net.Analysis.OpenNLP\Lucene.Net.Analysis.OpenNLP.csproj", "{CC2CE069-5BBB-429E-8510-7C3FBA8069D5}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Analysis.OpenNLP", "src\Lucene.Net.Tests.Analysis.OpenNLP\Lucene.Net.Tests.Analysis.OpenNLP.csproj", "{88D6D124-711D-4232-AD70-F22AB6AF9EA1}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -439,6 +443,14 @@ Global
 		{48520313-3B78-40D9-AE34-4864BFADF747}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{48520313-3B78-40D9-AE34-4864BFADF747}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{48520313-3B78-40D9-AE34-4864BFADF747}.Release|Any CPU.Build.0 = Release|Any CPU
+		{CC2CE069-5BBB-429E-8510-7C3FBA8069D5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{CC2CE069-5BBB-429E-8510-7C3FBA8069D5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{CC2CE069-5BBB-429E-8510-7C3FBA8069D5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{CC2CE069-5BBB-429E-8510-7C3FBA8069D5}.Release|Any CPU.Build.0 = Release|Any CPU
+		{88D6D124-711D-4232-AD70-F22AB6AF9EA1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{88D6D124-711D-4232-AD70-F22AB6AF9EA1}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{88D6D124-711D-4232-AD70-F22AB6AF9EA1}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{88D6D124-711D-4232-AD70-F22AB6AF9EA1}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/README.md b/README.md
index 7d0a5da..385ff7f 100644
--- a/README.md
+++ b/README.md
@@ -60,7 +60,10 @@ PM> Install-Package Lucene.Net -Pre
 
 <!--- TO BE ADDED WHEN RELEASED 
 
-- [Lucene.Net.Analysis.UIMA](https://www.nuget.org/packages/Lucene.Net.Analysis.UIMA/) - Analysis integration with Apache UIMA)
+- [Lucene.Net.Analysis.OpenNLP](https://www.nuget.org/packages/Lucene.Net.Analysis.OpenNLP/) - OpenNLP Library Integration
+- [Lucene.Net.Analysis.Nori](https://www.nuget.org/packages/Lucene.Net.Analysis.Nori/) - Korean Morphological Analyzer
+- [Lucene.Net.Analysis.Morfologik](https://www.nuget.org/packages/Lucene.Net.Analysis.Morfologik/) - Analyzer for dictionary stemming, built-in Polish dictionary
+- [Lucene.Net.TestFramework](https://www.nuget.org/packages/Lucene.Net.TestFramework/) - Framework for testing Lucene-based applications
 
 -->
 
diff --git a/build/Dependencies.props b/build/Dependencies.props
index 1cb92e6..6852efd 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -50,6 +50,7 @@
     <NewtonsoftJsonPackageVersion>10.0.3</NewtonsoftJsonPackageVersion>
     <NUnit3TestAdapterPackageVersion>3.13.0</NUnit3TestAdapterPackageVersion>
     <NUnitPackageVersion>3.9.0</NUnitPackageVersion>
+    <OpenNLPNETPackageVersion>1.9.1</OpenNLPNETPackageVersion>
     <SharpZipLibPackageVersion>1.1.0</SharpZipLibPackageVersion>
     <SharpZipLibNETStandardPackageVersion>0.86.0.1</SharpZipLibNETStandardPackageVersion>
     <Spatial4nCorePackageVersion>0.4.1</Spatial4nCorePackageVersion>
diff --git a/build/azure-templates/publish-test-results-for-test-projects.yml b/build/azure-templates/publish-test-results-for-test-projects.yml
index af30e2c..1b61c3c 100644
--- a/build/azure-templates/publish-test-results-for-test-projects.yml
+++ b/build/azure-templates/publish-test-results-for-test-projects.yml
@@ -81,6 +81,16 @@ steps:
     testResultsArtifactName: '${{ parameters.testResultsArtifactName }}'
     testResultsFileName: '${{ parameters.testResultsFileName }}'
 
+# Special case: Only supports .net45
+- template: publish-test-results.yml
+  parameters:
+    framework: 'net451'
+    testProjectName: 'Lucene.Net.Tests.Analysis.OpenNLP'
+    osName: '${{ parameters.osName }}'
+    testResultsFormat: '${{ parameters.testResultsFormat }}'
+    testResultsArtifactName: '${{ parameters.testResultsArtifactName }}'
+    testResultsFileName: '${{ parameters.testResultsFileName }}'
+
 - template: publish-test-results-for-target-frameworks.yml
   parameters:
     testProjectName: 'Lucene.Net.Tests._A-D'
diff --git a/build/build.ps1 b/build/build.ps1
index 14513f3..d9725b6 100644
--- a/build/build.ps1
+++ b/build/build.ps1
@@ -215,6 +215,11 @@ task Publish -depends Compile -description "This task uses dotnet publish to pac
 					continue
 				}
 
+				# Special case - OpenNLP.NET only supports .NET Framework
+				if ($projectName.Contains("Tests.Analysis.OpenNLP") -and (!$framework.StartsWith("net45"))) {
+					continue
+				}
+
 				$logPath = "$outDirectory/$framework"
 				$outputPath = "$logPath/$projectName"
 
@@ -272,7 +277,7 @@ task Test -depends InstallSDK, UpdateLocalSDKVersion, Restore -description "This
 	$testProjects = $testProjects | Sort-Object -Property FullName
 
 	$frameworksToTest = Get-FrameworksToTest
-	
+
 	Write-Host "frameworksToTest: $frameworksToTest" -ForegroundColor Yellow
 
 	[int]$totalProjects = $testProjects.Length * $frameworksToTest.Length
@@ -291,6 +296,13 @@ task Test -depends InstallSDK, UpdateLocalSDKVersion, Restore -description "This
 				$remainingProjects--
 				continue
 			}
+			
+			# Special case - OpenNLP.NET only supports .NET Framework
+			if ($testName.Contains("Tests.Analysis.OpenNLP") -and (!$framework.StartsWith("net45"))) {
+				$totalProjects--
+				$remainingProjects--
+				continue
+			}
 
 			Write-Host "  Next Project in Queue: $testName, Framework: $framework" -ForegroundColor Yellow
 
diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj
new file mode 100644
index 0000000..aabca38
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj
@@ -0,0 +1,55 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <Import Project="$(SolutionDir)build/NuGet.props" />
+
+  <PropertyGroup>
+    <!--<TargetFrameworks>netstandard2.0;netstandard1.6</TargetFrameworks>-->
+    <TargetFrameworks Condition="$([MSBuild]::IsOsPlatform('Windows'))">$(TargetFrameworks);net45</TargetFrameworks>
+    <PackageTargetFallback Condition=" '$(TargetFramework)' == 'netstandard1.6' ">$(PackageTargetFallback);dnxcore50</PackageTargetFallback>
+
+    <AssemblyTitle>Lucene.Net.Analysis.OpenNLP</AssemblyTitle>
+    <Description>Analyzer for indexing phonetic signatures (for sounds-alike search) for the Lucene.Net full-text search engine library from The Apache Software Foundation.</Description>
+    <PackageTags>$(PackageTags);analysis;soundex;double;metaphone;sounds;like;beider;morse;cologne;caverphone;nysiis;match;rating</PackageTags>
+    <DocumentationFile>bin\$(Configuration)\$(TargetFramework)\$(AssemblyName).xml</DocumentationFile>
+    <NoWarn>$(NoWarn);1591;1573</NoWarn>
+    <RootNamespace>Lucene.Net.Analysis.OpenNlp</RootNamespace>
+  </PropertyGroup>
+
+  <!-- OpenNLP is not strong-named -->
+  <PropertyGroup Label="Assembly Signing">
+    <SignAssembly>false</SignAssembly>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <Compile Include="..\CommonAssemblyKeys.cs" Link="Properties\CommonAssemblyKeys.cs" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\dotnet\Lucene.Net.ICU\Lucene.Net.ICU.csproj" />
+    <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj" />
+    <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <PackageReference Include="ICU4N" Version="$(ICU4NPackageVersion)" />
+    <PackageReference Include="OpenNLP.NET" Version="$(OpenNLPNETPackageVersion)" />
+  </ItemGroup>
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'netstandard2.0' ">
+    <DebugType>portable</DebugType>
+  </PropertyGroup>
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'netstandard1.6' ">
+    <DebugType>portable</DebugType>
+  </PropertyGroup>
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'net45' ">
+    <DebugType>full</DebugType>
+  </PropertyGroup>
+
+  <ItemGroup Condition=" '$(TargetFramework)' == 'net45' ">
+    <Reference Include="System" />
+    <Reference Include="Microsoft.CSharp" />
+  </ItemGroup>
+
+</Project>
diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPChunkerFilter.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPChunkerFilter.cs
new file mode 100644
index 0000000..9144475
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPChunkerFilter.cs
@@ -0,0 +1,115 @@
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Run OpenNLP chunker. Prerequisite: the <see cref="OpenNLPTokenizer"/> and <see cref="OpenNLPPOSFilter"/> must precede this filter.
+    /// Tags terms in the TypeAttribute, replacing the POS tags previously put there by <see cref="OpenNLPPOSFilter"/>.
+    /// </summary>
+    public sealed class OpenNLPChunkerFilter : TokenFilter
+    {
+        private List<AttributeSource> sentenceTokenAttrs = new List<AttributeSource>();
+        private int tokenNum = 0;
+        private bool moreTokensAvailable = true;
+        private string[] sentenceTerms = null;
+        private string[] sentenceTermPOSTags = null;
+
+        private readonly NLPChunkerOp chunkerOp;
+        private readonly ITypeAttribute typeAtt;
+        private readonly IFlagsAttribute flagsAtt;
+        private readonly ICharTermAttribute termAtt;
+
+        public OpenNLPChunkerFilter(TokenStream input, NLPChunkerOp chunkerOp)
+                  : base(input)
+        {
+            this.chunkerOp = chunkerOp;
+            this.typeAtt = AddAttribute<ITypeAttribute>();
+            this.flagsAtt = AddAttribute<IFlagsAttribute>();
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+        }
+
+        public override sealed bool IncrementToken()
+        {
+            if (!moreTokensAvailable)
+            {
+                Clear();
+                return false;
+            }
+            if (tokenNum == sentenceTokenAttrs.Count)
+            {
+                NextSentence();
+                if (sentenceTerms == null)
+                {
+                    Clear();
+                    return false;
+                }
+                AssignTokenTypes(chunkerOp.GetChunks(sentenceTerms, sentenceTermPOSTags, null));
+                tokenNum = 0;
+            }
+            ClearAttributes();
+            sentenceTokenAttrs[tokenNum++].CopyTo(this);
+            return true;
+        }
+
+        private void NextSentence()
+        {
+            IList<string> termList = new List<string>();
+            IList<string> posTagList = new List<string>();
+            sentenceTokenAttrs.Clear();
+            bool endOfSentence = false;
+            while (!endOfSentence && (moreTokensAvailable = m_input.IncrementToken()))
+            {
+                termList.Add(termAtt.ToString());
+                posTagList.Add(typeAtt.Type);
+                endOfSentence = 0 != (flagsAtt.Flags & OpenNLPTokenizer.EOS_FLAG_BIT);
+                sentenceTokenAttrs.Add(m_input.CloneAttributes());
+            }
+            sentenceTerms = termList.Count > 0 ? termList.ToArray() : null;
+            sentenceTermPOSTags = posTagList.Count > 0 ? posTagList.ToArray() : null;
+        }
+
+        private void AssignTokenTypes(string[] tags)
+        {
+            for (int i = 0; i < tags.Length; ++i)
+            {
+                sentenceTokenAttrs[i].GetAttribute<ITypeAttribute>().Type = tags[i];
+            }
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            moreTokensAvailable = true;
+            Clear();
+        }
+
+        private void Clear()
+        {
+            sentenceTokenAttrs.Clear();
+            sentenceTerms = null;
+            sentenceTermPOSTags = null;
+            tokenNum = 0;
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPChunkerFilterFactory.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPChunkerFilterFactory.cs
new file mode 100644
index 0000000..5669603
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPChunkerFilterFactory.cs
@@ -0,0 +1,90 @@
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="OpenNLPChunkerFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_opennlp_chunked" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.OpenNLPTokenizerFactory" sentenceModel="filename" tokenizerModel="filename"/&gt;
+    ///     &lt;filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="filename"/&gt;
+    ///     &lt;filter class="solr.OpenNLPChunkerFilterFactory" chunkerModel="filename"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    /// <since>7.3.0</since>
+    public class OpenNLPChunkerFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        public static readonly string CHUNKER_MODEL = "chunkerModel";
+
+        private readonly string chunkerModelFile;
+
+        public OpenNLPChunkerFilterFactory(IDictionary<string, string> args)
+                  : base(args)
+        {
+            chunkerModelFile = Get(args, CHUNKER_MODEL);
+            if (args.Any())
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            try
+            {
+                NLPChunkerOp chunkerOp = null;
+
+                if (chunkerModelFile != null)
+                {
+                    chunkerOp = OpenNLPOpsFactory.GetChunker(chunkerModelFile);
+                }
+                return new OpenNLPChunkerFilter(input, chunkerOp);
+            }
+            catch (IOException e)
+            {
+                throw new ArgumentException(e.ToString(), e);
+            }
+        }
+
+        public virtual void Inform(IResourceLoader loader)
+        {
+            try
+            {
+                // load and register read-only models in cache with file/resource names
+                if (chunkerModelFile != null)
+                {
+                    OpenNLPOpsFactory.GetChunkerModel(chunkerModelFile, loader);
+                }
+            }
+            catch (IOException e)
+            {
+                throw new ArgumentException(e.ToString(), e);
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilter.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilter.cs
new file mode 100644
index 0000000..977d98a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilter.cs
@@ -0,0 +1,129 @@
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Runs OpenNLP dictionary-based and/or MaxEnt lemmatizers.
+    /// <para/>
+    /// Both a dictionary-based lemmatizer and a MaxEnt lemmatizer are supported,
+    /// via the "dictionary" and "lemmatizerModel" params, respectively.
+    /// If both are configured, the dictionary-based lemmatizer is tried first,
+    /// and then the MaxEnt lemmatizer is consulted for out-of-vocabulary tokens.
+    /// <para/>
+    /// The dictionary file must be encoded as UTF-8, with one entry per line,
+    /// in the form <c>word[tab]lemma[tab]part-of-speech</c>
+    /// </summary>
+    public class OpenNLPLemmatizerFilter : TokenFilter
+    {
+        private readonly NLPLemmatizerOp lemmatizerOp;
+        private readonly ICharTermAttribute termAtt;
+        private readonly ITypeAttribute typeAtt;
+        private readonly IKeywordAttribute keywordAtt;
+        private readonly IFlagsAttribute flagsAtt;
+        private IList<AttributeSource> sentenceTokenAttrs = new List<AttributeSource>();
+        private IEnumerator<AttributeSource> sentenceTokenAttrsIter = null;
+        private bool moreTokensAvailable = true;
+        private string[] sentenceTokens = null;     // non-keyword tokens
+        private string[] sentenceTokenTypes = null; // types for non-keyword tokens
+        private string[] lemmas = null;             // lemmas for non-keyword tokens
+        private int lemmaNum = 0;                   // lemma counter
+
+        public OpenNLPLemmatizerFilter(TokenStream input, NLPLemmatizerOp lemmatizerOp)
+            : base(input)
+        {
+            this.lemmatizerOp = lemmatizerOp;
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+            this.typeAtt = AddAttribute<ITypeAttribute>();
+            this.keywordAtt = AddAttribute<IKeywordAttribute>();
+            this.flagsAtt = AddAttribute<IFlagsAttribute>();
+        }
+
+        public override sealed bool IncrementToken()
+        {
+            if (!moreTokensAvailable)
+            {
+                Clear();
+                return false;
+            }
+            if (sentenceTokenAttrsIter == null || !sentenceTokenAttrsIter.MoveNext())
+            {
+                NextSentence();
+                if (sentenceTokens == null)
+                { // zero non-keyword tokens
+                    Clear();
+                    return false;
+                }
+                lemmas = lemmatizerOp.Lemmatize(sentenceTokens, sentenceTokenTypes);
+                lemmaNum = 0;
+                sentenceTokenAttrsIter = sentenceTokenAttrs.GetEnumerator();
+                sentenceTokenAttrsIter.MoveNext();
+            }
+            ClearAttributes();
+            sentenceTokenAttrsIter.Current.CopyTo(this);
+            if (!keywordAtt.IsKeyword)
+            {
+                termAtt.SetEmpty().Append(lemmas[lemmaNum++]);
+            }
+            return true;
+
+        }
+
+        private void NextSentence()
+        {
+            IList<string> tokenList = new List<string>();
+            IList<string> typeList = new List<string>();
+            sentenceTokenAttrs.Clear();
+            bool endOfSentence = false;
+            while (!endOfSentence && (moreTokensAvailable = m_input.IncrementToken()))
+            {
+                if (!keywordAtt.IsKeyword)
+                {
+                    tokenList.Add(termAtt.ToString());
+                    typeList.Add(typeAtt.Type);
+                }
+                endOfSentence = 0 != (flagsAtt.Flags & OpenNLPTokenizer.EOS_FLAG_BIT);
+                sentenceTokenAttrs.Add(m_input.CloneAttributes());
+            }
+            sentenceTokens = tokenList.Count > 0 ? tokenList.ToArray() : null;
+            sentenceTokenTypes = typeList.Count > 0 ? typeList.ToArray() : null;
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            moreTokensAvailable = true;
+            Clear();
+        }
+
+        private void Clear()
+        {
+            sentenceTokenAttrs.Clear();
+            sentenceTokenAttrsIter = null;
+            sentenceTokens = null;
+            sentenceTokenTypes = null;
+            lemmas = null;
+            lemmaNum = 0;
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilterFactory.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilterFactory.cs
new file mode 100644
index 0000000..e8fa98c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilterFactory.cs
@@ -0,0 +1,95 @@
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="OpenNLPLemmatizerFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_opennlp_lemma" class="solr.TextField" positionIncrementGap="100"
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.OpenNLPTokenizerFactory"
+    ///                sentenceModel="filename"
+    ///                tokenizerModel="filename"/&gt;
+    ///     /&gt;
+    ///     &lt;filter class="solr.OpenNLPLemmatizerFilterFactory"
+    ///             dictionary="filename"
+    ///             lemmatizerModel="filename"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class OpenNLPLemmatizerFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        public static readonly string DICTIONARY = "dictionary";
+        public static readonly string LEMMATIZER_MODEL = "lemmatizerModel";
+
+        private readonly string dictionaryFile;
+        private readonly string lemmatizerModelFile;
+
+        public OpenNLPLemmatizerFilterFactory(IDictionary<string, string> args)
+                  : base(args)
+        {
+            dictionaryFile = Get(args, DICTIONARY);
+            lemmatizerModelFile = Get(args, LEMMATIZER_MODEL);
+
+            if (dictionaryFile == null && lemmatizerModelFile == null)
+            {
+                throw new ArgumentException("Configuration Error: missing parameter: at least one of '"
+                    + DICTIONARY + "' and '" + LEMMATIZER_MODEL + "' must be provided.");
+            }
+
+            if (args.Any())
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            try
+            {
+                NLPLemmatizerOp lemmatizerOp = OpenNLPOpsFactory.GetLemmatizer(dictionaryFile, lemmatizerModelFile);
+                return new OpenNLPLemmatizerFilter(input, lemmatizerOp);
+            }
+            catch (IOException e)
+            {
+                throw new Exception(e.ToString(), e);
+            }
+        }
+
+        public virtual void Inform(IResourceLoader loader)
+        {
+            // register models in cache with file/resource names
+            if (dictionaryFile != null)
+            {
+                OpenNLPOpsFactory.GetLemmatizerDictionary(dictionaryFile, loader);
+            }
+            if (lemmatizerModelFile != null)
+            {
+                OpenNLPOpsFactory.GetLemmatizerModel(lemmatizerModelFile, loader);
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPPOSFilter.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPPOSFilter.cs
new file mode 100644
index 0000000..449f5ef
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPPOSFilter.cs
@@ -0,0 +1,102 @@
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Run OpenNLP POS tagger.  Tags all terms in the <see cref="ITypeAttribute"/>.
+    /// </summary>
+    public sealed class OpenNLPPOSFilter : TokenFilter
+    {
+        // Attribute snapshots of every token in the sentence currently being tagged.
+        private IList<AttributeSource> sentenceTokenAttrs = new List<AttributeSource>();
+        // POS tags for the buffered sentence; parallel to sentenceTokenAttrs.
+        string[] tags = null;
+        // Index into the buffered sentence of the next token to replay.
+        private int tokenNum = 0;
+        // Becomes false once the upstream stream is exhausted.
+        private bool moreTokensAvailable = true;
+
+        private readonly NLPPOSTaggerOp posTaggerOp;
+        private readonly ITypeAttribute typeAtt;
+        private readonly IFlagsAttribute flagsAtt;
+        private readonly ICharTermAttribute termAtt;
+
+        /// <summary>
+        /// Creates a new <see cref="OpenNLPPOSFilter"/>.
+        /// </summary>
+        /// <param name="input">Upstream stream; sentence-final tokens are expected to carry
+        /// <see cref="OpenNLPTokenizer.EOS_FLAG_BIT"/> in their <see cref="IFlagsAttribute"/>
+        /// (as <see cref="OpenNLPTokenizer"/> produces).</param>
+        /// <param name="posTaggerOp">OpenNLP POS tagger wrapper used to tag each sentence.</param>
+        public OpenNLPPOSFilter(TokenStream input, NLPPOSTaggerOp posTaggerOp)
+                  : base(input)
+        {
+            this.posTaggerOp = posTaggerOp;
+            this.typeAtt = AddAttribute<ITypeAttribute>();
+            this.flagsAtt = AddAttribute<IFlagsAttribute>();
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+        }
+
+        /// <summary>
+        /// Buffers one sentence at a time from the upstream stream, POS-tags it, then
+        /// replays each buffered token with its tag stored in the <see cref="ITypeAttribute"/>.
+        /// </summary>
+        public override sealed bool IncrementToken()
+        {
+            if (!moreTokensAvailable)
+            {
+                Clear();
+                return false;
+            }
+            if (tokenNum == sentenceTokenAttrs.Count)
+            { // beginning of stream, or previous sentence exhausted
+                string[] sentenceTokens = NextSentence();
+                if (sentenceTokens == null)
+                {
+                    Clear();
+                    return false;
+                }
+                tags = posTaggerOp.GetPOSTags(sentenceTokens);
+                tokenNum = 0;
+            }
+            // Restore the buffered token's attributes, then overwrite its type with the POS tag.
+            ClearAttributes();
+            sentenceTokenAttrs[tokenNum].CopyTo(this);
+            typeAtt.Type = tags[tokenNum++];
+            return true;
+        }
+
+        /// <summary>
+        /// Pulls tokens from upstream until an EOS-flagged token (or end of stream),
+        /// capturing each token's term text and a full attribute snapshot.
+        /// Returns the sentence's terms, or <c>null</c> when the stream is exhausted.
+        /// </summary>
+        private string[] NextSentence()
+        {
+            IList<string> termList = new List<string>();
+            sentenceTokenAttrs.Clear();
+            bool endOfSentence = false;
+            while (!endOfSentence && (moreTokensAvailable = m_input.IncrementToken()))
+            {
+                termList.Add(termAtt.ToString());
+                endOfSentence = 0 != (flagsAtt.Flags & OpenNLPTokenizer.EOS_FLAG_BIT);
+                sentenceTokenAttrs.Add(m_input.CloneAttributes());
+            }
+            return termList.Count > 0 ? termList.ToArray() : null;
+        }
+
+        /// <summary>Resets the filter and discards any buffered sentence state.</summary>
+        public override void Reset()
+        {
+            base.Reset();
+            moreTokensAvailable = true;
+            Clear();
+        }
+
+        // Drops all per-sentence state so the next IncrementToken starts a fresh sentence.
+        private void Clear()
+        {
+            sentenceTokenAttrs.Clear();
+            tags = null;
+            tokenNum = 0;
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPPOSFilterFactory.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPPOSFilterFactory.cs
new file mode 100644
index 0000000..57c3262
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPPOSFilterFactory.cs
@@ -0,0 +1,79 @@
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="OpenNLPPOSFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_opennlp_pos" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.OpenNLPTokenizerFactory" sentenceModel="filename" tokenizerModel="filename"/&gt;
+    ///     &lt;filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="filename"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    /// <since>7.3.0</since>
+    public class OpenNLPPOSFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        /// <summary>Name of the required argument holding the POS tagger model file/resource name.</summary>
+        public const string POS_TAGGER_MODEL = "posTaggerModel";
+
+        private readonly string posTaggerModelFile;
+
+        /// <summary>
+        /// Creates a new factory. The <c>posTaggerModel</c> argument is required.
+        /// </summary>
+        /// <param name="args">Factory arguments; consumed entries are removed.</param>
+        /// <exception cref="ArgumentException">
+        /// If the required argument is missing or unknown arguments remain.
+        /// </exception>
+        public OpenNLPPOSFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            this.posTaggerModelFile = Require(args, POS_TAGGER_MODEL);
+            if (args.Count > 0)
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="input"/> in an <see cref="OpenNLPPOSFilter"/> backed by
+        /// the configured POS tagger model.
+        /// </summary>
+        /// <exception cref="ArgumentException">If the model cannot be loaded.</exception>
+        public override TokenStream Create(TokenStream input)
+        {
+            NLPPOSTaggerOp taggerOp;
+            try
+            {
+                taggerOp = OpenNLPOpsFactory.GetPOSTagger(posTaggerModelFile);
+            }
+            catch (IOException e)
+            {
+                throw new ArgumentException(e.ToString(), e);
+            }
+            return new OpenNLPPOSFilter(input, taggerOp);
+        }
+
+        /// <summary>
+        /// Loads the read-only POS tagger model and registers it in the
+        /// <see cref="OpenNLPOpsFactory"/> cache under its file/resource name.
+        /// </summary>
+        public virtual void Inform(IResourceLoader loader)
+        {
+            try
+            {
+                OpenNLPOpsFactory.GetPOSTaggerModel(posTaggerModelFile, loader);
+            }
+            catch (IOException e)
+            {
+                throw new ArgumentException(e.ToString(), e);
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPSentenceBreakIterator.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPSentenceBreakIterator.cs
new file mode 100644
index 0000000..426f7b6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPSentenceBreakIterator.cs
@@ -0,0 +1,277 @@
+using ICU4N.Support.Text;
+using ICU4N.Text;
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.Util;
+using opennlp.tools.util;
+using System;
+using System.Diagnostics;
+using System.Text;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A <see cref="BreakIterator"/> that splits sentences using an OpenNLP sentence chunking model.
+    /// </summary>
+    public sealed class OpenNLPSentenceBreakIterator : BreakIterator
+    {
+        // The text being iterated; replaced on each SetText call.
+        private CharacterIterator text;
+        // Index (into sentenceStarts) of the sentence containing the current position.
+        private int currentSentence;
+        // Start offset of each detected sentence, adjusted to the iterator's BeginIndex.
+        private int[] sentenceStarts;
+        private NLPSentenceDetectorOp sentenceOp;
+
+        public OpenNLPSentenceBreakIterator(NLPSentenceDetectorOp sentenceOp)
+        {
+            this.sentenceOp = sentenceOp;
+        }
+
+        /// <summary>Current boundary position, delegated to the underlying character iterator.</summary>
+        public override int Current => text.Index;
+
+        /// <summary>Moves to the first boundary (the begin index) and returns it.</summary>
+        public override int First()
+        {
+            currentSentence = 0;
+            text.SetIndex(text.BeginIndex);
+            return Current;
+        }
+
+        /// <summary>Moves to the last boundary (the end index) and returns it.</summary>
+        public override int Last()
+        {
+            if (sentenceStarts.Length > 0)
+            {
+                currentSentence = sentenceStarts.Length - 1;
+                text.SetIndex(text.EndIndex);
+            }
+            else
+            { // there are no sentences; both the first and last positions are the begin index
+                currentSentence = 0;
+                text.SetIndex(text.BeginIndex);
+            }
+            return Current;
+        }
+
+        /// <summary>
+        /// Advances to the next sentence start, or to the end index from the last sentence;
+        /// returns <see cref="BreakIterator.Done"/> when already at the end (or no sentences).
+        /// </summary>
+        public override int Next()
+        {
+            if (text.Index == text.EndIndex || 0 == sentenceStarts.Length)
+            {
+                return Done;
+            }
+            else if (currentSentence < sentenceStarts.Length - 1)
+            {
+                text.SetIndex(sentenceStarts[++currentSentence]);
+                return Current;
+            }
+            else
+            {
+                return Last();
+            }
+        }
+
+        /// <summary>
+        /// Returns the first sentence boundary following <paramref name="pos"/>.
+        /// </summary>
+        /// <exception cref="ArgumentException">If <paramref name="pos"/> is outside the text range.</exception>
+        public override int Following(int pos)
+        {
+            if (pos < text.BeginIndex || pos > text.EndIndex)
+            {
+                throw new ArgumentException("offset out of bounds");
+            }
+            else if (0 == sentenceStarts.Length)
+            {
+                text.SetIndex(text.BeginIndex);
+                return Done;
+            }
+            else if (pos >= sentenceStarts[sentenceStarts.Length - 1])
+            {
+                // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+                // https://bugs.openjdk.java.net/browse/JDK-8015110
+                text.SetIndex(text.EndIndex);
+                currentSentence = sentenceStarts.Length - 1;
+                return Done;
+            }
+            else
+            { // there are at least two sentences
+                currentSentence = (sentenceStarts.Length - 1) / 2; // start search from the middle
+                MoveToSentenceAt(pos, 0, sentenceStarts.Length - 2);
+                text.SetIndex(sentenceStarts[++currentSentence]);
+                return Current;
+            }
+        }
+
+        /// <summary>Binary search over sentences</summary>
+        // Positions currentSentence so that
+        // sentenceStarts[currentSentence] <= pos < sentenceStarts[currentSentence + 1]
+        // (or pos <= EndIndex for the final sentence). Recursion depth is O(log n).
+        private void MoveToSentenceAt(int pos, int minSentence, int maxSentence)
+        {
+            if (minSentence != maxSentence)
+            {
+                if (pos < sentenceStarts[currentSentence])
+                {
+                    int newMaxSentence = currentSentence - 1;
+                    currentSentence = minSentence + (currentSentence - minSentence) / 2;
+                    MoveToSentenceAt(pos, minSentence, newMaxSentence);
+                }
+                else if (pos >= sentenceStarts[currentSentence + 1])
+                {
+                    int newMinSentence = currentSentence + 1;
+                    currentSentence = maxSentence - (maxSentence - currentSentence) / 2;
+                    MoveToSentenceAt(pos, newMinSentence, maxSentence);
+                }
+            }
+            else
+            {
+                Debug.Assert(currentSentence == minSentence);
+                Debug.Assert(pos >= sentenceStarts[currentSentence]);
+                Debug.Assert((currentSentence == sentenceStarts.Length - 1 && pos <= text.EndIndex)
+                    || pos < sentenceStarts[currentSentence + 1]);
+            }
+            // we have arrived - nothing to do
+        }
+
+        /// <summary>
+        /// Moves to the previous sentence start and returns it, or
+        /// <see cref="BreakIterator.Done"/> when already at the begin index.
+        /// </summary>
+        public override int Previous()
+        {
+            if (text.Index == text.BeginIndex)
+            {
+                return Done;
+            }
+            else
+            {
+                if (0 == sentenceStarts.Length)
+                {
+                    text.SetIndex(text.BeginIndex);
+                    return Done;
+                }
+                if (text.Index == text.EndIndex)
+                {
+                    // Stepping back from the end lands on the start of the last sentence.
+                    text.SetIndex(sentenceStarts[currentSentence]);
+                }
+                else
+                {
+                    text.SetIndex(sentenceStarts[--currentSentence]);
+                }
+                return Current;
+            }
+        }
+
+        /// <summary>
+        /// Returns the last sentence boundary preceding <paramref name="pos"/>.
+        /// </summary>
+        /// <exception cref="ArgumentException">If <paramref name="pos"/> is outside the text range.</exception>
+        public override int Preceding(int pos)
+        {
+            if (pos < text.BeginIndex || pos > text.EndIndex)
+            {
+                throw new ArgumentException("offset out of bounds");
+            }
+            else if (0 == sentenceStarts.Length)
+            {
+                text.SetIndex(text.BeginIndex);
+                currentSentence = 0;
+                return Done;
+            }
+            else if (pos < sentenceStarts[0])
+            {
+                // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+                // https://bugs.openjdk.java.net/browse/JDK-8015110
+                text.SetIndex(text.BeginIndex);
+                currentSentence = 0;
+                return Done;
+            }
+            else
+            {
+                currentSentence = sentenceStarts.Length / 2; // start search from the middle
+                MoveToSentenceAt(pos, 0, sentenceStarts.Length - 1);
+                if (0 == currentSentence)
+                {
+                    text.SetIndex(text.BeginIndex);
+                    return Done;
+                }
+                else
+                {
+                    text.SetIndex(sentenceStarts[--currentSentence]);
+                    return Current;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Advances by <paramref name="n"/> sentence boundaries (backwards when negative).
+        /// </summary>
+        // NOTE(review): this assumes sentenceStarts is non-empty when n != 0; an empty
+        // text with a nonzero n would index out of range — confirm callers only use this
+        // after SetText on non-empty input (mirrors the upstream Lucene Java code).
+        public override int Next(int n)
+        {
+            currentSentence += n;
+            if (n < 0)
+            {
+                if (text.Index == text.EndIndex)
+                {
+                    ++currentSentence;
+                }
+                if (currentSentence < 0)
+                {
+                    currentSentence = 0;
+                    text.SetIndex(text.BeginIndex);
+                    return Done;
+                }
+                else
+                {
+                    text.SetIndex(sentenceStarts[currentSentence]);
+                }
+            }
+            else if (n > 0)
+            {
+                if (currentSentence >= sentenceStarts.Length)
+                {
+                    currentSentence = sentenceStarts.Length - 1;
+                    text.SetIndex(text.EndIndex);
+                    return Done;
+                }
+                else
+                {
+                    text.SetIndex(sentenceStarts[currentSentence]);
+                }
+            }
+            return Current;
+        }
+
+        public override CharacterIterator Text => text;
+
+        /// <summary>
+        /// Installs new text: runs OpenNLP sentence detection over the full text and
+        /// records each sentence's start offset (shifted by the iterator's BeginIndex).
+        /// </summary>
+        public override void SetText(CharacterIterator newText)
+        {
+            text = newText;
+            text.SetIndex(text.BeginIndex);
+            currentSentence = 0;
+            Span[] spans = sentenceOp.SplitSentences(CharacterIteratorToString());
+            sentenceStarts = new int[spans.Length];
+            for (int i = 0; i < spans.Length; ++i)
+            {
+                // Adjust start positions to match those of the passed-in CharacterIterator
+                sentenceStarts[i] = spans[i].getStart() + text.BeginIndex;
+            }
+        }
+
+        // Extracts the full text from the iterator: directly from the backing array for
+        // CharArrayIterator, otherwise by walking the iterator char by char (and restoring
+        // the index to BeginIndex afterwards).
+        private string CharacterIteratorToString()
+        {
+            string fullText;
+            if (text is CharArrayIterator)
+            {
+                CharArrayIterator charArrayIterator = (CharArrayIterator)text;
+                fullText = new string(charArrayIterator.Text, charArrayIterator.Start, charArrayIterator.Length);
+            }
+            else
+            {
+                // TODO: is there a better way to extract full text from arbitrary CharacterIterators?
+                StringBuilder builder = new StringBuilder();
+                for (char ch = text.First(); ch != CharacterIterator.Done; ch = text.Next())
+                {
+                    builder.Append(ch);
+                }
+                fullText = builder.ToString();
+                text.SetIndex(text.BeginIndex);
+            }
+            return fullText;
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPTokenizer.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPTokenizer.cs
new file mode 100644
index 0000000..0a45b1e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPTokenizer.cs
@@ -0,0 +1,112 @@
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+using opennlp.tools.util;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Run OpenNLP SentenceDetector and <see cref="Tokenizer"/>.
+    /// The last token in each sentence is marked by setting the <see cref="EOS_FLAG_BIT"/> in the <see cref="IFlagsAttribute"/>;
+    /// following filters can use this information to apply operations to tokens one sentence at a time.
+    /// </summary>
+    public sealed class OpenNLPTokenizer : SegmentingTokenizerBase
+    {
+        /// <summary>
+        /// Flag bit set on the <see cref="IFlagsAttribute"/> of the last token in each sentence.
+        /// </summary>
+        // LUCENENET: made readonly — a writable public static field could be reassigned at
+        // runtime, silently breaking the end-of-sentence contract relied on by downstream
+        // filters such as OpenNLPPOSFilter.
+        public static readonly int EOS_FLAG_BIT = 1;
+
+        private readonly ICharTermAttribute termAtt;
+        private readonly IFlagsAttribute flagsAtt;
+        private readonly IOffsetAttribute offsetAtt;
+
+        private Span[] termSpans = null;   // term spans of the current sentence (relative to sentence start)
+        private int termNum = 0;           // index of the next span in termSpans to emit
+        private int sentenceStart = 0;     // start offset of the current sentence within m_buffer
+
+        // Both ops are required (validated in the constructor) and never change afterwards.
+        private readonly NLPSentenceDetectorOp sentenceOp;
+        private readonly NLPTokenizerOp tokenizerOp;
+
+        /// <summary>
+        /// Creates a new <see cref="OpenNLPTokenizer"/> with the default attribute factory. </summary>
+        public OpenNLPTokenizer(TextReader reader, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp) // LUCENENET 4.8.0 specific overload to default AttributeFactory
+            : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, sentenceOp, tokenizerOp)
+        {
+        }
+
+        /// <summary>
+        /// Creates a new <see cref="OpenNLPTokenizer"/>.
+        /// </summary>
+        /// <exception cref="ArgumentException">If either op is <c>null</c>.</exception>
+        public OpenNLPTokenizer(AttributeFactory factory, TextReader reader, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp) // LUCENENET: Added reader param for compatibility with 4.8 - remove when upgrading
+            : base(factory, reader, new OpenNLPSentenceBreakIterator(sentenceOp))
+        {
+            // NOTE(review): the base call above already passed sentenceOp to the break
+            // iterator before this null check runs (same as the upstream Java code) —
+            // confirm the break iterator tolerates a null op until SetText is invoked.
+            if (sentenceOp == null || tokenizerOp == null)
+            {
+                throw new ArgumentException("OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
+            }
+            this.sentenceOp = sentenceOp;
+            this.tokenizerOp = tokenizerOp;
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+            this.flagsAtt = AddAttribute<IFlagsAttribute>();
+            this.offsetAtt = AddAttribute<IOffsetAttribute>();
+        }
+
+        /// <summary>Disposes the tokenizer and clears per-sentence state.</summary>
+        protected override void Dispose(bool disposing)
+        {
+            base.Dispose(disposing);
+            if (disposing)
+            {
+                termSpans = null;
+                termNum = sentenceStart = 0;
+            }
+        }
+
+        /// <summary>
+        /// Called by <see cref="SegmentingTokenizerBase"/> for each detected sentence;
+        /// tokenizes the sentence text and resets the term cursor.
+        /// </summary>
+        protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
+        {
+            this.sentenceStart = sentenceStart;
+            string sentenceText = new string(m_buffer, sentenceStart, sentenceEnd - sentenceStart);
+            termSpans = tokenizerOp.GetTerms(sentenceText);
+            termNum = 0;
+        }
+
+        /// <summary>
+        /// Emits the next term of the current sentence, flagging the final term with
+        /// <see cref="EOS_FLAG_BIT"/>; returns <c>false</c> when the sentence is exhausted.
+        /// </summary>
+        protected override bool IncrementWord()
+        {
+            if (termSpans == null || termNum == termSpans.Length)
+            {
+                return false;
+            }
+            ClearAttributes();
+            Span term = termSpans[termNum];
+            termAtt.CopyBuffer(m_buffer, sentenceStart + term.getStart(), term.length());
+            offsetAtt.SetOffset(CorrectOffset(m_offset + sentenceStart + term.getStart()),
+                                CorrectOffset(m_offset + sentenceStart + term.getEnd()));
+            if (termNum == termSpans.Length - 1)
+            {
+                flagsAtt.Flags = flagsAtt.Flags | EOS_FLAG_BIT; // mark the last token in the sentence with EOS_FLAG_BIT
+            }
+            ++termNum;
+            return true;
+        }
+
+        /// <summary>Resets the tokenizer and discards the current sentence's terms.</summary>
+        public override void Reset()
+        {
+            base.Reset();
+            termSpans = null;
+            termNum = sentenceStart = 0;
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPTokenizerFactory.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPTokenizerFactory.cs
new file mode 100644
index 0000000..d015d10
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPTokenizerFactory.cs
@@ -0,0 +1,85 @@
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using AttributeFactory = Lucene.Net.Util.AttributeSource.AttributeFactory;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="OpenNLPTokenizer"/>.
+    /// <code>
+    /// &lt;fieldType name="text_opennlp" class="solr.TextField" positionIncrementGap="100"
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.OpenNLPTokenizerFactory" sentenceModel="filename" tokenizerModel="filename"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    /// <since>7.3.0</since>
+    public class OpenNLPTokenizerFactory : TokenizerFactory, IResourceLoaderAware
+    {
+        /// <summary>Name of the required argument holding the sentence-detector model file/resource name.</summary>
+        public const string SENTENCE_MODEL = "sentenceModel";
+        /// <summary>Name of the required argument holding the tokenizer model file/resource name.</summary>
+        public const string TOKENIZER_MODEL = "tokenizerModel";
+
+        private readonly string sentenceModelFile;
+        private readonly string tokenizerModelFile;
+
+        /// <summary>
+        /// Creates a new factory. Both <c>sentenceModel</c> and <c>tokenizerModel</c> are required.
+        /// </summary>
+        /// <param name="args">Factory arguments; consumed entries are removed.</param>
+        /// <exception cref="ArgumentException">
+        /// If a required argument is missing or unknown arguments remain.
+        /// </exception>
+        public OpenNLPTokenizerFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            sentenceModelFile = Require(args, SENTENCE_MODEL);
+            tokenizerModelFile = Require(args, TOKENIZER_MODEL);
+            if (args.Any())
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        /// <summary>
+        /// Creates an <see cref="OpenNLPTokenizer"/> using the configured sentence and tokenizer models.
+        /// </summary>
+        /// <exception cref="ArgumentException">If either model cannot be loaded.</exception>
+        public override Tokenizer Create(AttributeFactory factory, TextReader reader)
+        {
+            try
+            {
+                NLPSentenceDetectorOp sentenceOp = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
+                NLPTokenizerOp tokenizerOp = OpenNLPOpsFactory.GetTokenizer(tokenizerModelFile);
+                return new OpenNLPTokenizer(factory, reader, sentenceOp, tokenizerOp);
+            }
+            catch (IOException e)
+            {
+                // LUCENENET: wrap as ArgumentException for consistency with
+                // OpenNLPPOSFilterFactory rather than throwing a bare System.Exception.
+                throw new ArgumentException(e.ToString(), e);
+            }
+        }
+
+        /// <summary>
+        /// Pre-loads both models into the <see cref="OpenNLPOpsFactory"/> cache,
+        /// keyed by their file/resource names.
+        /// </summary>
+        public virtual void Inform(IResourceLoader loader)
+        {
+            // Both fields are non-null after construction (Require throws otherwise);
+            // the null checks are kept to preserve the original defensive behavior.
+            if (sentenceModelFile != null)
+            {
+                OpenNLPOpsFactory.GetSentenceModel(sentenceModelFile, loader);
+            }
+            if (tokenizerModelFile != null)
+            {
+                OpenNLPOpsFactory.GetTokenizerModel(tokenizerModelFile, loader);
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPChunkerOp.cs b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPChunkerOp.cs
new file mode 100644
index 0000000..1c3ecc8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPChunkerOp.cs
@@ -0,0 +1,47 @@
+using opennlp.tools.chunker;
+
+
+namespace Lucene.Net.Analysis.OpenNlp.Tools
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Supply OpenNLP Chunking tool
+    /// Requires binary models from OpenNLP project on SourceForge.
+    /// </summary>
+    public class NLPChunkerOp
+    {
+        // LUCENENET: private gate object instead of lock(this) — locking on a publicly
+        // visible instance lets external code contend for (or deadlock) the same monitor.
+        private readonly object syncLock = new object();
+        private readonly ChunkerME chunker;
+
+        /// <summary>
+        /// Creates a chunker operation backed by the supplied OpenNLP chunker model.
+        /// </summary>
+        public NLPChunkerOp(ChunkerModel chunkerModel)
+        {
+            chunker = new ChunkerME(chunkerModel);
+        }
+
+        /// <summary>
+        /// Chunks the given words/POS tags; when <paramref name="probs"/> is non-null it is
+        /// filled with the probabilities of the last chunking call. Synchronized because
+        /// <c>ChunkerME</c> is not thread-safe.
+        /// </summary>
+        public virtual string[] GetChunks(string[] words, string[] tags, double[] probs)
+        {
+            lock (syncLock)
+            {
+                string[] chunks = chunker.chunk(words, tags);
+                if (probs != null)
+                    chunker.probs(probs);
+                return chunks;
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPLemmatizerOp.cs b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPLemmatizerOp.cs
new file mode 100644
index 0000000..56b1d2b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPLemmatizerOp.cs
@@ -0,0 +1,93 @@
+using opennlp.tools.lemmatizer;
+using System.Diagnostics;
+using System.IO;
+
+namespace Lucene.Net.Analysis.OpenNlp.Tools
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Supply OpenNLP Lemmatizer tools.
+    /// <para/>
+    /// Both a dictionary-based lemmatizer and a MaxEnt lemmatizer are supported.
+    /// If both are configured, the dictionary-based lemmatizer is tried first,
+    /// and then the MaxEnt lemmatizer is consulted for out-of-vocabulary tokens.
+    /// <para/>
+    /// The MaxEnt implementation requires binary models from OpenNLP project on SourceForge.
+    /// </summary>
+    public class NLPLemmatizerOp
+    {
+        // At most one of these may be null (asserted in the constructor).
+        private readonly DictionaryLemmatizer dictionaryLemmatizer;
+        private readonly LemmatizerME lemmatizerME;
+
+        /// <summary>
+        /// Creates a lemmatizer operation from a dictionary stream and/or a MaxEnt model.
+        /// At least one must be non-null.
+        /// </summary>
+        // NOTE(review): Debug.Assert is compiled out in Release builds, so passing two
+        // nulls would surface later as a NullReferenceException in Lemmatize — confirm
+        // this matches the upstream Java assert semantics before hardening.
+        public NLPLemmatizerOp(Stream dictionary, LemmatizerModel lemmatizerModel)
+        {
+            Debug.Assert(dictionary != null || lemmatizerModel != null, "At least one parameter must be non-null");
+            dictionaryLemmatizer = dictionary == null ? null : new DictionaryLemmatizer(new ikvm.io.InputStreamWrapper(dictionary));
+            lemmatizerME = lemmatizerModel == null ? null : new LemmatizerME(lemmatizerModel);
+        }
+
+        /// <summary>
+        /// Lemmatizes <paramref name="words"/> (parallel to <paramref name="postags"/>).
+        /// The dictionary is consulted first when configured; out-of-vocabulary words
+        /// (dictionary result "O") fall back to the MaxEnt model when it is configured,
+        /// and to the original word when no lemma is found (MaxEnt result "_").
+        /// </summary>
+        public virtual string[] Lemmatize(string[] words, string[] postags)
+        {
+            string[] lemmas = null;
+            string[] maxEntLemmas = null;
+            if (dictionaryLemmatizer != null)
+            {
+                lemmas = dictionaryLemmatizer.lemmatize(words, postags);
+                for (int i = 0; i < lemmas.Length; ++i)
+                {
+                    if (lemmas[i].Equals("O"))
+                    {   // this word is not in the dictionary
+                        if (lemmatizerME != null)
+                        {  // fall back to the MaxEnt lemmatizer if it's enabled
+                            if (maxEntLemmas == null)
+                            {
+                                // Lazily computed once per call: the MaxEnt pass covers the whole sentence.
+                                maxEntLemmas = lemmatizerME.lemmatize(words, postags);
+                            }
+                            if ("_".Equals(maxEntLemmas[i]))
+                            {
+                                lemmas[i] = words[i];    // put back the original word if no lemma is found
+                            }
+                            else
+                            {
+                                lemmas[i] = maxEntLemmas[i];
+                            }
+                        }
+                        else
+                        {                     // there is no MaxEnt lemmatizer
+                            lemmas[i] = words[i];      // put back the original word if no lemma is found
+                        }
+                    }
+                }
+            }
+            else
+            {                           // there is only a MaxEnt lemmatizer
+                maxEntLemmas = lemmatizerME.lemmatize(words, postags);
+                for (int i = 0; i < maxEntLemmas.Length; ++i)
+                {
+                    if ("_".Equals(maxEntLemmas[i]))
+                    {
+                        maxEntLemmas[i] = words[i];  // put back the original word if no lemma is found
+                    }
+                }
+                lemmas = maxEntLemmas;
+            }
+            return lemmas;
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPNERTaggerOp.cs b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPNERTaggerOp.cs
new file mode 100644
index 0000000..e4b43ac
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPNERTaggerOp.cs
@@ -0,0 +1,61 @@
+using opennlp.tools.namefind;
+using opennlp.tools.util;
+
+namespace Lucene.Net.Analysis.OpenNlp.Tools
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Supply OpenNLP Named Entity Resolution tool
+    /// Requires binary models from OpenNLP project on SourceForge.
+    /// <para/>
+    /// Usage: from <a href="http://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.namefind.recognition.api">
+    /// the OpenNLP documentation</a>:
+    /// <para/>
+    /// "The NameFinderME class is not thread safe, it must only be called from one thread.
+    /// To use multiple threads multiple NameFinderME instances sharing the same model instance
+    /// can be created. The input text should be segmented into documents, sentences and tokens.
+    /// To perform entity detection an application calls the find method for every sentence in
+    /// the document. After every document clearAdaptiveData must be called to clear the adaptive
+    /// data in the feature generators. Not calling clearAdaptiveData can lead to a sharp drop
+    /// in the detection rate after a few documents."
+    /// </summary>
+    public class NLPNERTaggerOp
+    {
+        // Dedicated gate object. Locking on 'this' would expose the monitor to
+        // external code, which could then contend with (or deadlock against)
+        // the internal synchronization.
+        private readonly object syncLock = new object();
+        private readonly TokenNameFinder nameFinder;
+
+        /// <summary>
+        /// Creates a tagger backed by a <see cref="NameFinderME"/> built from
+        /// <paramref name="model"/>. <c>NameFinderME</c> is not thread safe; each
+        /// thread needs its own instance, though instances may share the model.
+        /// </summary>
+        public NLPNERTaggerOp(TokenNameFinderModel model)
+        {
+            this.nameFinder = new NameFinderME(model);
+        }
+
+        /// <summary>
+        /// Finds named-entity spans in a single tokenized sentence.
+        /// </summary>
+        /// <param name="words">The tokens of one sentence.</param>
+        /// <returns>Token-index spans of the entities found.</returns>
+        public virtual Span[] GetNames(string[] words)
+        {
+            Span[] names = nameFinder.find(words);
+            return names;
+        }
+
+        /// <summary>
+        /// Clears the adaptive data in the feature generators. Per the OpenNLP
+        /// documentation this must be called after every document, otherwise the
+        /// detection rate drops sharply after a few documents.
+        /// </summary>
+        public virtual void Reset()
+        {
+            lock (syncLock) // LUCENENET: lock on a private gate rather than 'this'
+            {
+                nameFinder.clearAdaptiveData();
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPPOSTaggerOp.cs b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPPOSTaggerOp.cs
new file mode 100644
index 0000000..a69a7d0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPPOSTaggerOp.cs
@@ -0,0 +1,43 @@
+using opennlp.tools.postag;
+
+namespace Lucene.Net.Analysis.OpenNlp.Tools
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Supply OpenNLP Parts-Of-Speech Tagging tool.
+    /// Requires binary models from OpenNLP project on SourceForge.
+    /// </summary>
+    public class NLPPOSTaggerOp
+    {
+        // Dedicated gate object; locking on 'this' would expose the monitor to callers.
+        private readonly object syncLock = new object();
+        private readonly POSTagger tagger; // LUCENENET: made readonly; only assigned in the constructor
+
+        /// <summary>
+        /// Creates a tagger backed by a <see cref="POSTaggerME"/> built from
+        /// <paramref name="model"/>.
+        /// </summary>
+        public NLPPOSTaggerOp(POSModel model)
+        {
+            tagger = new POSTaggerME(model);
+        }
+
+        /// <summary>
+        /// Tags one tokenized sentence. <c>POSTaggerME</c> is not thread safe,
+        /// so the call is synchronized.
+        /// </summary>
+        /// <param name="words">The tokens of one sentence.</param>
+        /// <returns>One POS tag per input token.</returns>
+        public virtual string[] GetPOSTags(string[] words)
+        {
+            lock (syncLock) // LUCENENET: lock on a private gate rather than 'this'
+            {
+                return tagger.tag(words);
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPSentenceDetectorOp.cs b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPSentenceDetectorOp.cs
new file mode 100644
index 0000000..ebca1c5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPSentenceDetectorOp.cs
@@ -0,0 +1,58 @@
+using opennlp.tools.sentdetect;
+using opennlp.tools.util;
+
+namespace Lucene.Net.Analysis.OpenNlp.Tools
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Supply OpenNLP Sentence Detector tool.
+    /// Requires binary models from OpenNLP project on SourceForge.
+    /// </summary>
+    public class NLPSentenceDetectorOp
+    {
+        // Dedicated gate object; locking on 'this' would expose the monitor to callers.
+        private readonly object syncLock = new object();
+        private readonly SentenceDetectorME sentenceSplitter;
+
+        /// <summary>
+        /// Creates a detector backed by a <see cref="SentenceDetectorME"/> built
+        /// from <paramref name="model"/>.
+        /// </summary>
+        public NLPSentenceDetectorOp(SentenceModel model)
+        {
+            sentenceSplitter = new SentenceDetectorME(model);
+        }
+
+        /// <summary>
+        /// Creates a no-op detector: <see cref="SplitSentences(string)"/> will
+        /// treat the whole input as a single sentence.
+        /// </summary>
+        public NLPSentenceDetectorOp()
+        {
+            sentenceSplitter = null;
+        }
+
+        /// <summary>
+        /// Splits <paramref name="line"/> into sentence spans. When no model was
+        /// supplied, returns one span covering the entire input. The call is
+        /// synchronized because <c>SentenceDetectorME</c> is not thread safe.
+        /// </summary>
+        public virtual Span[] SplitSentences(string line)
+        {
+            lock (syncLock) // LUCENENET: lock on a private gate rather than 'this'
+            {
+                if (sentenceSplitter != null)
+                {
+                    return sentenceSplitter.sentPosDetect(line);
+                }
+                else
+                {
+                    Span[] shorty = new Span[1];
+                    shorty[0] = new Span(0, line.Length);
+                    return shorty;
+                }
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPTokenizerOp.cs b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPTokenizerOp.cs
new file mode 100644
index 0000000..1e96163
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/Tools/NLPTokenizerOp.cs
@@ -0,0 +1,55 @@
+using opennlp.tools.tokenize;
+using opennlp.tools.util;
+
+namespace Lucene.Net.Analysis.OpenNlp.Tools
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Supply OpenNLP Sentence Tokenizer tool.
+    /// Requires binary models from OpenNLP project on SourceForge.
+    /// </summary>
+    public class NLPTokenizerOp
+    {
+        // Dedicated gate object; locking on 'this' would expose the monitor to callers.
+        private readonly object syncLock = new object();
+        private readonly TokenizerME tokenizer;
+
+        /// <summary>
+        /// Creates a tokenizer backed by a <see cref="TokenizerME"/> built from
+        /// <paramref name="model"/>.
+        /// </summary>
+        public NLPTokenizerOp(TokenizerModel model)
+        {
+            tokenizer = new TokenizerME(model);
+        }
+
+        /// <summary>
+        /// Creates a no-op tokenizer: <see cref="GetTerms(string)"/> will treat
+        /// the whole sentence as a single token.
+        /// </summary>
+        public NLPTokenizerOp()
+        {
+            tokenizer = null;
+        }
+
+        /// <summary>
+        /// Tokenizes one sentence, returning character-offset spans. When no
+        /// model was supplied, returns one span covering the entire sentence.
+        /// The call is synchronized because <c>TokenizerME</c> is not thread safe.
+        /// </summary>
+        public virtual Span[] GetTerms(string sentence)
+        {
+            lock (syncLock) // LUCENENET: lock on a private gate rather than 'this'
+            {
+                if (tokenizer == null)
+                {
+                    Span[] span1 = new Span[1];
+                    span1[0] = new Span(0, sentence.Length);
+                    return span1;
+                }
+                return tokenizer.tokenizePos(sentence);
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.OpenNLP/Tools/OpenNLPOpsFactory.cs b/src/Lucene.Net.Analysis.OpenNLP/Tools/OpenNLPOpsFactory.cs
new file mode 100644
index 0000000..c49e429
--- /dev/null
+++ b/src/Lucene.Net.Analysis.OpenNLP/Tools/OpenNLPOpsFactory.cs
@@ -0,0 +1,225 @@
+using Lucene.Net.Analysis.Util;
+using opennlp.tools.chunker;
+using opennlp.tools.lemmatizer;
+using opennlp.tools.namefind;
+using opennlp.tools.postag;
+using opennlp.tools.sentdetect;
+using opennlp.tools.tokenize;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.OpenNlp.Tools
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Supply OpenNLP Named Entity Recognizer.
+    /// Cache model file objects. Assumes model files are thread-safe.
+    /// </summary>
+    public static class OpenNLPOpsFactory // LUCENENET: Made static because all members are static
+    {
+        // Model caches, keyed by resource name. Declared uniformly as
+        // ConcurrentDictionary (the original mixed IDictionary and
+        // ConcurrentDictionary declarations for identically-used caches).
+        // Note: the TryGetValue-then-store pattern below is not atomic; in the
+        // worst case a model is loaded twice under contention, which is benign.
+        private static readonly ConcurrentDictionary<string, SentenceModel> sentenceModels = new ConcurrentDictionary<string, SentenceModel>();
+        private static readonly ConcurrentDictionary<string, TokenizerModel> tokenizerModels = new ConcurrentDictionary<string, TokenizerModel>();
+        private static readonly ConcurrentDictionary<string, POSModel> posTaggerModels = new ConcurrentDictionary<string, POSModel>();
+        private static readonly ConcurrentDictionary<string, ChunkerModel> chunkerModels = new ConcurrentDictionary<string, ChunkerModel>();
+        private static readonly ConcurrentDictionary<string, TokenNameFinderModel> nerModels = new ConcurrentDictionary<string, TokenNameFinderModel>();
+        private static readonly ConcurrentDictionary<string, LemmatizerModel> lemmatizerModels = new ConcurrentDictionary<string, LemmatizerModel>();
+        private static readonly ConcurrentDictionary<string, string> lemmaDictionaries = new ConcurrentDictionary<string, string>();
+
+        /// <summary>
+        /// Returns a sentence detector for the cached model named
+        /// <paramref name="modelName"/>, or a no-op detector when
+        /// <paramref name="modelName"/> is <c>null</c>.
+        /// </summary>
+        public static NLPSentenceDetectorOp GetSentenceDetector(string modelName)
+        {
+            if (modelName != null)
+            {
+                sentenceModels.TryGetValue(modelName, out SentenceModel model);
+                return new NLPSentenceDetectorOp(model);
+            }
+            else
+            {
+                return new NLPSentenceDetectorOp();
+            }
+        }
+
+        /// <summary>
+        /// Loads (and caches) the sentence model resource named <paramref name="modelName"/>.
+        /// </summary>
+        public static SentenceModel GetSentenceModel(string modelName, IResourceLoader loader)
+        {
+            sentenceModels.TryGetValue(modelName, out SentenceModel model);
+            if (model == null)
+            {
+                using (Stream resource = loader.OpenResource(modelName))
+                {
+                    model = new SentenceModel(new ikvm.io.InputStreamWrapper(resource));
+                }
+                sentenceModels[modelName] = model;
+            }
+            return model;
+        }
+
+        /// <summary>
+        /// Returns a tokenizer for the cached model named <paramref name="modelName"/>,
+        /// or a no-op tokenizer when <paramref name="modelName"/> is <c>null</c>.
+        /// Unlike the other getters, this throws if the model has not been cached
+        /// (indexer access) — presumably intentional; confirm against callers.
+        /// </summary>
+        public static NLPTokenizerOp GetTokenizer(string modelName)
+        {
+            if (modelName == null)
+            {
+                return new NLPTokenizerOp();
+            }
+            else
+            {
+                TokenizerModel model = tokenizerModels[modelName];
+                return new NLPTokenizerOp(model);
+            }
+        }
+
+        /// <summary>
+        /// Loads (and caches) the tokenizer model resource named <paramref name="modelName"/>.
+        /// </summary>
+        public static TokenizerModel GetTokenizerModel(string modelName, IResourceLoader loader)
+        {
+            tokenizerModels.TryGetValue(modelName, out TokenizerModel model);
+            if (model == null)
+            {
+                using (Stream resource = loader.OpenResource(modelName))
+                {
+                    model = new TokenizerModel(new ikvm.io.InputStreamWrapper(resource));
+                }
+                tokenizerModels[modelName] = model;
+            }
+            return model;
+        }
+
+        /// <summary>
+        /// Returns a POS tagger for the cached model named <paramref name="modelName"/>.
+        /// </summary>
+        public static NLPPOSTaggerOp GetPOSTagger(string modelName)
+        {
+            posTaggerModels.TryGetValue(modelName, out POSModel model);
+            return new NLPPOSTaggerOp(model);
+        }
+
+        /// <summary>
+        /// Loads (and caches) the POS tagger model resource named <paramref name="modelName"/>.
+        /// </summary>
+        public static POSModel GetPOSTaggerModel(string modelName, IResourceLoader loader)
+        {
+            posTaggerModels.TryGetValue(modelName, out POSModel model);
+            if (model == null)
+            {
+                using (Stream resource = loader.OpenResource(modelName))
+                {
+                    model = new POSModel(new ikvm.io.InputStreamWrapper(resource));
+                }
+                posTaggerModels[modelName] = model;
+            }
+            return model;
+        }
+
+        /// <summary>
+        /// Returns a chunker for the cached model named <paramref name="modelName"/>.
+        /// </summary>
+        public static NLPChunkerOp GetChunker(string modelName)
+        {
+            chunkerModels.TryGetValue(modelName, out ChunkerModel model);
+            return new NLPChunkerOp(model);
+        }
+
+        /// <summary>
+        /// Loads (and caches) the chunker model resource named <paramref name="modelName"/>.
+        /// </summary>
+        public static ChunkerModel GetChunkerModel(string modelName, IResourceLoader loader)
+        {
+            chunkerModels.TryGetValue(modelName, out ChunkerModel model);
+            if (model == null)
+            {
+                using (Stream resource = loader.OpenResource(modelName))
+                {
+                    model = new ChunkerModel(new ikvm.io.InputStreamWrapper(resource));
+                }
+                chunkerModels[modelName] = model;
+            }
+            return model;
+        }
+
+        /// <summary>
+        /// Returns an NER tagger for the cached model named <paramref name="modelName"/>.
+        /// </summary>
+        public static NLPNERTaggerOp GetNERTagger(string modelName)
+        {
+            nerModels.TryGetValue(modelName, out TokenNameFinderModel model);
+            return new NLPNERTaggerOp(model);
+        }
+
+        /// <summary>
+        /// Loads (and caches) the NER model resource named <paramref name="modelName"/>.
+        /// </summary>
+        public static TokenNameFinderModel GetNERTaggerModel(string modelName, IResourceLoader loader)
+        {
+            nerModels.TryGetValue(modelName, out TokenNameFinderModel model);
+            if (model == null)
+            {
+                using (Stream resource = loader.OpenResource(modelName))
+                {
+                    model = new TokenNameFinderModel(new ikvm.io.InputStreamWrapper(resource));
+                }
+                nerModels[modelName] = model;
+            }
+            return model;
+        }
+
+        /// <summary>
+        /// Returns a lemmatizer built from a cached dictionary and/or a cached
+        /// MaxEnt model; at least one of the two file names must be non-<c>null</c>.
+        /// Both resources must already have been cached via
+        /// <see cref="GetLemmatizerDictionary(string, IResourceLoader)"/> /
+        /// <see cref="GetLemmatizerModel(string, IResourceLoader)"/>.
+        /// </summary>
+        public static NLPLemmatizerOp GetLemmatizer(string dictionaryFile, string lemmatizerModelFile)
+        {
+            Debug.Assert(dictionaryFile != null || lemmatizerModelFile != null, "At least one parameter must be non-null");
+            Stream dictionaryInputStream = null;
+            if (dictionaryFile != null)
+            {
+                string dictionary = lemmaDictionaries[dictionaryFile];
+                dictionaryInputStream = new MemoryStream(Encoding.UTF8.GetBytes(dictionary));
+            }
+            LemmatizerModel lemmatizerModel = lemmatizerModelFile == null ? null : lemmatizerModels[lemmatizerModelFile];
+            return new NLPLemmatizerOp(dictionaryInputStream, lemmatizerModel);
+        }
+
+        /// <summary>
+        /// Reads (and caches) the lemmatizer dictionary resource named
+        /// <paramref name="dictionaryFile"/> as a UTF-8 string.
+        /// </summary>
+        public static string GetLemmatizerDictionary(string dictionaryFile, IResourceLoader loader)
+        {
+            lemmaDictionaries.TryGetValue(dictionaryFile, out string dictionary);
+            if (dictionary == null)
+            {
+                using (TextReader reader = new StreamReader(loader.OpenResource(dictionaryFile), Encoding.UTF8))
+                {
+                    StringBuilder builder = new StringBuilder();
+                    char[] chars = new char[8092]; // buffer size carried over from the Java original (likely intended 8192; harmless)
+                    int numRead = 0;
+                    do
+                    {
+                        numRead = reader.Read(chars, 0, chars.Length);
+                        if (numRead > 0)
+                        {
+                            builder.Append(chars, 0, numRead);
+                        }
+                    } while (numRead > 0);
+                    dictionary = builder.ToString();
+                    lemmaDictionaries[dictionaryFile] = dictionary;
+                }
+            }
+            return dictionary;
+        }
+
+        /// <summary>
+        /// Loads (and caches) the lemmatizer model resource named <paramref name="modelName"/>.
+        /// </summary>
+        public static LemmatizerModel GetLemmatizerModel(string modelName, IResourceLoader loader)
+        {
+            lemmatizerModels.TryGetValue(modelName, out LemmatizerModel model);
+            if (model == null)
+            {
+                using (Stream resource = loader.OpenResource(modelName))
+                {
+                    model = new LemmatizerModel(new ikvm.io.InputStreamWrapper(resource));
+                }
+                lemmatizerModels[modelName] = model;
+            }
+            return model;
+        }
+
+        /// <summary>
+        /// Releases all cached models and dictionaries; keeps unit tests from
+        /// blowing out memory.
+        /// </summary>
+        public static void ClearModels()
+        {
+            sentenceModels.Clear();
+            tokenizerModels.Clear();
+            posTaggerModels.Clear();
+            chunkerModels.Clear();
+            nerModels.Clear();
+            lemmatizerModels.Clear(); // LUCENENET: was missing, leaking lemmatizer models across tests
+            lemmaDictionaries.Clear();
+        }
+    }
+}
diff --git a/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs b/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs
index 483036d..dd0b059 100644
--- a/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs
+++ b/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs
@@ -11,9 +11,11 @@ using System.IO;
 using System.Linq;
 using System.Text;
 using System.Threading;
+using AttributeFactory = Lucene.Net.Util.AttributeSource.AttributeFactory;
 using Assert = Lucene.Net.TestFramework.Assert;
 using AssertionError = Lucene.Net.Diagnostics.AssertionException;
 using Console = Lucene.Net.Support.SystemConsole;
+using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis
 {
@@ -139,7 +141,7 @@ namespace Lucene.Net.Analysis
         //     arriving to pos Y have the same endOffset)
         //   - offsets only move forwards (startOffset >=
         //     lastStartOffset)
-        public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect)
+        public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect, byte[][] payloads)
         {
             // LUCENENET: Bug fix: NUnit throws an exception when something fails. 
             // This causes Dispose() to be skipped and it pollutes other tests indicating false negatives.
@@ -192,6 +194,17 @@ namespace Lucene.Net.Analysis
                     keywordAtt = ts.GetAttribute<IKeywordAttribute>();
                 }
 
+                // *********** From Lucene 8.2.0 **************
+
+                IPayloadAttribute payloadAtt = null;
+                if (payloads != null)
+                {
+                    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>(), "has no PayloadAttribute");
+                    payloadAtt = ts.GetAttribute<IPayloadAttribute>();
+                }
+
+                // *********** End From Lucene 8.2.0 **************
+
                 // Maps position to the start/end offset:
                 IDictionary<int?, int?> posToStartOffset = new Dictionary<int?, int?>();
                 IDictionary<int?, int?> posToEndOffset = new Dictionary<int?, int?>();
@@ -224,6 +237,12 @@ namespace Lucene.Net.Analysis
                     {
                         keywordAtt.IsKeyword = (i & 1) == 0;
                     }
+                    // *********** From Lucene 8.2.0 **************
+                    if (payloadAtt != null)
+                    {
+                        payloadAtt.Payload = new BytesRef(new byte[] { 0x00, unchecked((byte)-0x21), 0x12, unchecked((byte)-0x43), 0x24 });
+                    }
+                    // *********** End From Lucene 8.2.0 **************
 
                     bool reset = checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before
                     Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
@@ -254,6 +273,20 @@ namespace Lucene.Net.Analysis
                     {
                         Assert.AreEqual(keywordAtts[i], keywordAtt.IsKeyword, "keywordAtt " + i);
                     }
+                    // *********** From Lucene 8.2.0 **************
+                    if (payloads != null)
+                    {
+                        if (payloads[i] != null)
+                        {
+                            Assert.AreEqual(new BytesRef(payloads[i]), payloadAtt.Payload, "payloads " + i);
+                        }
+                        else
+                        {
+                            Assert.IsNull(payloads[i], "payloads " + i);
+                        }
+                    }
+                    // *********** End From Lucene 8.2.0 **************
+
 
                     // we can enforce some basic things about a few attributes even if the caller doesn't check:
                     if (offsetAtt != null)
@@ -394,7 +427,7 @@ namespace Lucene.Net.Analysis
 
         public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, bool[] keywordAtts, bool offsetsAreCorrect)
         {
-            AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, null, offsetsAreCorrect);
+            AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, null, offsetsAreCorrect, null);
         }
 
         public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, bool offsetsAreCorrect)
@@ -475,6 +508,13 @@ namespace Lucene.Net.Analysis
             AssertTokenStreamContents(a.GetTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.Length, offsetsAreCorrect);
         }
 
+        // LUCENENET: Overload from Lucene 8.2.0
+        public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, bool graphOffsetsAreCorrect, byte[][] payloads)
+        {
+            CheckResetException(a, input);
+            AssertTokenStreamContents(a.GetTokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.Length, null, null, graphOffsetsAreCorrect, payloads);
+        }
+
         public static void AssertAnalyzesTo(Analyzer a, string input, string[] output)
         {
             AssertAnalyzesTo(a, input, output, null, null, null, null, null);
@@ -1266,5 +1306,41 @@ namespace Lucene.Net.Analysis
             }
             return ret;
         }
+
+        // *********** From Lucene 8.2.0 **************
+
+        /// <summary>Returns a random <see cref="AttributeFactory"/> impl</summary>
+        public static AttributeFactory NewAttributeFactory(Random random)
+        {
+            switch (random.nextInt(2))
+            {
+                case 0:
+                    return Token.TOKEN_ATTRIBUTE_FACTORY;
+                case 1:
+                    return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
+                default:
+                    throw new AssertionError("Please fix the Random.nextInt() call above");
+            }
+
+            //switch (random.nextInt(3))
+            //{
+            //    case 0:
+            //        return TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY;
+            //    case 1:
+            //        return Token.TOKEN_ATTRIBUTE_FACTORY;
+            //    case 2:
+            //        return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
+            //    default:
+            //        throw new AssertionError("Please fix the Random.nextInt() call above");
+            //}
+        }
+
+        /// <summary>Returns a random <see cref="AttributeFactory"/> impl</summary>
+        public static AttributeFactory NewAttributeFactory()
+        {
+            return NewAttributeFactory(Random);
+        }
+
+        // *********** End From Lucene 8.2.0 **************
     }
 }
\ No newline at end of file
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs
index 0a62869..8e10148 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs
@@ -127,7 +127,7 @@ namespace Lucene.Net.Analysis.Core
         {
             CharArraySet stopSet = StopFilter.MakeStopSet(TEST_VERSION_CURRENT, "of");
             StopFilter stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false), stopSet);
-            AssertTokenStreamContents(stpf, new string[] { "test" }, new int[] { 0 }, new int[] { 4 }, null, new int[] { 1 }, null, 7, 1, null, true);
+            AssertTokenStreamContents(stpf, new string[] { "test" }, new int[] { 0 }, new int[] { 4 }, null, new int[] { 1 }, null, 7, 1, null, true, null);
         }
 
         private void DoTestStopPositons(StopFilter stpf, bool enableIcrements)
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj b/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj
new file mode 100644
index 0000000..f72a2a7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj
@@ -0,0 +1,71 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <Import Project="$(SolutionDir)TestTargetFramework.props" />
+
+  <PropertyGroup>
+    <TargetFrameworks></TargetFrameworks>
+    <TargetFramework>net451</TargetFramework>
+    <PackageTargetFallback Condition=" '$(TargetFramework)' == 'netcoreapp1.0' ">$(PackageTargetFallback);dnxcore50</PackageTargetFallback>
+
+    <AssemblyTitle>Lucene.Net.Tests.Analysis.OpenNLP</AssemblyTitle>
+    <RootNamespace>Lucene.Net.Analysis.OpenNlp</RootNamespace>
+    <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>
+    <RuntimeIdentifiers>win7-x86;win7-x64</RuntimeIdentifiers>
+  </PropertyGroup>
+
+  <!-- OpenNLP is not strong-named -->
+  <PropertyGroup Label="Assembly Signing">
+    <SignAssembly>false</SignAssembly>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <None Remove="en-test-chunker.bin" />
+    <None Remove="en-test-lemmas.dict" />
+    <None Remove="en-test-lemmatizer.bin" />
+    <None Remove="en-test-ner.bin" />
+    <None Remove="en-test-pos-maxent.bin" />
+    <None Remove="en-test-sent.bin" />
+    <None Remove="en-test-tokenizer.bin" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <EmbeddedResource Include="en-test-chunker.bin" />
+    <EmbeddedResource Include="en-test-lemmas.dict" />
+    <EmbeddedResource Include="en-test-lemmatizer.bin" />
+    <EmbeddedResource Include="en-test-ner.bin" />
+    <EmbeddedResource Include="en-test-pos-maxent.bin" />
+    <EmbeddedResource Include="en-test-sent.bin" />
+    <EmbeddedResource Include="en-test-tokenizer.bin" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analysis.OpenNLP\Lucene.Net.Analysis.OpenNLP.csproj" />
+    <ProjectReference Include="..\Lucene.Net.TestFramework.NUnit\Lucene.Net.TestFramework.NUnit.csproj" />
+  </ItemGroup>
+
+  <Import Project="$(SolutionDir)build/TestReferences.Common.targets" />
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'netcoreapp2.1' ">
+    <DebugType>portable</DebugType>
+  </PropertyGroup>
+
+  <ItemGroup Condition=" '$(TargetFramework)' == 'netcoreapp1.0' ">
+    <PackageReference Include="System.Net.Primitives" Version="$(SystemNetPrimitivesPackageVersion)" />
+  </ItemGroup>
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'netcoreapp1.0' ">
+    <DefineConstants>$(DefineConstants);NETSTANDARD1_6</DefineConstants>
+    <DebugType>portable</DebugType>
+  </PropertyGroup>
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'net451' ">
+    <DefineConstants>$(DefineConstants);FEATURE_SERIALIZABLE_EXCEPTIONS</DefineConstants>
+    <DebugType>full</DebugType>
+  </PropertyGroup>
+
+  <ItemGroup Condition=" '$(TargetFramework)' == 'net451' ">
+    <Reference Include="System" />
+    <Reference Include="Microsoft.CSharp" />
+  </ItemGroup>
+
+</Project>
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPChunkerFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPChunkerFilterFactory.cs
new file mode 100644
index 0000000..0a0afc3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPChunkerFilterFactory.cs
@@ -0,0 +1,122 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Payloads;
+using Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Needs the OpenNLP Tokenizer because it creates full streams of punctuation.
+    /// Needs the OpenNLP POS tagger for the POS tags.
+    /// <para/>
+    /// Tagging models are created from tiny test data in opennlp/tools/test-model-data/ and are not very accurate.
+    /// </summary>
+    public class TestOpenNLPChunkerFilterFactory : BaseTokenStreamTestCase
+    {
+        private const String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+        private static readonly String[] SENTENCES_punc
+            = { "Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "." };
+        private static readonly int[] SENTENCES_startOffsets = { 0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57 };
+        private static readonly int[] SENTENCES_endOffsets = { 8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58 };
+        private static readonly String[] SENTENCES_chunks
+            = { "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "I-NP", "O", "B-NP", "I-NP", "I-NP", "O", "B-NP", "I-NP", "O" };
+
+        private const String sentenceModelFile = "en-test-sent.bin";
+        private const String tokenizerModelFile = "en-test-tokenizer.bin";
+        private const String posTaggerModelFile = "en-test-pos-maxent.bin";
+        private const String chunkerModelFile = "en-test-chunker.bin";
+
+        private static byte[][] ToPayloads(params string[] strings)
+        {
+            return strings.Select(s => s == null ? null : Encoding.UTF8.GetBytes(s)).ToArray();
+        }
+
+        [Test]
+        public void TestBasic()
+        {
+            //    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //.addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
+            //.build();
+
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(NewAttributeFactory(), reader); //new OpenNLPTokenizer(reader, new Tools.NLPSentenceDetectorOp(sentenceModelFile), new Tools.NLPTokenizerOp(tokenizerModelFile));
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);  //new OpenNLPPOSFilter(opennlp, new Tools.NLPPOSTaggerOp(posTaggerModelFile));
+
+                var opennlpChunkerFilterFactory = new OpenNLPChunkerFilterFactory(new Dictionary<string, string> { { "chunkerModel", chunkerModelFile } });
+                opennlpChunkerFilterFactory.Inform(loader);
+                var opennlpChunkerFilter = opennlpChunkerFilterFactory.Create(opennlpPOSFilter);  //new OpenNLPChunkerFilter(filter1, new Tools.NLPChunkerOp(chunkerModelFile));
+
+                return new TokenStreamComponents(opennlp, opennlpChunkerFilter);
+            });
+
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+                SENTENCES_chunks, null, null, true);
+        }
+
+        [Test]
+        public void TestPayloads()
+        {
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //.addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
+            //.addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
+            //.build();
+
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(NewAttributeFactory(), reader); //new OpenNLPTokenizer(reader, new Tools.NLPSentenceDetectorOp(sentenceModelFile), new Tools.NLPTokenizerOp(tokenizerModelFile));
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);  //new OpenNLPPOSFilter(opennlp, new Tools.NLPPOSTaggerOp(posTaggerModelFile));
+
+                var opennlpChunkerFilterFactory = new OpenNLPChunkerFilterFactory(new Dictionary<string, string> { { "chunkerModel", chunkerModelFile } });
+                opennlpChunkerFilterFactory.Inform(loader);
+                var opennlpChunkerFilter = opennlpChunkerFilterFactory.Create(opennlpPOSFilter);  //new OpenNLPChunkerFilter(filter1, new Tools.NLPChunkerOp(chunkerModelFile));
+
+                var typeAsPayloadFilterFactory = new TypeAsPayloadTokenFilterFactory(new Dictionary<string, string>());
+                var typeAsPayloadFilter = typeAsPayloadFilterFactory.Create(opennlpChunkerFilter);
+
+                return new TokenStreamComponents(opennlp, typeAsPayloadFilter);
+            });
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+                null, null, null, true, ToPayloads(SENTENCES_chunks));
+        }
+    }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPLemmatizerFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPLemmatizerFilterFactory.cs
new file mode 100644
index 0000000..f249131
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPLemmatizerFilterFactory.cs
@@ -0,0 +1,372 @@
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class TestOpenNLPLemmatizerFilterFactory : BaseTokenStreamTestCase
+    {
+        private const String SENTENCE = "They sent him running in the evening.";
+        private static readonly String[] SENTENCE_dict_punc = { "they", "send", "he", "run", "in", "the", "evening", "." };
+        private static readonly String[] SENTENCE_maxent_punc = { "they", "send", "he", "runn", "in", "the", "evening", "." };
+        private static readonly String[] SENTENCE_posTags = { "NNP", "VBD", "PRP", "VBG", "IN", "DT", "NN", "." };
+
+        private static readonly String SENTENCES = "They sent him running in the evening. He did not come back.";
+        private static readonly String[] SENTENCES_dict_punc
+            = { "they", "send", "he", "run", "in", "the", "evening", ".", "he", "do", "not", "come", "back", "." };
+        private static readonly String[] SENTENCES_maxent_punc
+            = { "they", "send", "he", "runn", "in", "the", "evening", ".", "he", "do", "not", "come", "back", "." };
+        private static readonly String[] SENTENCES_posTags
+            = { "NNP", "VBD", "PRP", "VBG", "IN", "DT", "NN", ".", "PRP", "VBD", "RB", "VB", "RB", "." };
+
+        private static readonly String SENTENCE_both = "Konstantin Kalashnitsov constantly caliphed.";
+        private static readonly String[] SENTENCE_both_punc
+            = { "konstantin", "kalashnitsov", "constantly", "caliph", "." };
+        private static readonly String[] SENTENCE_both_posTags
+            = { "IN", "JJ", "NN", "VBN", "." };
+
+        private const String SENTENCES_both = "Konstantin Kalashnitsov constantly caliphed. Coreena could care, completely.";
+        private static readonly String[] SENTENCES_both_punc
+            = { "konstantin", "kalashnitsov", "constantly", "caliph", ".", "coreena", "could", "care", ",", "completely", "." };
+        private static readonly String[] SENTENCES_both_posTags
+            = { "IN", "JJ", "NN", "VBN", ".", "NNP", "VBN", "NN", ",", "NN", "." };
+
+        private static readonly String[] SENTENCES_dict_keep_orig_punc
+            = { "They", "they", "sent", "send", "him", "he", "running", "run", "in", "the", "evening", ".", "He", "he", "did", "do", "not", "come", "back", "." };
+        private static readonly String[] SENTENCES_max_ent_keep_orig_punc
+            = { "They", "they", "sent", "send", "him", "he", "running", "runn", "in", "the", "evening", ".", "He", "he", "did", "do", "not", "come", "back", "." };
+        private static readonly String[] SENTENCES_keep_orig_posTags
+            = { "NNP", "NNP", "VBD", "VBD", "PRP", "PRP", "VBG", "VBG", "IN", "DT", "NN", ".", "PRP", "PRP", "VBD", "VBD", "RB", "VB", "RB", "." };
+
+        private static readonly String[] SENTENCES_both_keep_orig_punc
+            = { "Konstantin", "konstantin", "Kalashnitsov", "kalashnitsov", "constantly", "caliphed", "caliph", ".", "Coreena", "coreena", "could", "care", ",", "completely", "." };
+        private static readonly String[] SENTENCES_both_keep_orig_posTags
+            = { "IN", "IN", "JJ", "JJ", "NN", "VBN", "VBN", ".", "NNP", "NNP", "VBN", "NN", ",", "NN", "." };
+
+
+        private const String tokenizerModelFile = "en-test-tokenizer.bin";
+        private const String sentenceModelFile = "en-test-sent.bin";
+        private const String posTaggerModelFile = "en-test-pos-maxent.bin";
+        private const String lemmatizerModelFile = "en-test-lemmatizer.bin";
+        private const String lemmatizerDictFile = "en-test-lemmas.dict";
+
+        [Test]
+        public void Test1SentenceDictionaryOnly()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(reader);
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);
+
+                var opennlpLemmatizerFilterFactory = new OpenNLPLemmatizerFilterFactory(new Dictionary<string, string> { { "dictionary", lemmatizerDictFile } });
+                opennlpLemmatizerFilterFactory.Inform(loader);
+                var opennlpLemmatizerFilter = opennlpLemmatizerFilterFactory.Create(opennlpPOSFilter);
+
+                return new TokenStreamComponents(opennlp, opennlpLemmatizerFilter);
+            });
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //.addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
+            //.addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict")
+            //.build();
+            AssertAnalyzesTo(analyzer, SENTENCE, SENTENCE_dict_punc, null, null,
+                SENTENCE_posTags, null, null, true);
+        }
+
+        [Test]
+        public void Test2SentencesDictionaryOnly()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(reader);
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);
+
+                var opennlpLemmatizerFilterFactory = new OpenNLPLemmatizerFilterFactory(new Dictionary<string, string> { { "dictionary", lemmatizerDictFile } });
+                opennlpLemmatizerFilterFactory.Inform(loader);
+                var opennlpLemmatizerFilter = opennlpLemmatizerFilterFactory.Create(opennlpPOSFilter);
+
+                return new TokenStreamComponents(opennlp, opennlpLemmatizerFilter);
+            });
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //.addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
+            //.build();
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_punc, null, null,
+                SENTENCES_posTags, null, null, true);
+        }
+
+        [Test]
+        public void Test1SentenceMaxEntOnly()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(reader);
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);
+
+                var opennlpLemmatizerFilterFactory = new OpenNLPLemmatizerFilterFactory(new Dictionary<string, string> { { "lemmatizerModel", lemmatizerModelFile } });
+                opennlpLemmatizerFilterFactory.Inform(loader);
+                var opennlpLemmatizerFilter = opennlpLemmatizerFilterFactory.Create(opennlpPOSFilter);
+
+                return new TokenStreamComponents(opennlp, opennlpLemmatizerFilter);
+            });
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //    .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
+            //    .build();
+            AssertAnalyzesTo(analyzer, SENTENCE, SENTENCE_maxent_punc, null, null,
+                SENTENCE_posTags, null, null, true);
+        }
+
+        [Test]
+        public void Test2SentencesMaxEntOnly()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(reader);
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);
+
+                var opennlpLemmatizerFilterFactory = new OpenNLPLemmatizerFilterFactory(new Dictionary<string, string> { { "lemmatizerModel", lemmatizerModelFile } });
+                opennlpLemmatizerFilterFactory.Inform(loader);
+                var opennlpLemmatizerFilter = opennlpLemmatizerFilterFactory.Create(opennlpPOSFilter);
+
+                return new TokenStreamComponents(opennlp, opennlpLemmatizerFilter);
+            });
+
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //    .addTokenFilter("OpenNLPLemmatizer", "lemmatizerModel", lemmatizerModelFile)
+            //    .build();
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_maxent_punc, null, null,
+                SENTENCES_posTags, null, null, true);
+        }
+
+        [Test]
+        public void Test1SentenceDictionaryAndMaxEnt()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(reader);
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);
+
+                var opennlpLemmatizerFilterFactory = new OpenNLPLemmatizerFilterFactory(new Dictionary<string, string> { { "dictionary", lemmatizerDictFile }, { "lemmatizerModel", lemmatizerModelFile } });
+                opennlpLemmatizerFilterFactory.Inform(loader);
+                var opennlpLemmatizerFilter = opennlpLemmatizerFilterFactory.Create(opennlpPOSFilter);
+
+                return new TokenStreamComponents(opennlp, opennlpLemmatizerFilter);
+            });
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
+            //    .addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict", "lemmatizerModel", lemmatizerModelFile)
+            //    .build();
+            AssertAnalyzesTo(analyzer, SENTENCE_both, SENTENCE_both_punc, null, null,
+                SENTENCE_both_posTags, null, null, true);
+        }
+
+        [Test]
+        public void Test2SentencesDictionaryAndMaxEnt()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(reader);
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);
+
+                var opennlpLemmatizerFilterFactory = new OpenNLPLemmatizerFilterFactory(new Dictionary<string, string> { { "dictionary", lemmatizerDictFile }, { "lemmatizerModel", lemmatizerModelFile } });
+                opennlpLemmatizerFilterFactory.Inform(loader);
+                var opennlpLemmatizerFilter = opennlpLemmatizerFilterFactory.Create(opennlpPOSFilter);
+
+                return new TokenStreamComponents(opennlp, opennlpLemmatizerFilter);
+            });
+
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //    .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
+            //    .build();
+            AssertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_punc, null, null,
+                SENTENCES_both_posTags, null, null, true);
+        }
+
+        [Test]
+        public void TestKeywordAttributeAwarenessDictionaryOnly()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(reader);
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);
+
+                var keywordRepeatFilterFactory = new KeywordRepeatFilterFactory(new Dictionary<string, string>());
+                var keywordRepeatFilter = keywordRepeatFilterFactory.Create(opennlpPOSFilter);
+
+                var opennlpLemmatizerFilterFactory = new OpenNLPLemmatizerFilterFactory(new Dictionary<string, string> { { "dictionary", lemmatizerDictFile } });
+                opennlpLemmatizerFilterFactory.Inform(loader);
+                var opennlpLemmatizerFilter = opennlpLemmatizerFilterFactory.Create(keywordRepeatFilter);
+
+                var removeDuplicatesTokenFilterFactory = new RemoveDuplicatesTokenFilterFactory(new Dictionary<string, string>());
+                var removeDuplicatesTokenFilter = removeDuplicatesTokenFilterFactory.Create(opennlpLemmatizerFilter);
+
+                return new TokenStreamComponents(opennlp, removeDuplicatesTokenFilter);
+            });
+
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //    .addTokenFilter(KeywordRepeatFilterFactory.class)
+            //    .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
+            //    .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
+            //    .build();
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_keep_orig_punc, null, null,
+                SENTENCES_keep_orig_posTags, null, null, true);
+        }
+
+        [Test]
+        public void TestKeywordAttributeAwarenessMaxEntOnly()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(reader);
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);
+
+                var keywordRepeatFilterFactory = new KeywordRepeatFilterFactory(new Dictionary<string, string>());
+                var keywordRepeatFilter = keywordRepeatFilterFactory.Create(opennlpPOSFilter);
+
+                var opennlpLemmatizerFilterFactory = new OpenNLPLemmatizerFilterFactory(new Dictionary<string, string> { { "lemmatizerModel", lemmatizerModelFile } });
+                opennlpLemmatizerFilterFactory.Inform(loader);
+                var opennlpLemmatizerFilter = opennlpLemmatizerFilterFactory.Create(keywordRepeatFilter);
+
+                var removeDuplicatesTokenFilterFactory = new RemoveDuplicatesTokenFilterFactory(new Dictionary<string, string>());
+                var removeDuplicatesTokenFilter = removeDuplicatesTokenFilterFactory.Create(opennlpLemmatizerFilter);
+
+                return new TokenStreamComponents(opennlp, removeDuplicatesTokenFilter);
+            });
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //    .addTokenFilter(KeywordRepeatFilterFactory.class)
+            //    .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
+            //    .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
+            //    .build();
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_max_ent_keep_orig_punc, null, null,
+                SENTENCES_keep_orig_posTags, null, null, true);
+        }
+
+        [Test]
+        public void TestKeywordAttributeAwarenessDictionaryAndMaxEnt()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                opennlpFactory.Inform(loader);
+                var opennlp = opennlpFactory.Create(reader);
+
+                var opennlpPOSFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                opennlpPOSFilterFactory.Inform(loader);
+                var opennlpPOSFilter = opennlpPOSFilterFactory.Create(opennlp);
+
+                var keywordRepeatFilterFactory = new KeywordRepeatFilterFactory(new Dictionary<string, string>());
+                var keywordRepeatFilter = keywordRepeatFilterFactory.Create(opennlpPOSFilter);
+
+                var opennlpLemmatizerFilterFactory = new OpenNLPLemmatizerFilterFactory(new Dictionary<string, string> { { "dictionary", lemmatizerDictFile }, { "lemmatizerModel", lemmatizerModelFile } });
+                opennlpLemmatizerFilterFactory.Inform(loader);
+                var opennlpLemmatizerFilter = opennlpLemmatizerFilterFactory.Create(keywordRepeatFilter);
+
+                var removeDuplicatesTokenFilterFactory = new RemoveDuplicatesTokenFilterFactory(new Dictionary<string, string>());
+                var removeDuplicatesTokenFilter = removeDuplicatesTokenFilterFactory.Create(opennlpLemmatizerFilter);
+
+                return new TokenStreamComponents(opennlp, removeDuplicatesTokenFilter);
+            });
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //    .addTokenFilter(KeywordRepeatFilterFactory.class)
+            //    .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
+            //    .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
+            //    .build();
+            AssertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_keep_orig_punc, null, null,
+                SENTENCES_both_keep_orig_posTags, null, null, true);
+        }
+    }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPPOSFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPPOSFilterFactory.cs
new file mode 100644
index 0000000..0d54d49
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPPOSFilterFactory.cs
@@ -0,0 +1,160 @@
+using Lucene.Net.Analysis.Payloads;
+using Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Needs the OpenNLP Tokenizer because it creates full streams of punctuation.
+    /// The POS model is based on this tokenization.
+    /// 
+    /// <para/>Tagging models are created from tiny test data in opennlp/tools/test-model-data/ and are not very accurate.
+    /// </summary>
+    public class TestOpenNLPPOSFilterFactory : BaseTokenStreamTestCase
+    {
+        private const String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+        private static readonly String[] SENTENCES_punc
+            = { "Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "." };
+        private static readonly int[] SENTENCES_startOffsets = { 0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57 };
+        private static readonly int[] SENTENCES_endOffsets = { 8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58 };
+        private static readonly String[] SENTENCES_posTags
+            = { "NN", "NN", "CD", "VBZ", "CD", "NNS", ".", "NN", "NN", "CD", ",", "CD", "NNS", "." };
+
+        private const String NO_BREAK = "No period";
+        private static readonly String[] NO_BREAK_terms = { "No", "period" };
+        private static readonly int[] NO_BREAK_startOffsets = { 0, 3 };
+        private static readonly int[] NO_BREAK_endOffsets = { 2, 9 };
+
+        private const String sentenceModelFile = "en-test-sent.bin";
+        private const String tokenizerModelFile = "en-test-tokenizer.bin";
+        private const String posTaggerModelFile = "en-test-pos-maxent.bin";
+
+
+        private static byte[][] ToPayloads(params string[] strings)
+        {
+            return strings.Select(s => s == null ? null : Encoding.UTF8.GetBytes(s)).ToArray();
+        }
+
+        //    private static byte[][] ToPayloads(params String[] strings)
+        //    {
+        //        return Arrays.stream(strings).map(s->s == null ? null : s.getBytes(StandardCharsets.UTF_8)).toArray(byte[][]::new);
+        //    }
+
+        [Test]
+        public void TestBasic()
+        {
+            var loader = new ClasspathResourceLoader(GetType());
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) => {
+                var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                tokenizerFactory.Inform(loader);
+                var tokenizer = tokenizerFactory.Create(reader);
+
+                var filter1Factory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                filter1Factory.Inform(loader);
+                var filter1 = filter1Factory.Create(tokenizer);
+
+                return new TokenStreamComponents(tokenizer, filter1);
+            });
+        //    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(GetType()))
+        //.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        //.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        //.build();
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
+        }
+
+        [Test]
+        public void TestPOS()
+        {
+            var loader = new ClasspathResourceLoader(GetType());
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                tokenizerFactory.Inform(loader);
+                var tokenizer = tokenizerFactory.Create(reader);
+
+                var filter1Factory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                filter1Factory.Inform(loader);
+                var filter1 = filter1Factory.Create(tokenizer);
+
+                return new TokenStreamComponents(tokenizer, filter1);
+            });
+            //    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(GetType()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //    .build();
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+                SENTENCES_posTags, null, null, true);
+
+            analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                tokenizerFactory.Inform(loader);
+                var tokenizer = tokenizerFactory.Create(reader);
+
+                var filter1Factory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                filter1Factory.Inform(loader);
+                var filter1 = filter1Factory.Create(tokenizer);
+
+                var filter2Factory = new TypeAsPayloadTokenFilterFactory(new Dictionary<string, string>());
+                var filter2 = filter2Factory.Create(filter1);
+
+                return new TokenStreamComponents(tokenizer, filter2);
+            });
+            //analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(GetType()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //    .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
+            //.build();
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+                null, null, null, true, ToPayloads(SENTENCES_posTags));
+        }
+
+        [Test]
+        public void TestNoBreak()
+        {
+            var analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var loader = new ClasspathResourceLoader(GetType());
+
+                var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+                tokenizerFactory.Inform(loader);
+                var tokenizer = tokenizerFactory.Create(reader);
+                
+                var tokenFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string> { { "posTaggerModel", posTaggerModelFile } });
+                tokenFilterFactory.Inform(loader);
+                var tokenFilter = tokenFilterFactory.Create(tokenizer);
+                
+                return new TokenStreamComponents(tokenizer, tokenFilter);
+            });
+
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(GetType()))
+            //    .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+            //    .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+            //    .build();
+            AssertAnalyzesTo(analyzer, NO_BREAK, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets,
+                null, null, null, true);
+        }
+    }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPSentenceBreakIterator.cs b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPSentenceBreakIterator.cs
new file mode 100644
index 0000000..ca6c3cc
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPSentenceBreakIterator.cs
@@ -0,0 +1,252 @@
+using ICU4N.Support.Text;
+using ICU4N.Text;
+using Lucene.Net.Analysis.OpenNlp.Tools;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class TestOpenNLPSentenceBreakIterator : LuceneTestCase
+    {
+        private const String TEXT
+            //                                                                                                     111
+            //           111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999000
+            // 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
+            = "Sentence number 1 has 6 words. Sentence number 2, 5 words. And finally, sentence number 3 has 8 words.";
+        private static readonly String[] SENTENCES = new String[] {
+            "Sentence number 1 has 6 words. ", "Sentence number 2, 5 words. ", "And finally, sentence number 3 has 8 words." };
+        private static readonly String PADDING = " Word. Word. ";
+        private static readonly String sentenceModelFile = "en-test-sent.bin";
+
+        public override void BeforeClass()
+        {
+            base.BeforeClass();
+            PopulateCache();
+        }
+
+        public static void PopulateCache()
+        {
+            OpenNLPOpsFactory.GetSentenceModel(sentenceModelFile, new ClasspathResourceLoader(typeof(TestOpenNLPSentenceBreakIterator)));
+        }
+
+        [Test]
+        public void TestThreeSentences()
+        {
+            NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
+            BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+            bi.SetText(TEXT); // String is converted to StringCharacterIterator
+
+            Do3SentenceTest(bi);
+
+            bi.SetText(GetCharArrayIterator(TEXT));
+            Do3SentenceTest(bi);
+        }
+
+        private CharacterIterator GetCharArrayIterator(String text)
+        {
+            return GetCharArrayIterator(text, 0, text.Length);
+        }
+
+        private class WorkaroundCharArrayIterator : CharArrayIterator
+        {
+            // Lie about all surrogates to the sentence tokenizer,
+            // instead we treat them all as SContinue so we won't break around them.
+            protected override char JreBugWorkaround(char ch)
+            {
+                return ch >= 0xD800 && ch <= 0xDFFF ? (char)0x002C : ch;
+            }
+        }
+
+        private CharacterIterator GetCharArrayIterator(String text, int start, int length)
+        {
+            //    CharArrayIterator charArrayIterator = new CharArrayIterator() {
+            //      // Lie about all surrogates to the sentence tokenizer,
+            //      // instead we treat them all as SContinue so we won't break around them.
+            //      protected override char JreBugWorkaround(char ch)
+            //    {
+            //        return ch >= 0xD800 && ch <= 0xDFFF ? 0x002C : ch;
+            //    }
+            //};
+            CharArrayIterator charArrayIterator = new WorkaroundCharArrayIterator();
+            charArrayIterator.SetText(text.ToCharArray(), start, length);
+            return charArrayIterator;
+        }
+
+        private void Do3SentenceTest(BreakIterator bi) // LUCENENET NOTE: Refactored a bit because Substring in .NET requires some light math to match Java
+        {
+            assertEquals(0, bi.Current);
+            assertEquals(0, bi.First());
+            int current = bi.Current;
+            assertEquals(SENTENCES[0], TEXT.Substring(current, bi.Next() - current)); // LUCENENET: Corrected 2nd parameter
+            current = bi.Current;
+            assertEquals(SENTENCES[1], TEXT.Substring(current, bi.Next() - current)); // LUCENENET: Corrected 2nd parameter
+            current = bi.Current;
+            assertEquals(bi.Text.EndIndex, bi.Next());
+            int next = bi.Current;
+            assertEquals(SENTENCES[2], TEXT.Substring(current, next - current)); // LUCENENET: Corrected 2nd parameter
+            assertEquals(BreakIterator.Done, bi.Next());
+
+            assertEquals(TEXT.Length, bi.Last());
+            int end = bi.Current;
+            int prev = bi.Previous();
+            assertEquals(SENTENCES[2], TEXT.Substring(prev, end - prev)); // LUCENENET: Corrected 2nd parameter
+            end = bi.Current;
+            prev = bi.Previous();
+            assertEquals(SENTENCES[1], TEXT.Substring(prev, end - prev)); // LUCENENET: Corrected 2nd parameter
+            end = bi.Current;
+            prev = bi.Previous();
+            assertEquals(SENTENCES[0], TEXT.Substring(prev, end - prev)); // LUCENENET: Corrected 2nd parameter
+            assertEquals(BreakIterator.Done, bi.Previous());
+            assertEquals(0, bi.Current);
+
+            assertEquals(59, bi.Following(39));
+            assertEquals(59, bi.Following(31));
+            assertEquals(31, bi.Following(30));
+
+            assertEquals(0, bi.Preceding(57));
+            assertEquals(0, bi.Preceding(58));
+            assertEquals(31, bi.Preceding(59));
+
+            assertEquals(0, bi.First());
+            assertEquals(59, bi.Next(2));
+            assertEquals(0, bi.Next(-2));
+        }
+
+        [Test]
+        public void TestSingleSentence()
+        {
+            NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
+            BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+            bi.SetText(GetCharArrayIterator(SENTENCES[0]));
+            Test1Sentence(bi, SENTENCES[0]);
+        }
+
+        private void Test1Sentence(BreakIterator bi, String text)
+        {
+            int start = bi.Text.BeginIndex;
+            assertEquals(start, bi.First());
+            int current = bi.Current;
+            assertEquals(bi.Text.EndIndex, bi.Next());
+            int end = bi.Current - start;
+            assertEquals(text, text.Substring(current - start, end - start));
+
+            assertEquals(text.Length, bi.Last() - start);
+            end = bi.Current;
+            bi.Previous();
+            assertEquals(BreakIterator.Done, bi.Previous());
+            int previous = bi.Current;
+            assertEquals(text, text.Substring(previous - start, end - start));
+            assertEquals(start, bi.Current);
+
+            assertEquals(BreakIterator.Done, bi.Following(bi.Last() / 2 + start));
+
+            assertEquals(BreakIterator.Done, bi.Preceding(bi.Last() / 2 + start));
+
+            assertEquals(start, bi.First());
+            assertEquals(BreakIterator.Done, bi.Next(13));
+            assertEquals(BreakIterator.Done, bi.Next(-8));
+        }
+
+        [Test]
+        public void TestSliceEnd()
+        {
+            NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
+            BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+            bi.SetText(GetCharArrayIterator(SENTENCES[0] + PADDING, 0, SENTENCES[0].Length));
+
+            Test1Sentence(bi, SENTENCES[0]);
+        }
+
+        [Test]
+        public void TestSliceStart()
+        {
+            NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
+            BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+            bi.SetText(GetCharArrayIterator(PADDING + SENTENCES[0], PADDING.Length, SENTENCES[0].Length));
+            Test1Sentence(bi, SENTENCES[0]);
+        }
+
+        [Test]
+        public void TestSliceMiddle()
+        {
+            NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
+            BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+            bi.SetText(GetCharArrayIterator(PADDING + SENTENCES[0] + PADDING, PADDING.Length, SENTENCES[0].Length));
+
+            Test1Sentence(bi, SENTENCES[0]);
+        }
+
+        /** the current position must be ignored, initial position is always first() */
+        [Test]
+        public void TestFirstPosition()
+        {
+            NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
+            BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+            bi.SetText(GetCharArrayIterator(SENTENCES[0]));
+            assertEquals(SENTENCES[0].Length, bi.Last()); // side-effect: set current position to last()
+            Test1Sentence(bi, SENTENCES[0]);
+        }
+
+        [Test]
+        public void TestWhitespaceOnly()
+        {
+            NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
+            BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+            bi.SetText("   \n \n\n\r\n\t  \n");
+            Test0Sentences(bi);
+        }
+
+        [Test]
+        public void TestEmptyString()
+        {
+            NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
+            BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+            bi.SetText("");
+            Test0Sentences(bi);
+        }
+
+        private void Test0Sentences(BreakIterator bi)
+        {
+            assertEquals(0, bi.Current);
+            assertEquals(0, bi.First());
+            assertEquals(BreakIterator.Done, bi.Next());
+            assertEquals(0, bi.Last());
+            assertEquals(BreakIterator.Done, bi.Previous());
+            assertEquals(BreakIterator.Done, bi.Following(0));
+            assertEquals(BreakIterator.Done, bi.Preceding(0));
+            assertEquals(0, bi.First());
+            assertEquals(BreakIterator.Done, bi.Next(13));
+            assertEquals(BreakIterator.Done, bi.Next(-8));
+        }
+
+        internal static void assertEquals(object expected, object actual)
+        {
+            Assert.AreEqual(expected, actual);
+        }
+
+        internal static void assertEquals(long expected, long actual)
+        {
+            Assert.AreEqual(expected, actual);
+        }
+
+    }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPTokenizerFactory.cs b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPTokenizerFactory.cs
new file mode 100644
index 0000000..51ae740
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/TestOpenNLPTokenizerFactory.cs
@@ -0,0 +1,140 @@
+using Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.OpenNlp
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests the Tokenizer as well- the Tokenizer needs the OpenNLP model files,
+    /// which this can load from src/test-files/opennlp/solr/conf
+    /// </summary>
+    public class TestOpenNLPTokenizerFactory : BaseTokenStreamTestCase
+    {
+        private const String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+        private static String[] SENTENCES_punc = { "Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "." };
+        private static int[] SENTENCES_startOffsets = { 0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57 };
+        private static int[] SENTENCES_endOffsets = { 8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58 };
+
+        private const String SENTENCE1 = "Sentence number 1 has 6 words.";
+        private static String[] SENTENCE1_punc = { "Sentence", "number", "1", "has", "6", "words", "." };
+
+        [Test]
+        public void TestTokenizer()
+        {
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            {
+                var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "sentenceModel", "en-test-sent.bin" }, { "tokenizerModel", "en-test-tokenizer.bin" } });
+                var tokenizer = tokenizerFactory.Create(reader);
+                return new TokenStreamComponents(tokenizer);
+            });
+            //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //    .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin", "tokenizerModel", "en-test-tokenizer.bin")
+            //    .build();
+            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
+            AssertAnalyzesTo(analyzer, SENTENCE1, SENTENCE1_punc);
+        }
+
+        [Test]
+        public void TestTokenizerNoSentenceDetector()
+        {
+            var expected = Assert.Throws<ArgumentException>(() =>
+            {
+                Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+                {
+                    var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", "en-test-tokenizer.bin" } });
+                    var tokenizer = tokenizerFactory.Create(reader);
+                    return new TokenStreamComponents(tokenizer);
+                });
+                analyzer.GetTokenStream("", "");
+            });
+
+            //        IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+            //          CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //              .withTokenizer("opennlp", "tokenizerModel", "en-test-tokenizer.bin")
+            //              .build();
+            //});
+            assertTrue(expected.Message.Contains("Configuration Error: missing parameter 'sentenceModel'"));
+        }
+
+        [Test]
+        public void TestTokenizerNoTokenizer()
+        {
+            //Analyzer analyzer2 = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+            //{
+            //    var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "sentenceModel", "en-test-sent.bin" } });
+            //    tokenizerFactory.Inform(new ClasspathResourceLoader(GetType()));
+            //    var tokenizer = tokenizerFactory.Create(reader);
+            //    return new TokenStreamComponents(tokenizer);
+            //});
+            //analyzer2.GetTokenStream("", "");
+
+            var expected = Assert.Throws<ArgumentException>(() =>
+            {
+                Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
+                {
+                    var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "sentenceModel", "en-test-sent.bin" } });
+                    var tokenizer = tokenizerFactory.Create(reader);
+                    return new TokenStreamComponents(tokenizer);
+                });
+                analyzer.GetTokenStream("", "");
+            });
+
+            //        IllegalArgumentException expected = expectThrows(ArgumentException.class, () -> {
+            //          CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+            //              .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin")
+            //              .build();
+            //});
+            assertTrue(expected.Message.Contains("Configuration Error: missing parameter 'tokenizerModel'"));
+        }
+
+        // test analyzer caching the tokenizer
+        [Test]
+        public void TestClose()
+        {
+            IDictionary<String, String> args = new Dictionary<String, String>()
+            {
+                { "sentenceModel", "en-test-sent.bin" },
+                { "tokenizerModel", "en-test-tokenizer.bin" }
+            };
+            OpenNLPTokenizerFactory factory = new OpenNLPTokenizerFactory(args);
+            factory.Inform(new ClasspathResourceLoader(GetType()));
+
+            Tokenizer ts = factory.Create(NewAttributeFactory(), new StringReader(SENTENCES));
+            //ts.SetReader(new StringReader(SENTENCES));
+
+            ts.Reset();
+            ts.Dispose();
+            ts.Reset();
+            ts.SetReader(new StringReader(SENTENCES));
+            AssertTokenStreamContents(ts, SENTENCES_punc);
+            ts.Dispose();
+            ts.Reset();
+            ts.SetReader(new StringReader(SENTENCES));
+            AssertTokenStreamContents(ts, SENTENCES_punc);
+        }
+
+        internal static void assertTrue(bool condition)
+        {
+            Assert.IsTrue(condition);
+        }
+    }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-chunker.bin b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-chunker.bin
new file mode 100644
index 0000000..b9bfdb4
Binary files /dev/null and b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-chunker.bin differ
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-lemmas.dict b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-lemmas.dict
new file mode 100644
index 0000000..d1d486c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-lemmas.dict
@@ -0,0 +1,12 @@
+they	NNP	they
+sent	VBD	send
+him	PRP	he
+running	VBG	run
+in	IN	in
+the	DT	the
+evening	NN	evening
+he	PRP	he
+did	VBD	do
+not	RB	not
+come	VB	come
+back	RB	back
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-lemmatizer.bin b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-lemmatizer.bin
new file mode 100644
index 0000000..aac6ce2
Binary files /dev/null and b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-lemmatizer.bin differ
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-ner.bin b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-ner.bin
new file mode 100644
index 0000000..b4d8cdc
Binary files /dev/null and b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-ner.bin differ
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-pos-maxent.bin b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-pos-maxent.bin
new file mode 100644
index 0000000..a5f1481
Binary files /dev/null and b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-pos-maxent.bin differ
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-sent.bin b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-sent.bin
new file mode 100644
index 0000000..6e19e6b
Binary files /dev/null and b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-sent.bin differ
diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-tokenizer.bin b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-tokenizer.bin
new file mode 100644
index 0000000..796a744
Binary files /dev/null and b/src/Lucene.Net.Tests.Analysis.OpenNLP/en-test-tokenizer.bin differ
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestOpenNLPChunkerFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestOpenNLPChunkerFilterFactory.cs
new file mode 100644
index 0000000..d767d51
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestOpenNLPChunkerFilterFactory.cs
@@ -0,0 +1,86 @@
+//using Lucene.Net.Analysis;
+//using Lucene.Net.Analysis.Util;
+//using NUnit.Framework;
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+
+//namespace Lucene.Net.Analysis.OpenNlp
+//{
+//    /// <summary>
+//    /// Needs the OpenNLP Tokenizer because it creates full streams of punctuation.
+//    /// Needs the OpenNLP POS tagger for the POS tags.
+//    /// <para/>
+//    /// Tagging models are created from tiny test data in opennlp/tools/test-model-data/ and are not very accurate.
+//    /// </summary>
+//    public class TestOpenNLPChunkerFilterFactory : BaseTokenStreamTestCase
+//    {
+//        private const String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+//        private static readonly String[] SENTENCES_punc
+//            = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."};
+//        private static readonly int[] SENTENCES_startOffsets = { 0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57 };
+//        private static readonly int[] SENTENCES_endOffsets = { 8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58 };
+//        private static readonly String[] SENTENCES_chunks
+//            = { "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "I-NP", "O", "B-NP", "I-NP", "I-NP", "O", "B-NP", "I-NP", "O" };
+
+//  private const String sentenceModelFile = "en-test-sent.bin";
+//  private const String tokenizerModelFile = "en-test-tokenizer.bin";
+//  private const String posTaggerModelFile = "en-test-pos-maxent.bin";
+//  private const String chunkerModelFile = "en-test-chunker.bin";
+
+
+//  //private static byte[][] toPayloads(params string[] strings)
+//  //      {
+//  //          //return Arrays.stream(strings).map(s->s == null ? null : s.getBytes(StandardCharsets.UTF_8)).toArray(byte[][]::new);
+//  //          return strings.SelectMany(s => s == null ? null : Encoding.UTF8.GetBytes(s)).ToArray();
+//  //      }
+
+//        [Test]
+//        public void TestBasic() 
+//        {
+//            //    //    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+//            //    //.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+//            //    //.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+//            //    //.addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
+//            //    //.build();
+
+//            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+//            {
+//                var loader = new ClasspathResourceLoader(GetType());
+
+//                var opennlpFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string> { { "tokenizerModel", tokenizerModelFile }, { "sentenceModel", sentenceModelFile } });
+//                opennlpFactory.Inform(loader);
+//                var opennlp = opennlpFactory.Create(AttributeFactory.Def //new OpenNLPTokenizer(reader, new Tools.NLPSentenceDetectorOp(sentenceModelFile), new Tools.NLPTokenizerOp(tokenizerModelFile));
+//                var filter1 = new OpenNLPPOSFilter(opennlp, new Tools.NLPPOSTaggerOp(posTaggerModelFile));
+//                var filter2 = new OpenNLPChunkerFilter(filter1, new Tools.NLPChunkerOp(chunkerModelFile));
+//                return new TokenStreamComponents(opennlp, filter2);
+//            });
+
+//            AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+//                SENTENCES_chunks, null, null, true);
+//        }
+
+//        [Test]
+//        public void TestPayloads() 
+//    {
+//        //    //CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+//        //    //.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+//        //    //.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+//        //    //.addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
+//        //    //.addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
+//        //    //.build();
+
+//        //    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+//        //    {
+//        //        var opennlp = new OpenNLPTokenizer(reader, new Tools.NLPSentenceDetectorOp(sentenceModelFile), new Tools.NLPTokenizerOp(tokenizerModelFile));
+//        //        var filter1 = new OpenNLPPOSFilter(opennlp, new Tools.NLPPOSTaggerOp(posTaggerModelFile));
+//        //        var filter2 = new OpenNLPChunkerFilter(filter1, new Tools.NLPChunkerOp(chunkerModelFile));
+//        //        var filter3 = new Payloads.TypeAsPayloadTokenFilter(filter2);
+//        //        return new TokenStreamComponents(opennlp, filter3);
+//        //    });
+//        //    AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+//        //null, null, null, true, toPayloads(SENTENCES_chunks));
+//}
+//    }
+//}


Mime
View raw message