lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [49/50] [abbrv] lucenenet git commit: Ported Analysis.Stempel + tests (closes #190)
Date Sun, 23 Oct 2016 13:02:35 GMT
Ported Analysis.Stempel + tests (closes #190)


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/29525086
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/29525086
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/29525086

Branch: refs/heads/master
Commit: 2952508699645b571a2b960afaedc725252e168c
Parents: 4dbc359
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Sun Oct 2 21:37:26 2016 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Sun Oct 23 19:44:24 2016 +0700

----------------------------------------------------------------------
 Lucene.Net.sln                                  |  28 ++
 .../Egothor.Stemmer/Cell.cs                     | 105 +++++
 .../Egothor.Stemmer/Compile.cs                  | 222 +++++++++
 .../Egothor.Stemmer/Diff.cs                     | 332 +++++++++++++
 .../Egothor.Stemmer/DiffIt.cs                   | 144 ++++++
 .../Egothor.Stemmer/Gener.cs                    | 139 ++++++
 .../Egothor.Stemmer/Lift.cs                     | 165 +++++++
 .../Egothor.Stemmer/MultiTrie.cs                | 213 +++++++++
 .../Egothor.Stemmer/MultiTrie2.cs               | 421 +++++++++++++++++
 .../Egothor.Stemmer/Optimizer.cs                | 227 +++++++++
 .../Egothor.Stemmer/Optimizer2.cs               |  92 ++++
 .../Egothor.Stemmer/Reduce.cs                   | 143 ++++++
 .../Egothor.Stemmer/Row.cs                      | 342 ++++++++++++++
 .../Egothor.Stemmer/Trie.cs                     | 472 +++++++++++++++++++
 .../Lucene.Net.Analysis.Stempel.csproj          |  87 ++++
 .../Pl/PolishAnalyzer.cs                        | 164 +++++++
 .../Pl/stemmer_20000.tbl                        | Bin 0 -> 2225192 bytes
 .../Pl/stopwords.txt                            | 186 ++++++++
 .../Properties/AssemblyInfo.cs                  |  39 ++
 .../RectangularArrays.cs                        |  52 ++
 .../Stempel/StempelFilter.cs                    |  91 ++++
 .../Stempel/StempelPolishStemFilterFactory.cs   |  48 ++
 .../Stempel/StempelStemmer.cs                   | 105 +++++
 src/Lucene.Net.Core/Lucene.Net.csproj           |   4 +
 src/Lucene.Net.Core/Support/DataInputStream.cs  | 323 +++++++++++++
 src/Lucene.Net.Core/Support/DataOutputStream.cs | 256 ++++++++++
 src/Lucene.Net.Core/Support/IDataInput.cs       |  24 +
 src/Lucene.Net.Core/Support/IDataOutput.cs      |  23 +
 .../Egothor.Stemmer/TestCompile.cs              | 211 +++++++++
 .../Egothor.Stemmer/TestStemmer.cs              | 191 ++++++++
 .../Egothor.Stemmer/testRules.txt               |   4 +
 .../Lucene.Net.Tests.Analysis.Stempel.csproj    |  89 ++++
 .../Pl/TestPolishAnalyzer.cs                    | 102 ++++
 .../Properties/AssemblyInfo.cs                  |  36 ++
 .../TestStempelPolishStemFilterFactory.cs       |  56 +++
 .../packages.config                             |   4 +
 36 files changed, 5140 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index 0322498..c87a7be 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -64,6 +64,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Memory", "src\Lu
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Memory", "src\Lucene.Net.Tests.Memory\Lucene.Net.Tests.Memory.csproj", "{7F9378BF-C88D-46FF-9AE8-5E7D8C0225D3}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analysis.Stempel", "src\Lucene.Net.Analysis.Stempel\Lucene.Net.Analysis.Stempel.csproj", "{A76DAD88-E3A5-40F9-9114-FACD77BD8265}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Analysis.Stempel", "src\Lucene.Net.Tests.Analysis.Stempel\Lucene.Net.Tests.Analysis.Stempel.csproj", "{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -355,6 +359,30 @@ Global
 		{7F9378BF-C88D-46FF-9AE8-5E7D8C0225D3}.Release|Mixed Platforms.Build.0 = Release|Any CPU
 		{7F9378BF-C88D-46FF-9AE8-5E7D8C0225D3}.Release|x86.ActiveCfg = Release|Any CPU
 		{7F9378BF-C88D-46FF-9AE8-5E7D8C0225D3}.Release|x86.Build.0 = Release|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Debug|x86.Build.0 = Debug|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Release|Any CPU.Build.0 = Release|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Release|x86.ActiveCfg = Release|Any CPU
+		{A76DAD88-E3A5-40F9-9114-FACD77BD8265}.Release|x86.Build.0 = Release|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Debug|x86.Build.0 = Debug|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Release|Any CPU.Build.0 = Release|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Release|x86.ActiveCfg = Release|Any CPU
+		{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Release|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Cell.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Cell.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Cell.cs
new file mode 100644
index 0000000..b1fa11c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Cell.cs
@@ -0,0 +1,105 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// Represents a portion of a <see cref="Trie"/>.
+    /// </summary>
+    public class Cell
+    {
+        /// <summary>
+        /// Id of the next row in this way; initially -1.
+        /// </summary>
+        internal int @ref = -1;
+        /// <summary>
+        /// Command of the cell; initially -1.
+        /// </summary>
+        internal int cmd = -1;
+        /// <summary>
+        /// Number of commands that were in the subtrie before Pack().
+        /// </summary>
+        internal int cnt = 0;
+        /// <summary>
+        /// Number of characters that would be discarded from the input key in this way.
+        /// </summary>
+        internal int skip = 0;
+
+        /// <summary>
+        /// Creates a <see cref="Cell"/> holding the initial field values.
+        /// </summary>
+        internal Cell() { }
+
+        /// <summary>
+        /// Creates a <see cref="Cell"/> that copies all four fields of another <see cref="Cell"/>.
+        /// </summary>
+        /// <param name="a">the <see cref="Cell"/> to copy the fields from</param>
+        internal Cell(Cell a)
+        {
+            this.skip = a.skip;
+            this.cnt = a.cnt;
+            this.cmd = a.cmd;
+            this.@ref = a.@ref;
+        }
+
+        /// <summary>
+        /// Formats the four fields of this <see cref="Cell"/> as one string.
+        /// </summary>
+        /// <returns>text of the form <c>ref(..)cmd(..)cnt(..)skp(..)</c></returns>
+        public override string ToString()
+        {
+            return string.Format("ref({0})cmd({1})cnt({2})skp({3})", @ref, cmd, cnt, skip);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Compile.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Compile.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Compile.cs
new file mode 100644
index 0000000..20cb46a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Compile.cs
@@ -0,0 +1,222 @@
+using Lucene.Net.Support;
+using System;
+using System.IO;
+using System.Text;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The Compile class is used to compile a stemmer table.
+    /// </summary>
+    public class Compile
+    {
+        static bool backward;
+        static bool multi;
+        static Trie trie;
+
+        /// <summary>
+        /// no instantiation
+        /// </summary>
+        private Compile() { }
+
+        /// <summary>
+        /// Entry point to the Compile application.
+        /// <para>
+        /// The first argument selects the algorithm: an optional leading <c>-</c> (backward),
+        /// an optional <c>0</c> (also add every stem itself), an optional <c>M</c> (multi trie),
+        /// then any of the reduction flags <c>G L E 2 1</c>. Each remaining argument is the path
+        /// of a stemmer table file; the compiled trie is written to that path + ".out".
+        /// </para>
+        /// </summary>
+        /// <param name="args">the command line arguments</param>
+        public static void Main(string[] args)
+        {
+            if (args == null || args.Length < 1)
+            {
+                return;
+            }
+
+            // NOTE(review): the original called args[0].ToUpperInvariant() here and discarded
+            // the result, so it never had any effect. The no-op is dropped; the flags below
+            // are therefore still matched case-sensitively, exactly as before.
+            string method = args[0];
+
+            // The length checks keep an empty or very short algorithm name from throwing.
+            backward = method.StartsWith("-", StringComparison.Ordinal);
+            int qq = (backward) ? 1 : 0;
+            bool storeorig = false;
+
+            if (qq < method.Length && method[qq] == '0')
+            {
+                storeorig = true;
+                qq++;
+            }
+
+            multi = qq < method.Length && method[qq] == 'M';
+            if (multi)
+            {
+                qq++;
+            }
+
+            // LUCENENET TODO: the Java version read the charset from the
+            // "egothor.stemmer.charset" system property (default "UTF-8"); UTF-8 is fixed here.
+
+            // Whatever follows the prefix flags is the list of reductions to apply.
+            char[] optimizer = method.Substring(qq).ToCharArray();
+
+            for (int i = 1; i < args.Length; i++)
+            {
+                Diff diff = new Diff();
+                AllocTrie();
+
+                Console.WriteLine(args[i]);
+                using (TextReader @in = new StreamReader(
+                    new FileStream(args[i], FileMode.Open, FileAccess.Read), Encoding.UTF8))
+                {
+                    // On every line the first token is the stem and each further token is a
+                    // word that gets stored with the patch turning it into that stem.
+                    for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
+                    {
+                        try
+                        {
+                            line = line.ToLowerInvariant();
+                            StringTokenizer st = new StringTokenizer(line);
+                            string stem = st.NextToken();
+                            if (storeorig)
+                            {
+                                // "-a" is a patch that skips one character and edits nothing
+                                trie.Add(stem, "-a");
+                            }
+                            while (st.HasMoreTokens())
+                            {
+                                string token = st.NextToken();
+                                if (!token.Equals(stem))
+                                {
+                                    trie.Add(token, diff.Exec(token, stem));
+                                }
+                            }
+                        }
+                        catch (InvalidOperationException /*x*/)
+                        {
+                            // no base token (stem) on a line
+                        }
+                    }
+                }
+
+                Optimizer o = new Optimizer();
+                Optimizer2 o2 = new Optimizer2();
+                Lift l = new Lift(true);
+                Lift e = new Lift(false);
+                Gener g = new Gener();
+
+                // Apply the requested reductions in the order given; other characters are skipped.
+                for (int j = 0; j < optimizer.Length; j++)
+                {
+                    string prefix;
+                    switch (optimizer[j])
+                    {
+                        case 'G':
+                            trie = trie.Reduce(g);
+                            prefix = "G: ";
+                            break;
+                        case 'L':
+                            trie = trie.Reduce(l);
+                            prefix = "L: ";
+                            break;
+                        case 'E':
+                            trie = trie.Reduce(e);
+                            prefix = "E: ";
+                            break;
+                        case '2':
+                            trie = trie.Reduce(o2);
+                            prefix = "2: ";
+                            break;
+                        case '1':
+                            trie = trie.Reduce(o);
+                            prefix = "1: ";
+                            break;
+                        default:
+                            continue;
+                    }
+                    trie.PrintInfo(System.Console.Out, prefix + " ");
+                }
+
+                // FileMode.Create truncates an existing file; OpenOrCreate would leave stale
+                // bytes behind the new table whenever the previous output was longer.
+                using (DataOutputStream os = new DataOutputStream(
+                    new FileStream(args[i] + ".out", FileMode.Create, FileAccess.Write)))
+                {
+                    os.WriteUTF(args[0]);
+                    trie.Store(os);
+                }
+            }
+        }
+
+        /// <summary>Replaces the static trie with a new MultiTrie2 (when multi) or Trie, built with !backward.</summary>
+        internal static void AllocTrie()
+        {
+            if (multi)
+            {
+                trie = new MultiTrie2(!backward);
+            }
+            else
+            {
+                trie = new Trie(!backward);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Diff.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Diff.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Diff.cs
new file mode 100644
index 0000000..e5e372e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Diff.cs
@@ -0,0 +1,332 @@
+using System;
+using System.Text;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The Diff object generates a patch string.
+    /// <para>
+    /// A patch string is a sequence of two-character commands telling a stemmer how
+    /// to reduce a word to its root. For example, to reduce the word teacher to its
+    /// root teach the patch string Db would be generated. This command tells the
+    /// stemmer to delete the last 2 characters from the word teacher to reach the
+    /// stem (patch commands are applied starting from the last character).
+    /// </para>
+    /// </summary>
+    public class Diff
+    {
+        int sizex = 0; // first dimension currently allocated for net and way
+        int sizey = 0; // second dimension currently allocated for net and way
+        int[][] net; // cost table filled by Exec
+        int[][] way; // step chosen by Exec for each cell (X, Y, R or D)
+
+        int INSERT; // cost added for an insert step
+        int DELETE; // cost added for a delete step
+        int REPLACE; // cost added for a replace step
+        int NOOP; // cost added when both characters already match
+
+        /// <summary>
+        /// Creates a Diff with insert, delete and replace costs of 1 and a no-op cost of 0.
+        /// </summary>
+        public Diff()
+            : this(1, 1, 1, 0)
+        {
+        }
+
+        /// <summary>
+        /// Creates a Diff with the given step costs.
+        /// </summary>
+        /// <param name="ins">cost of inserting a character</param>
+        /// <param name="del">cost of deleting a character</param>
+        /// <param name="rep">cost of replacing a character</param>
+        /// <param name="noop">cost of keeping a character that already matches</param>
+        public Diff(int ins, int del, int rep, int noop)
+        {
+            INSERT = ins;
+            DELETE = del;
+            REPLACE = rep;
+            NOOP = noop;
+        }
+
+        /// <summary>
+        /// Apply the given patch string <paramref name="diff"/> to the given string
+        /// <paramref name="dest"/> in place; a null patch or an empty dest changes nothing.
+        /// </summary>
+        /// <param name="dest">Destination string</param>
+        /// <param name="diff">Patch string</param>
+        public static void Apply(StringBuilder dest, string diff)
+        {
+            try
+            {
+
+                if (diff == null)
+                {
+                    return;
+                }
+
+                int pos = dest.Length - 1; // start at the last character
+                if (pos < 0)
+                {
+                    return;
+                }
+                // dest is not empty here; each command is an opcode char plus a parameter char
+                for (int i = 0; i < diff.Length / 2; i++)
+                {
+                    char cmd = diff[2 * i];
+                    char param = diff[2 * i + 1];
+                    int par_num = (param - 'a' + 1); // 'a' => 1, 'b' => 2, ...
+                    switch (cmd)
+                    {
+                        case '-': // skip par_num characters (together with the pos-- below)
+                            pos = pos - par_num + 1;
+                            break;
+                        case 'R': // replace the current character with param
+                            dest[pos] = param;
+                            break;
+                        case 'D': // delete par_num characters ending at pos
+                            int o = pos;
+                            pos -= par_num - 1;
+                            /*
+                             * delete par_num chars from index pos
+                             */
+                            // o is the last index to remove and pos is now the first one,
+                            // so (o + 1) - pos below equals par_num.
+                            // Java original: s.substring( 0, pos ) + s.substring( o + 1 )
+                            dest.Remove(pos, (o + 1) - pos);
+                            break;
+                        case 'I': // insert param right after the current character
+                            dest.Insert(pos += 1, param);
+                            break;
+                    }
+                    pos--;
+                }
+            }
+            catch (IndexOutOfRangeException /*x*/)
+            {
+                // deliberately ignored: the patch is abandoned, edits already made stay in dest
+            }
+            catch (ArgumentOutOfRangeException /*x*/)
+            {
+                // deliberately ignored: the patch is abandoned, edits already made stay in dest
+            }
+        }
+
+        /// <summary>
+        /// Construct a patch string that transforms a to b.
+        /// </summary>
+        /// <param name="a">1st string</param>
+        /// <param name="b">2nd string</param>
+        /// <returns>the patch string, or <c>null</c> if either argument is <c>null</c></returns>
+        public string Exec(string a, string b)
+        {
+            if (a == null || b == null)
+            {
+                return null;
+            }
+
+            int x;
+            int y;
+            int maxx;
+            int maxy;
+            int[] go = new int[4]; // candidate costs, indexed by X, Y, R and D
+            const int X = 1; // step along a: delete
+            const int Y = 2; // step along b: insert
+            const int R = 3; // diagonal step: replace
+            const int D = 0; // diagonal step: no change
+
+            /*
+             * setup memory if needed => processing speed up
+             */
+            maxx = a.Length + 1;
+            maxy = b.Length + 1;
+            if ((maxx >= sizex) || (maxy >= sizey))
+            {
+                sizex = maxx + 8;
+                sizey = maxy + 8;
+                net = RectangularArrays.ReturnRectangularIntArray(sizex, sizey);
+                way = RectangularArrays.ReturnRectangularIntArray(sizex, sizey);
+            }
+
+            /*
+             * clear the network
+             */
+            for (x = 0; x < maxx; x++)
+            {
+                for (y = 0; y < maxy; y++)
+                {
+                    net[x][y] = 0;
+                }
+            }
+
+            /*
+             * set known persistent values
+             */
+            for (x = 1; x < maxx; x++)
+            {
+                net[x][0] = x;
+                way[x][0] = X;
+            }
+            for (y = 1; y < maxy; y++)
+            {
+                net[0][y] = y;
+                way[0][y] = Y;
+            }
+
+            for (x = 1; x < maxx; x++)
+            {
+                for (y = 1; y < maxy; y++)
+                {
+                    go[X] = net[x - 1][y] + DELETE;
+                    // a step along x costs DELETE
+                    go[Y] = net[x][y - 1] + INSERT;
+                    // a step along y costs INSERT
+                    go[R] = net[x - 1][y - 1] + REPLACE;
+                    go[D] = net[x - 1][y - 1]
+                        + ((a[x - 1] == b[y - 1]) ? NOOP : 100);
+                    // diagonal costs NOOP when the characters match, 100 otherwise
+                    ushort min = (ushort)D;
+                    if (go[min] >= go[X]) // on a tie X replaces D; Y and R below need a strictly lower cost
+                    {
+                        min = (ushort)X;
+                    }
+                    if (go[min] > go[Y])
+                    {
+                        min = (ushort)Y;
+                    }
+                    if (go[min] > go[R])
+                    {
+                        min = (ushort)R;
+                    }
+                    way[x][y] = min;
+                    net[x][y] = (ushort)go[min];
+                }
+            }
+
+            // walk back from the end of both strings, building the patch string
+            StringBuilder result = new StringBuilder();
+            char @base = (char)('a' - 1); // counters start one below 'a', so the first increment gives 'a'
+            char deletes = @base;
+            char equals = @base;
+            for (x = maxx - 1, y = maxy - 1; x + y != 0;)
+            {
+                switch (way[x][y])
+                {
+                    case X:
+                        if (equals != @base)
+                        {
+                            result.Append("-" + (equals));
+                            equals = @base;
+                        }
+                        deletes++;
+                        x--;
+                        break;
+                    // delete
+                    case Y:
+                        if (deletes != @base)
+                        {
+                            result.Append("D" + (deletes));
+                            deletes = @base;
+                        }
+                        if (equals != @base)
+                        {
+                            result.Append("-" + (equals));
+                            equals = @base;
+                        }
+                        result.Append('I');
+                        result.Append(b[--y]);
+                        break;
+                    // insert
+                    case R:
+                        if (deletes != @base)
+                        {
+                            result.Append("D" + (deletes));
+                            deletes = @base;
+                        }
+                        if (equals != @base)
+                        {
+                            result.Append("-" + (equals));
+                            equals = @base;
+                        }
+                        result.Append('R');
+                        result.Append(b[--y]);
+                        x--;
+                        break;
+                    // replace
+                    case D:
+                        if (deletes != @base)
+                        {
+                            result.Append("D" + (deletes));
+                            deletes = @base;
+                        }
+                        equals++;
+                        x--;
+                        y--;
+                        break;
+                        // no change
+                }
+            }
+            if (deletes != @base)
+            {
+                result.Append("D" + (deletes));
+                deletes = @base;
+            }
+
+            return result.ToString();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs
new file mode 100644
index 0000000..01621bf
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs
@@ -0,0 +1,144 @@
+using Lucene.Net.Support;
+using System;
+using System.IO;
+using System.Text;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The DiffIt class is a means to generate patch commands from an already prepared
+    /// stemmer table.
+    /// </summary>
+    public class DiffIt
+    {
+        /// <summary>
+        /// no instantiation
+        /// </summary>
+        private DiffIt() { }
+
+        /// <summary>
+        /// Reads the single ASCII decimal digit at position <paramref name="i"/> of
+        /// <paramref name="s"/>.
+        /// </summary>
+        /// <param name="i">zero-based index of the character to read</param>
+        /// <param name="s">the string to read from</param>
+        /// <returns>
+        /// the digit's value (0-9), or <c>1</c> when <paramref name="s"/> is <c>null</c>,
+        /// <paramref name="i"/> lies outside the string, or the character is not a digit
+        /// </returns>
+        internal static int Get(int i, string s)
+        {
+            // The Java original wrapped Integer.parseInt(s.substring(i, i + 1)) in a
+            // catch-all that returned 1, so a too-short string also produced 1.
+            // Substring would throw here instead, hence the explicit range check.
+            if (s == null || i < 0 || i >= s.Length)
+            {
+                return 1;
+            }
+
+            char c = s[i];
+            return (c >= '0' && c <= '9') ? c - '0' : 1;
+        }
+
+        /// <summary>
+        /// Entry point to the DiffIt application.
+        /// <para>
+        /// The first argument is a string whose first four characters are read as
+        /// digits (see <see cref="Get(int, string)"/>) and handed to the
+        /// <see cref="Diff"/> constructor as <c>ins</c>, <c>del</c>, <c>rep</c> and
+        /// <c>nop</c>. Every following argument is the path to a file containing a
+        /// stemmer table. Each line of such a file is lower-cased and split into
+        /// tokens; the first token is taken as the stem, and for every other token
+        /// that differs from the stem a patch command is written to the console.
+        /// </para>
+        /// </summary>
+        /// <param name="args">
+        /// the digit string followed by the paths of the stemmer table files
+        /// </param>
+        public static void Main(string[] args)
+        {
+            int ins = Get(0, args[0]);
+            int del = Get(1, args[0]);
+            int rep = Get(2, args[0]);
+            int nop = Get(3, args[0]);
+
+            for (int i = 1; i < args.Length; i++)
+            {
+                // System.out.println("[" + args[i] + "]");
+                Diff diff = new Diff(ins, del, rep, nop);
+                // LUCENENET TODO: Is using Encoding.UTF8 good enough?
+                //String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");
+
+                // Dispose the reader (and with it the underlying file stream) as soon
+                // as the file has been processed, even if an exception escapes.
+                using (TextReader @in = new StreamReader(new FileStream(args[i], FileMode.Open, FileAccess.Read), Encoding.UTF8))
+                {
+                    for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
+                    {
+                        try
+                        {
+                            line = line.ToLowerInvariant();
+                            StringTokenizer st = new StringTokenizer(line);
+                            string stem = st.NextToken();
+                            Console.WriteLine(stem + " -a");
+                            while (st.HasMoreTokens())
+                            {
+                                string token = st.NextToken();
+                                if (!token.Equals(stem))
+                                {
+                                    Console.WriteLine(stem + " " + diff.Exec(token, stem));
+                                }
+                            }
+                        }
+                        catch (InvalidOperationException /*x*/)
+                        {
+                            // no base token (stem) on a line
+                        }
+                    }
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Gener.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Gener.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Gener.cs
new file mode 100644
index 0000000..bacfc68
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Gener.cs
@@ -0,0 +1,139 @@
+using Lucene.Net.Support;
+using System.Collections.Generic;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The Gener object helps in the discarding of nodes which break the reduction
+    /// effort and defend the structure against large reductions.
+    /// </summary>
+    public class Gener : Reduce
+    {
+        /// <summary>
+        /// Constructor for the Gener object.
+        /// </summary>
+        public Gener() { }
+
+        /// <summary>
+        /// Return a Trie with infrequent values occurring in the given Trie removed.
+        /// </summary>
+        /// <param name="orig">the Trie to optimize</param>
+        /// <returns>a new optimized Trie</returns>
+        public override Trie Optimize(Trie orig)
+        {
+            IList<string> cmds = orig.cmds;
+            IList<Row> orows = orig.rows;
+            int[] remap = new int[orows.Count];
+
+            // First pass: remap is used as a liveness table (1 = alive, 0 = eaten).
+            // Rows are visited from last to first; Eat() cuts references to rows
+            // that have already been marked as eaten.
+            Arrays.Fill(remap, 1);
+            for (int j = orows.Count - 1; j >= 0; j--)
+            {
+                if (Eat(orows[j], remap))
+                {
+                    remap[j] = 0;
+                }
+            }
+
+            // Second pass: remap is reset and handed to RemoveGaps, which fills it
+            // in; remap[orig.root] is then used as the root of the new Trie.
+            Arrays.Fill(remap, -1);
+            IList<Row> rows = RemoveGaps(orig.root, orows, new List<Row>(), remap);
+
+            return new Trie(orig.forward, remap[orig.root], cmds, rows);
+        }
+
+        /// <summary>
+        /// Prune the given Row and test whether it can be dropped from an
+        /// optimized Trie.
+        /// <para>
+        /// References to rows whose <paramref name="remap"/> entry is <c>0</c> are
+        /// cleared, and commands whose count is below one tenth (integer division)
+        /// of the row's total count are removed.
+        /// </para>
+        /// </summary>
+        /// <param name="in">the Row to test; its cells are modified in place</param>
+        /// <param name="remap">
+        /// liveness table indexed by row number; an entry of <c>0</c> marks a row
+        /// that has already been eaten
+        /// </param>
+        /// <returns>
+        /// <c>true</c> if, after pruning, no cell of the Row holds a command or a
+        /// reference (the Row should be eaten); otherwise, <c>false</c>
+        /// </returns>
+        public bool Eat(Row @in, int[] remap)
+        {
+            int sum = 0;
+            foreach (Cell c in @in.cells.Values)
+            {
+                sum += c.cnt;
+                if (c.@ref >= 0 && remap[c.@ref] == 0)
+                {
+                    // the referenced row has been eaten already
+                    c.@ref = -1;
+                }
+            }
+
+            int frame = sum / 10;
+            bool live = false;
+            foreach (Cell c in @in.cells.Values)
+            {
+                if (c.cnt < frame && c.cmd >= 0)
+                {
+                    // too infrequent compared to the rest of the row
+                    c.cnt = 0;
+                    c.cmd = -1;
+                }
+                if (c.cmd >= 0 || c.@ref >= 0)
+                {
+                    live = true;
+                }
+            }
+            return !live;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Lift.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Lift.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Lift.cs
new file mode 100644
index 0000000..43a4602
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Lift.cs
@@ -0,0 +1,165 @@
+using Lucene.Net.Support;
+using System.Collections.Generic;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The Lift class is a <see cref="Reduce"/> implementation that shrinks a
+    /// <see cref="Trie"/> with the Lift-Up method.
+    /// <para>
+    /// Lift's <i>raison d'etre</i> is to implement reduction of the trie via the
+    /// Lift-Up method, which makes the data structure less liable to overstemming.
+    /// </para>
+    /// </summary>
+    public class Lift : Reduce
+    {
+        private readonly bool changeSkip;
+
+        /// <summary>
+        /// Constructor for the Lift object.
+        /// </summary>
+        /// <param name="changeSkip">
+        /// when set to <c>true</c>, comparison of two Cells takes
+        /// a skip command into account
+        /// </param>
+        public Lift(bool changeSkip)
+        {
+            this.changeSkip = changeSkip;
+        }
+
+        /// <summary>
+        /// Apply Lift-Up to every row of the given Trie (from the last row to the
+        /// first), remove the gaps this leaves behind and return the reduced Trie.
+        /// </summary>
+        /// <param name="orig">the Trie to optimize</param>
+        /// <returns>the reduced Trie</returns>
+        public override Trie Optimize(Trie orig)
+        {
+            IList<string> cmds = orig.cmds;
+            IList<Row> orows = orig.rows;
+
+            int last = orows.Count - 1;
+            for (int j = last; j >= 0; j--)
+            {
+                LiftUp(orows[j], orows);
+            }
+
+            int[] remap = new int[orows.Count];
+            Arrays.Fill(remap, -1);
+            IList<Row> rows = RemoveGaps(orig.root, orows, new List<Row>(), remap);
+
+            return new Trie(orig.forward, remap[orig.root], cmds, rows);
+        }
+
+        /// <summary>
+        /// Reduce the trie using Lift-Up reduction.
+        /// <para>
+        /// The Lift-Up reduction propagates all leaf-values (patch commands), where
+        /// possible, to higher levels which are closer to the root of the trie.
+        /// </para>
+        /// </summary>
+        /// <param name="in">the Row whose cells are examined and updated in place</param>
+        /// <param name="nodes">the rows of the trie, indexed by cell reference</param>
+        public void LiftUp(Row @in, IList<Row> nodes)
+        {
+            foreach (Cell c in @in.cells.Values)
+            {
+                if (c.@ref < 0)
+                {
+                    // nothing below this cell to lift
+                    continue;
+                }
+
+                Row to = nodes[c.@ref];
+                int sum = to.UniformCmd(changeSkip);
+                if (sum < 0)
+                {
+                    // no single command can be lifted out of the referenced row
+                    continue;
+                }
+
+                if (sum == c.cmd)
+                {
+                    // the referenced row repeats this cell's command: merge it
+                    if (changeSkip)
+                    {
+                        if (c.skip != to.uniformSkip + 1)
+                        {
+                            continue;
+                        }
+                        c.skip = to.uniformSkip + 1;
+                    }
+                    else
+                    {
+                        c.skip = 0;
+                    }
+                    c.cnt += to.uniformCnt;
+                    c.@ref = -1;
+                }
+                else if (c.cmd < 0)
+                {
+                    // this cell has no command of its own: adopt the lifted one
+                    c.cnt = to.uniformCnt;
+                    c.cmd = sum;
+                    c.@ref = -1;
+                    c.skip = changeSkip ? to.uniformSkip + 1 : 0;
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/MultiTrie.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/MultiTrie.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/MultiTrie.cs
new file mode 100644
index 0000000..7bdad8f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/MultiTrie.cs
@@ -0,0 +1,213 @@
+using Lucene.Net.Support;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The <see cref="MultiTrie"/> is a <see cref="Trie"/> of <see cref="Trie"/>s. It stores words and their associated patch
+    /// commands. The <see cref="MultiTrie"/> handles patch commands individually (each command by
+    /// itself).
+    /// </summary>
+    public class MultiTrie : Trie
+    {
+        // Marker stored in the level that follows the last chunk of a patch command.
+        internal static readonly char EOM = '*';
+        internal static readonly string EOM_NODE = "" + EOM;
+
+        protected List<Trie> tries = new List<Trie>();
+
+        // Number of characters of a patch command stored per level.
+        private int BY = 1;
+
+        /// <summary>
+        /// Constructor for the <see cref="MultiTrie"/> object.
+        /// <para>
+        /// Reads, in this order, the forward flag, the chunk size, the number of
+        /// levels and then each level's <see cref="Trie"/>; this mirrors what
+        /// <see cref="Store(IDataOutput)"/> writes.
+        /// </para>
+        /// </summary>
+        /// <param name="is">the input stream</param>
+        /// <exception cref="IOException">if an I/O error occurs</exception>
+        public MultiTrie(IDataInput @is)
+            : base(false)
+        {
+            forward = @is.ReadBoolean();
+            BY = @is.ReadInt();
+            for (int i = @is.ReadInt(); i > 0; i--)
+            {
+                tries.Add(new Trie(@is));
+            }
+        }
+
+        /// <summary>
+        /// Constructor for the <see cref="MultiTrie"/> object
+        /// </summary>
+        /// <param name="forward">set to <c>true</c> if the elements should be read left to right</param>
+        public MultiTrie(bool forward)
+            : base(forward)
+        {
+        }
+
+        /// <summary>
+        /// Return the element that is stored in a cell associated with the given key.
+        /// <para>
+        /// The results of <see cref="Trie.GetFully(string)"/> on each level are
+        /// concatenated, stopping at the first level that yields <c>null</c> or the
+        /// end-of-command marker.
+        /// </para>
+        /// </summary>
+        /// <param name="key">the key to the cell holding the desired element</param>
+        /// <returns>the element (an empty string if the first level already stops)</returns>
+        public override string GetFully(string key)
+        {
+            StringBuilder result = new StringBuilder(tries.Count * 2);
+            for (int i = 0; i < tries.Count; i++)
+            {
+                string r = tries[i].GetFully(key);
+                if (r == null || (r.Length == 1 && r[0] == EOM))
+                {
+                    return result.ToString();
+                }
+                result.Append(r);
+            }
+            return result.ToString();
+        }
+
+        /// <summary>
+        /// Return the element that is stored as last on a path belonging to the given
+        /// key.
+        /// <para>
+        /// The results of <see cref="Trie.GetLastOnPath(string)"/> on each level are
+        /// concatenated, stopping at the first level that yields <c>null</c> or the
+        /// end-of-command marker.
+        /// </para>
+        /// </summary>
+        /// <param name="key">the key associated with the desired element</param>
+        /// <returns>the element that is stored as last on a path</returns>
+        public override string GetLastOnPath(string key)
+        {
+            StringBuilder result = new StringBuilder(tries.Count * 2);
+            for (int i = 0; i < tries.Count; i++)
+            {
+                string r = tries[i].GetLastOnPath(key);
+                if (r == null || (r.Length == 1 && r[0] == EOM))
+                {
+                    return result.ToString();
+                }
+                result.Append(r);
+            }
+            return result.ToString();
+        }
+
+        /// <summary>
+        /// Write this data structure to the given output stream.
+        /// </summary>
+        /// <param name="os">the output stream</param>
+        /// <exception cref="IOException">if an I/O error occurs</exception>
+        public override void Store(IDataOutput os)
+        {
+            os.WriteBoolean(forward);
+            os.WriteInt(BY);
+            os.WriteInt(tries.Count);
+            foreach (Trie trie in tries)
+            {
+                trie.Store(os);
+            }
+        }
+
+        /// <summary>
+        /// Add an element to this structure consisting of the given key and patch
+        /// command.
+        /// <para>
+        /// This method will return without executing if the <paramref name="cmd"/>
+        /// parameter's length is 0.
+        /// </para>
+        /// <para>
+        /// The command is cut into chunks of the configured size, one chunk per
+        /// level; characters left over after the last full chunk are not stored.
+        /// The level after the last chunk receives the end-of-command marker.
+        /// </para>
+        /// </summary>
+        /// <param name="key">the key</param>
+        /// <param name="cmd">the patch command</param>
+        public override void Add(string key, string cmd)
+        {
+            if (cmd.Length == 0)
+            {
+                return;
+            }
+            int levels = cmd.Length / BY;
+            // one extra level is needed for the end-of-command marker
+            while (levels >= tries.Count)
+            {
+                tries.Add(new Trie(forward));
+            }
+            for (int i = 0; i < levels; i++)
+            {
+                tries[i].Add(key, cmd.Substring(BY * i, BY));
+            }
+            tries[levels].Add(key, EOM_NODE);
+        }
+
+        /// <summary>
+        /// Reduce every level with the given <see cref="Egothor.Stemmer.Reduce"/> strategy and return
+        /// a new <see cref="MultiTrie"/> built from the reduced levels.
+        /// </summary>
+        /// <param name="by">the reduction strategy applied to each level</param>
+        /// <returns>the newly reduced Trie</returns>
+        public override Trie Reduce(Reduce by)
+        {
+            List<Trie> h = new List<Trie>(tries.Count);
+            foreach (Trie trie in tries)
+            {
+                h.Add(trie.Reduce(by));
+            }
+
+            MultiTrie m = new MultiTrie(forward);
+            // Carry the chunk size over: the reduced levels were built with it, so
+            // later Add()/Store() calls on the result must keep using the same value.
+            m.BY = BY;
+            m.tries = h;
+            return m;
+        }
+
+        /// <summary>
+        /// Print information about each level's <see cref="Trie"/>, prefixing every
+        /// level with the given prefix followed by its 1-based level number in brackets.
+        /// </summary>
+        /// <param name="out">the writer the information is printed to</param>
+        /// <param name="prefix">the desired prefix</param>
+        public override void PrintInfo(TextWriter @out, string prefix)
+        {
+            int c = 0;
+            foreach (Trie trie in tries)
+            {
+                trie.PrintInfo(@out, prefix + "[" + (++c) + "] ");
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/MultiTrie2.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/MultiTrie2.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/MultiTrie2.cs
new file mode 100644
index 0000000..9db6b92
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/MultiTrie2.cs
@@ -0,0 +1,421 @@
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The <see cref="MultiTrie2"/> is a <see cref="Trie"/> of <see cref="Trie"/>s.
+    /// <para>
+    /// It stores words and their associated patch commands. The <see cref="MultiTrie2"/> handles
+    /// patch commands broken into their constituent parts, as a <see cref="MultiTrie"/> does, but
+    /// the commands are delimited by the skip command.
+    /// </para>
+    /// </summary>
+    public class MultiTrie2 : MultiTrie
+    {
+        /// <summary>
+        /// Constructor for the <see cref="MultiTrie2"/> object.
+        /// </summary>
+        /// <param name="is">the input stream, passed on to the <see cref="MultiTrie"/> base constructor</param>
+        /// <exception cref="IOException">if an I/O error occurs</exception>
+        public MultiTrie2(IDataInput @is)
+            : base(@is)
+        {
+        }
+
+        /// <summary>
+        /// Constructor for the <see cref="MultiTrie2"/> object.
+        /// </summary>
+        /// <param name="forward">set to <c>true</c> if the elements should be read left to right; passed on to the <see cref="MultiTrie"/> base constructor</param>
+        public MultiTrie2(bool forward)
+            : base(forward)
+        {
+        }
+
+        /// <summary>
+        /// Return the element that is stored in a cell associated with the given key.
+        /// The per-level results of <c>GetFully</c> are concatenated until a level has nothing to add.
+        /// </summary>
+        /// <param name="key">the key to the cell holding the desired element</param>
+        /// <returns>the element (an empty string if the first level yields nothing)</returns>
+        public override string GetFully(string key)
+        {
+            StringBuilder result = new StringBuilder(tries.Count * 2);
+            try
+            {
+                string lastkey = key;
+                string[] p = new string[tries.Count];
+                char lastch = ' ';
+                for (int i = 0; i < tries.Count; i++)
+                {
+                    string r = tries[i].GetFully(lastkey);
+                    if (r == null || (r.Length == 1 && r[0] == EOM))
+                    {
+                        return result.ToString();
+                    }
+                    if (CannotFollow(lastch, r[0]))
+                    {
+                        return result.ToString();
+                    }
+                    // Remember the second-to-last character for the next CannotFollow check.
+                    lastch = r[r.Length - 2];
+                    p[i] = r;
+                    if (r[0] == '-')
+                    {
+                        // Shorten the key by LengthPP of the previous piece (there is none on
+                        // level 0), then by LengthPP of this piece.
+                        if (i > 0 && !TrySkip(key, LengthPP(p[i - 1]), out key))
+                        {
+                            break;
+                        }
+                        if (!TrySkip(key, LengthPP(r), out key))
+                        {
+                            break;
+                        }
+                    }
+                    result.Append(r);
+                    if (key.Length != 0)
+                    {
+                        lastkey = key; // keep querying with the last non-empty key
+                    }
+                }
+            }
+            // Index errors (e.g. from a malformed command) end the lookup quietly. Substring and
+            // List<T> report them as ArgumentOutOfRangeException, string and array indexing as
+            // IndexOutOfRangeException; either way the text assembled so far is returned.
+            catch (ArgumentOutOfRangeException) { }
+            catch (IndexOutOfRangeException) { }
+            return result.ToString();
+        }
+
+        /// <summary>
+        /// Return the element that is stored as last on a path belonging to the given
+        /// key.
+        /// The per-level results of <c>GetLastOnPath</c> are concatenated until a level has nothing to add.
+        /// </summary>
+        /// <param name="key">the key associated with the desired element</param>
+        /// <returns>the element that is stored as last on a path</returns>
+        public override string GetLastOnPath(string key)
+        {
+            StringBuilder result = new StringBuilder(tries.Count * 2);
+            try
+            {
+                string lastkey = key;
+                string[] p = new string[tries.Count];
+                char lastch = ' ';
+                for (int i = 0; i < tries.Count; i++)
+                {
+                    string r = tries[i].GetLastOnPath(lastkey);
+                    // No entry, or just the EOM marker: nothing more to append.
+                    if (r == null || (r.Length == 1 && r[0] == EOM))
+                    {
+                        return result.ToString();
+                    }
+                    if (CannotFollow(lastch, r[0]))
+                    {
+                        return result.ToString();
+                    }
+                    // Remember the second-to-last character for the next CannotFollow check.
+                    lastch = r[r.Length - 2];
+                    p[i] = r;
+                    if (r[0] == '-')
+                    {
+                        // Shorten the key by LengthPP of the previous piece (there is none on
+                        // level 0), then by LengthPP of this piece.
+                        if (i > 0 && !TrySkip(key, LengthPP(p[i - 1]), out key))
+                        {
+                            break;
+                        }
+                        if (!TrySkip(key, LengthPP(r), out key))
+                        {
+                            break;
+                        }
+                    }
+                    result.Append(r);
+                    if (key.Length != 0)
+                    {
+                        lastkey = key; // keep querying with the last non-empty key
+                    }
+                }
+            }
+            // Index errors (e.g. from a malformed command) end the lookup quietly. Substring and
+            // List<T> report them as ArgumentOutOfRangeException, string and array indexing as
+            // IndexOutOfRangeException; either way the text assembled so far is returned.
+            catch (ArgumentOutOfRangeException) { }
+            catch (IndexOutOfRangeException) { }
+            return result.ToString();
+        }
+
+        /// <summary>
+        /// Write this data structure to the given output stream by delegating to the base class implementation.
+        /// </summary>
+        /// <param name="os">the output stream</param>
+        /// <exception cref="IOException">if an I/O error occurs</exception>
+        public override void Store(IDataOutput os)
+        {
+            base.Store(os);
+        }
+
+        /// <summary>
+        /// Add an element to this structure consisting of the given key and patch
+        /// command.
+        /// <para>
+        /// The command is split with <see cref="Decompose(string)"/>; piece <c>i</c> is stored in trie
+        /// <c>i</c>, and <c>EOM_NODE</c> is stored in the trie after the last piece. Nothing is
+        /// stored when <paramref name="cmd"/> is empty.
+        /// </para>
+        /// </summary>
+        /// <param name="key">the key</param>
+        /// <param name="cmd">the patch command</param>
+        /// <exception cref="ArgumentOutOfRangeException">if a piece starting with <c>'-'</c> asks to
+        /// remove more characters than the key still has</exception>
+        public override void Add(string key, string cmd)
+        {
+            if (cmd.Length == 0)
+            {
+                return;
+            }
+
+            string[] pieces = Decompose(cmd);
+            int pieceCount = pieces.Length;
+
+            // One trie per piece plus one more for the closing EOM_NODE entry.
+            while (tries.Count <= pieceCount)
+            {
+                tries.Add(new Trie(forward));
+            }
+
+            // Key actually stored on each level: the current key, or the most recent
+            // non-empty one once skipping has used the key up.
+            string storedKey = key;
+            for (int level = 0; level < pieceCount; level++)
+            {
+                string piece = pieces[level];
+                if (key.Length > 0)
+                {
+                    storedKey = key;
+                }
+                tries[level].Add(storedKey, piece);
+
+                if (piece.Length == 0 || piece[0] != '-')
+                {
+                    continue;
+                }
+
+                // A piece starting with '-' shortens the key: first by LengthPP of the
+                // previous piece (when there is one), then by LengthPP of the piece itself.
+                if (level > 0 && !TrySkip(key, LengthPP(pieces[level - 1]), out key))
+                {
+                    // LUCENENET: Should never happen, but there is no catch block
+                    // here, so fail explicitly rather than carry on with a bad key.
+                    throw new ArgumentOutOfRangeException();
+                }
+                if (!TrySkip(key, LengthPP(piece), out key))
+                {
+                    // LUCENENET: Should never happen, but there is no catch block
+                    // here, so fail explicitly rather than carry on with a bad key.
+                    throw new ArgumentOutOfRangeException();
+                }
+            }
+
+            if (key.Length > 0)
+            {
+                storedKey = key;
+            }
+            tries[pieceCount].Add(storedKey, EOM_NODE);
+        }
+
+        /// <summary>
+        /// Break the given patch command into its constituent pieces.
+        /// <para>
+        /// The command is scanned two characters at a time (see <c>DashEven</c>). Every
+        /// <c>'-'</c> found at such a position becomes a two-character piece of its own, and each run
+        /// of characters between those pieces (or up to the end of the command) becomes one piece.
+        /// </para>
+        /// </summary>
+        /// <param name="cmd">the patch command</param>
+        /// <returns>an array containing the pieces of the command, in order; empty for an empty command</returns>
+        public virtual string[] Decompose(string cmd)
+        {
+            List<string> pieces = new List<string>();
+            int pos = 0;
+
+            while (pos >= 0 && pos < cmd.Length)
+            {
+                int dash = DashEven(cmd, pos);
+                if (dash == pos)
+                {
+                    // The '-' and the character after it form a piece of their own.
+                    pieces.Add(cmd.Substring(pos, 2));
+                    pos = dash + 2;
+                }
+                else if (dash < 0)
+                {
+                    // No further '-': the rest of the command is the final piece.
+                    pieces.Add(cmd.Substring(pos));
+                    break;
+                }
+                else
+                {
+                    // Everything up to the next '-' is one piece.
+                    pieces.Add(cmd.Substring(pos, dash - pos));
+                    pos = dash;
+                }
+            }
+
+            return pieces.ToArray();
+        }
+
+        /// <summary>
+        /// Reduce each contained <see cref="Trie"/> with <paramref name="by"/> and wrap the results in a new <see cref="MultiTrie2"/>.
+        /// </summary>
+        /// <param name="by">the <c>Reduce</c> instance handed to every contained trie</param>
+        /// <returns>a new <see cref="MultiTrie2"/> holding the reduced tries</returns>
+        public override Trie Reduce(Reduce by)
+        {
+            List<Trie> reducedLevels = new List<Trie>();
+            foreach (Trie level in tries)
+            {
+                reducedLevels.Add(level.Reduce(by));
+            }
+            MultiTrie2 reduced = new MultiTrie2(forward) { tries = reducedLevels };
+            return reduced;
+        }
+
+        // True only when 'after' is '-' or 'D' and 'goes' is that same character;
+        // every other combination is allowed to follow.
+        private bool CannotFollow(char after, char goes)
+        {
+            if (after != '-' && after != 'D')
+            {
+                return false;
+            }
+            return after == goes;
+        }
+
+        /// <summary>
+        /// Drop <paramref name="count"/> characters from <paramref name="in"/>: from the front when
+        /// <c>forward</c> is set, otherwise from the end.
+        /// </summary>
+        /// <returns><c>false</c> (with an empty <paramref name="result"/>) if the input has fewer than <paramref name="count"/> characters</returns>
+        private bool TrySkip(string @in, int count, out string result)
+        {
+            // LUCENENET: The Java code passed a negative length to substring and relied on
+            // the resulting exception. Here a too-short input is detected up front and
+            // reported by returning false, so the caller can safely break out
+            // of its loop.
+            int remaining = @in.Length - count;
+            if (remaining < 0)
+            {
+                result = string.Empty;
+                return false;
+            }
+            result = forward
+                ? @in.Substring(count, remaining)
+                : @in.Substring(0, remaining);
+            return true;
+        }
+
+        /// <summary>
+        /// Starting at <paramref name="from"/> and stepping two characters at a time, find the first <c>'-'</c>.
+        /// </summary>
+        /// <returns>the index of that <c>'-'</c>, or -1 if none is found before the end of the string</returns>
+        private int DashEven(string @in, int from)
+        {
+            for (int pos = from; pos < @in.Length; pos += 2)
+            {
+                if (@in[pos] == '-')
+                {
+                    return pos;
+                }
+            }
+            return -1;
+        }
+
+
+        /// <summary>
+        /// Sum a length over the two-character (operation, argument) pairs of a command: <c>'-'</c> and <c>'D'</c> add <c>argument - 'a' + 1</c>, <c>'R'</c> adds 1, anything else adds nothing.
+        /// </summary>
+        private int LengthPP(string cmd)
+        {
+            int total = 0;
+            for (int pos = 0; pos < cmd.Length; pos += 2)
+            {
+                char op = cmd[pos];
+                if (op == '-' || op == 'D')
+                {
+                    total += cmd[pos + 1] - 'a' + 1; // the argument is only read for these two
+                }
+                else if (op == 'R')
+                {
+                    total++;
+                }
+            }
+            return total;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Optimizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Optimizer.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Optimizer.cs
new file mode 100644
index 0000000..e299452
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Optimizer.cs
@@ -0,0 +1,227 @@
+using Lucene.Net.Support;
+using System.Collections.Generic;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The <see cref="Optimizer"/> class is a <see cref="Reduce"/> that produces a reduced <see cref="Trie"/> (one that has had empty rows removed).
+    /// <para>
+    /// The reduction will be made by joining two rows where the first is a subset of
+    /// the second.
+    /// </para>
+    /// </summary>
+    public class Optimizer : Reduce
+    {
+        /// <summary>
+        /// Constructor for the <see cref="Optimizer"/> object. It takes no arguments and has an empty body.
+        /// </summary>
+        public Optimizer() { }
+
+        /// <summary>
+        /// Build a new <see cref="Trie"/> from the given one by merging its rows wherever
+        /// <c>Merge</c> allows it, and then passing the result through <c>RemoveGaps</c>.
+        /// </summary>
+        /// <param name="orig">the <see cref="Trie"/> to consolidate</param>
+        /// <returns>the newly consolidated Trie</returns>
+        public override Trie Optimize(Trie orig)
+        {
+            IList<string> cmds = orig.cmds;
+            IList<Row> sourceRows = orig.rows;
+            IList<Row> mergedRows = new List<Row>();
+            // remap[j] = index in mergedRows that source row j ended up in.
+            int[] remap = new int[sourceRows.Count];
+
+            // Walk the source rows from last to first; each is wrapped in a Remap built from the table filled so far.
+            for (int j = sourceRows.Count - 1; j >= 0; j--)
+            {
+                Row candidate = new Remap(sourceRows[j], remap);
+                int target = -1;
+                for (int i = 0; i < mergedRows.Count && target < 0; i++)
+                {
+                    Row combined = Merge(candidate, mergedRows[i]);
+                    if (combined != null)
+                    {
+                        mergedRows[i] = combined;
+                        target = i;
+                    }
+                }
+                if (target < 0)
+                {
+                    // No already-collected row accepted it: it becomes a new row.
+                    target = mergedRows.Count;
+                    mergedRows.Add(candidate);
+                }
+                remap[j] = target;
+            }
+
+            int root = remap[orig.root];
+            Arrays.Fill(remap, -1);
+            mergedRows = RemoveGaps(root, mergedRows, new List<Row>(), remap);
+
+            return new Trie(orig.forward, remap[root], cmds, mergedRows);
+        }
+
+        /// <summary>
+        /// Merge the given rows and return the resulting <see cref="Row"/>.
+        /// The result is a new <see cref="Row"/>; neither argument's cell table is assigned to.
+        /// </summary>
+        /// <param name="master">the master <see cref="Row"/></param>
+        /// <param name="existing">the existing <see cref="Row"/></param>
+        /// <returns>the resulting <see cref="Row"/>, or <c>null</c> if the operation cannot be realized</returns>
+        public Row Merge(Row master, Row existing)
+        {
+            Row merged = new Row();
+
+            // Cells of master: copied when existing has no cell for the character, merged otherwise.
+            foreach (char ch in master.cells.Keys)
+            {
+                // XXX also must handle Cnt and Skip !!
+                Cell fromMaster = master.cells.ContainsKey(ch) ? master.cells[ch] : null;
+                Cell fromExisting = existing.cells.ContainsKey(ch) ? existing.cells[ch] : null;
+
+                Cell combined;
+                if (fromExisting == null)
+                {
+                    combined = new Cell(fromMaster);
+                }
+                else
+                {
+                    combined = Merge(fromMaster, fromExisting);
+                }
+                if (combined == null)
+                {
+                    return null; // one cell that cannot be merged rules out the whole row
+                }
+                merged.cells[ch] = combined;
+            }
+
+            // Characters for which master.At returns null take existing's cell as it is (same instance).
+            foreach (char ch in existing.cells.Keys)
+            {
+                if (master.At(ch) == null)
+                {
+                    merged.cells[ch] = existing.At(ch);
+                }
+            }
+            return merged;
+        }
+
+        /// <summary>
+        /// Merge the given <see cref="Cell"/>s and return the resulting <see cref="Cell"/>.
+        /// <para>
+        /// The merge is refused when the <c>skip</c> values differ, or when both cells carry a
+        /// non-negative <c>cmd</c> (or <c>ref</c>) and the two values differ. Otherwise the result takes
+        /// the master's <c>cmd</c>/<c>ref</c> when it is non-negative and the existing cell's value
+        /// when it is not, and its <c>cnt</c> is the sum of both counts.
+        /// </para>
+        /// </summary>
+        /// <param name="m">the master <see cref="Cell"/></param>
+        /// <param name="e">the existing <see cref="Cell"/></param>
+        /// <returns>the resulting <see cref="Cell"/>, or <c>null</c> if the operation cannot be realized</returns>
+        public virtual Cell Merge(Cell m, Cell e)
+        {
+            Cell n = new Cell();
+
+            if (m.skip != e.skip)
+            {
+                return null;
+            }
+
+            // A clash means both sides set the value (>= 0) and the values disagree.
+            bool cmdClash = m.cmd >= 0 && e.cmd >= 0 && m.cmd != e.cmd;
+            bool refClash = m.@ref >= 0 && e.@ref >= 0 && m.@ref != e.@ref;
+            if (cmdClash || refClash)
+            {
+                return null;
+            }
+
+            n.cmd = (m.cmd >= 0) ? m.cmd : e.cmd;
+            n.@ref = (m.@ref >= 0) ? m.@ref : e.@ref;
+            n.cnt = m.cnt + e.cnt;
+            n.skip = m.skip;
+            return n;
+        }
+    }
+}


Mime
View raw message