lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [48/50] [abbrv] lucenenet git commit: Ported Analysis.Stempel + tests (closes #190)
Date Sun, 23 Oct 2016 13:02:34 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Optimizer2.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Optimizer2.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Optimizer2.cs
new file mode 100644
index 0000000..d1b25a6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Optimizer2.cs
@@ -0,0 +1,92 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// An <see cref="Optimizer"/> whose <see cref="Merge(Cell, Cell)"/> only joins
+    /// <see cref="Cell"/>s that agree on command, reference and skip.
+    /// <para>
+    /// Rows are joined only when their non-<c>null</c> values do not collide, so
+    /// the information loss seen with <see cref="Optimizer"/> (the stemmer no
+    /// longer recognizing words) is curtailed: words the original trie was built
+    /// for stay recognizable, which allows the stemmer to be self-teaching.
+    /// </para>
+    /// </summary>
+    public class Optimizer2 : Optimizer
+    {
+        /// <summary>
+        /// Creates a new <see cref="Optimizer2"/>.
+        /// </summary>
+        public Optimizer2() { }
+
+        /// <summary>
+        /// Merge the given <see cref="Cell"/>s when they are compatible.
+        /// </summary>
+        /// <param name="m">the master <see cref="Cell"/></param>
+        /// <param name="e">the existing <see cref="Cell"/></param>
+        /// <returns>a new <see cref="Cell"/> built from <paramref name="m"/> with the count of <paramref name="e"/> added, or <c>null</c> if the cells differ</returns>
+        public override Cell Merge(Cell m, Cell e)
+        {
+            if (m.cmd != e.cmd || m.@ref != e.@ref || m.skip != e.skip)
+            {
+                return null;
+            }
+            Cell merged = new Cell(m);
+            merged.cnt += e.cnt;
+            return merged;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Reduce.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Reduce.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Reduce.cs
new file mode 100644
index 0000000..61a1c3f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Reduce.cs
@@ -0,0 +1,143 @@
+using Lucene.Net.Support;
+using System.Collections.Generic;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The <see cref="Reduce"/> object is used to remove gaps in a <see cref="Trie"/> which stores a dictionary.
+    /// </summary>
+    public class Reduce
+    {
+        /// <summary>
+        /// Constructor for the <see cref="Reduce"/> object.
+        /// </summary>
+        public Reduce() { }
+
+        /// <summary>
+        /// Optimize (remove holes in the rows) the given <see cref="Trie"/> and return the
+        /// restructured <see cref="Trie"/>.
+        /// </summary>
+        /// <param name="orig">the <see cref="Trie"/> to optimize</param>
+        /// <returns>the restructured <see cref="Trie"/></returns>
+        public virtual Trie Optimize(Trie orig)
+        {
+            IList<string> cmds = orig.cmds;
+            IList<Row> orows = orig.rows;
+            int[] remap = new int[orows.Count];
+
+            // -1 marks a row that has not been given a new index yet.
+            Arrays.Fill(remap, -1);
+            // The rows to walk are the original trie's rows; an empty list here would make every lookup throw.
+            IList<Row> rows = RemoveGaps(orig.root, orows, new List<Row>(), remap);
+
+            return new Trie(orig.forward, remap[orig.root], cmds, rows);
+        }
+
+        /// <summary>
+        /// Append the row at <paramref name="ind"/> and, recursively, every not yet visited row
+        /// it references to <paramref name="to"/>, recording new indexes in <paramref name="remap"/>.
+        /// </summary>
+        /// <param name="ind">index in <paramref name="old"/> of the row to process</param>
+        /// <param name="old">the rows to read from</param>
+        /// <param name="to">the list that receives the rewritten rows</param>
+        /// <param name="remap">new index per old row index; negative while a row is unvisited</param>
+        /// <returns><paramref name="to"/></returns>
+        internal virtual IList<Row> RemoveGaps(int ind, IList<Row> old, IList<Row> to, int[] remap)
+        {
+            remap[ind] = to.Count;
+
+            Row now = old[ind];
+            to.Add(now);
+            foreach (Cell c in now.cells.Values)
+            {
+                if (c.@ref >= 0 && remap[c.@ref] < 0)
+                {
+                    RemoveGaps(c.@ref, old, to, remap);
+                }
+            }
+            // Every row referenced from here has its new index now, so the placeholder can be replaced.
+            to[remap[ind]] = new Remap(now, remap);
+            return to;
+        }
+
+        /// <summary>
+        /// A <see cref="Row"/> whose cells are built from another row's cells, with references translated through a remap table.
+        /// </summary>
+        internal class Remap : Row
+        {
+            /// <summary>
+            /// Constructor for the <see cref="Remap"/> object.
+            /// </summary>
+            /// <param name="old">the <see cref="Row"/> whose cells are the source</param>
+            /// <param name="remap">new row index for each old row index</param>
+            public Remap(Row old, int[] remap)
+                : base()
+            {
+                foreach (KeyValuePair<char, Cell> entry in old.cells)
+                {
+                    Cell nc = new Cell(entry.Value);
+                    if (entry.Value.@ref >= 0)
+                    {
+                        nc.@ref = remap[nc.@ref];
+                    }
+                    cells[entry.Key] = nc;
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Row.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Row.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Row.cs
new file mode 100644
index 0000000..6fdad76
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Row.cs
@@ -0,0 +1,342 @@
+using Lucene.Net.Support;
+using System.Collections.Generic;
+using System.IO;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// The <see cref="Row"/> class represents a row in a matrix representation of a <see cref="Trie"/>.
+    /// </summary>
+    public class Row
+    {
+        // LUCENENET NOTE: This was a TreeMap in Java. Both TreeMap.put and the indexer used by
+        // this class replace the value of an existing key; only SortedDictionary.Add would throw.
+        internal SortedDictionary<char, Cell> cells = new SortedDictionary<char, Cell>();
+        internal int uniformCnt = 0; // set by UniformCmd: number of cells sharing the uniform command
+        internal int uniformSkip = 0; // set by UniformCmd: skip value of the first command cell seen
+
+        /// <summary>
+        /// Construct a <see cref="Row"/> from the given input: first the number of cells is
+        /// read, then for each cell its key character and its cmd, cnt, ref and skip values.
+        /// </summary>
+        /// <param name="is">the input stream</param>
+        /// <exception cref="IOException">if an I/O error occurs</exception>
+        public Row(IDataInput @is)
+        {
+            int count = @is.ReadInt();
+            for (int n = 0; n < count; n++)
+            {
+                char key = @is.ReadChar();
+                cells[key] = new Cell
+                {
+                    cmd = @is.ReadInt(), cnt = @is.ReadInt(), @ref = @is.ReadInt(), skip = @is.ReadInt()
+                };
+            }
+        }
+
+        /// <summary>
+        /// Construct an empty <see cref="Row"/>; cells are added later through the setters.
+        /// </summary>
+        public Row() { }
+
+        /// <summary>
+        /// Construct a <see cref="Row"/> that uses the cell map of the given <see cref="Row"/>.
+        /// </summary>
+        /// <param name="old">the <see cref="Row"/> whose cells are reused</param>
+        public Row(Row old)
+        {
+            cells = old.cells; // same dictionary instance, not a copy: both rows see later changes
+        }
+
+        /// <summary>
+        /// Store the given command in the <see cref="Cell"/> of the given <see cref="char"/>,
+        /// creating the <see cref="Cell"/> when the row has none for that character yet.
+        /// The cell's count becomes 1 for a non-negative command and 0 otherwise.
+        /// </summary>
+        /// <param name="way">the <see cref="char"/> defining the <see cref="Cell"/></param>
+        /// <param name="cmd">the new command</param>
+        public void SetCmd(char way, int cmd)
+        {
+            Cell target = At(way);
+            if (target == null)
+            {
+                // No cell for this character yet.
+                target = new Cell();
+                cells[way] = target;
+            }
+
+            target.cmd = cmd;
+            target.cnt = cmd < 0 ? 0 : 1;
+        }
+
+        /// <summary>
+        /// Store the given reference to the next row in the <see cref="Cell"/> of the given
+        /// <see cref="char"/>, creating the <see cref="Cell"/> when the row has none for that
+        /// character yet.
+        /// </summary>
+        /// <param name="way">the <see cref="char"/> defining the <see cref="Cell"/></param>
+        /// <param name="ref">the new reference value</param>
+        public void SetRef(char way, int @ref)
+        {
+            Cell target = At(way);
+            if (target == null)
+            {
+                // No cell for this character yet.
+                target = new Cell();
+                cells[way] = target;
+            }
+
+            // Whether found or just created, the cell now receives the reference.
+            target.@ref = @ref;
+        }
+
+        /// <summary>
+        /// Return the number of cells in use, i.e. cells that hold a command or a reference.
+        /// </summary>
+        /// <returns>the number of cells in use</returns>
+        public int GetCells()
+        {
+            int used = 0;
+            foreach (Cell cell in cells.Values)
+            {
+                // A cell is unused only when both its command and its reference are negative.
+                bool unused = cell.cmd < 0 && cell.@ref < 0;
+                if (!unused)
+                {
+                    used++;
+                }
+            }
+
+            return used;
+        }
+
+        /// <summary>
+        /// Return the number of references (how many transitions) to other rows, i.e. the
+        /// number of cells whose reference is not negative.
+        /// </summary>
+        /// <returns>the number of references</returns>
+        public int GetCellsPnt()
+        {
+            int references = 0;
+            foreach (Cell cell in cells.Values)
+            {
+                // Only non-negative references are counted.
+                if (cell.@ref >= 0)
+                {
+                    references++;
+                }
+            }
+
+            return references;
+        }
+
+        /// <summary>
+        /// Return the number of patch commands saved in this Row, i.e. the number of cells
+        /// whose command is not negative.
+        /// </summary>
+        /// <returns>the number of patch commands</returns>
+        public int GetCellsVal()
+        {
+            int commands = 0;
+            foreach (Cell cell in cells.Values)
+            {
+                // Only non-negative commands are counted.
+                if (cell.cmd >= 0)
+                {
+                    commands++;
+                }
+            }
+
+            return commands;
+        }
+
+        /// <summary>
+        /// Look up the command stored in the <see cref="Cell"/> for the given <see cref="char"/>.
+        /// </summary>
+        /// <param name="way">the <see cref="char"/> associated with the <see cref="Cell"/> holding the desired command</param>
+        /// <returns>the command, or -1 if this Row has no <see cref="Cell"/> for <paramref name="way"/></returns>
+        public int GetCmd(char way)
+        {
+            Cell cell = At(way);
+            return cell != null ? cell.cmd : -1;
+        }
+
+        /// <summary>
+        /// Return the number of patch commands that were in the <see cref="Cell"/> associated with
+        /// the given <see cref="char"/> before the <see cref="Trie"/> containing this <see cref="Row"/> was reduced.
+        /// </summary>
+        /// <param name="way">the <see cref="char"/> associated with the desired <see cref="Cell"/></param>
+        /// <returns>the cell's count, or -1 if this Row has no <see cref="Cell"/> for <paramref name="way"/></returns>
+        public int GetCnt(char way)
+        {
+            Cell cell = At(way);
+            return cell != null ? cell.cnt : -1;
+        }
+
+        /// <summary>
+        /// Look up the reference to the next <see cref="Row"/> stored in the <see cref="Cell"/>
+        /// for the given <see cref="char"/>.
+        /// </summary>
+        /// <param name="way">the <see cref="char"/> associated with the desired <see cref="Cell"/></param>
+        /// <returns>the reference, or -1 if this Row has no <see cref="Cell"/> for <paramref name="way"/></returns>
+        public int GetRef(char way)
+        {
+            Cell cell = At(way);
+            return cell != null ? cell.@ref : -1;
+        }
+
+        /// <summary>
+        /// Write the contents of this <see cref="Row"/> to the given output stream.
+        /// Cells holding neither a command nor a reference are not written.
+        /// </summary>
+        /// <param name="os">the output stream</param>
+        /// <exception cref="IOException">if an I/O error occurs</exception>
+        public virtual void Store(IDataOutput os)
+        {
+            // Write the number of cells actually emitted below (not cells.Count), so the
+            // stored count always matches the records the reading constructor consumes.
+            os.WriteInt(GetCells());
+            foreach (KeyValuePair<char, Cell> entry in cells)
+            {
+                Cell e = entry.Value;
+                if (e.cmd < 0 && e.@ref < 0)
+                {
+                    continue;
+                }
+                os.WriteChar(entry.Key);
+                os.WriteInt(e.cmd);
+                os.WriteInt(e.cnt);
+                os.WriteInt(e.@ref);
+                os.WriteInt(e.skip);
+            }
+        }
+
+        /// <summary>
+        /// Return the command shared by all command-carrying <see cref="Cell"/>s in this Row,
+        /// provided the Row holds no references to other rows.
+        /// <para>
+        /// As a side effect, <c>uniformCnt</c> and <c>uniformSkip</c> are reset and then updated
+        /// while the cells are scanned.
+        /// </para>
+        /// </summary>
+        /// <param name="eqSkip">when <c>true</c>, cells with the same command must also have the
+        /// same skip value; when <c>false</c> the skip value is not compared</param>
+        /// <returns>the shared command, or -1 if no cell has a command, a cell references
+        /// another row, or two cells disagree</returns>
+        public int UniformCmd(bool eqSkip)
+        {
+            int shared = -1;
+            uniformCnt = 1;
+            uniformSkip = 0;
+
+            foreach (Cell cell in cells.Values)
+            {
+                // A reference to another row rules out a uniform command.
+                if (cell.@ref >= 0)
+                {
+                    return -1;
+                }
+
+                // Cells without a command do not take part in the comparison.
+                if (cell.cmd < 0)
+                {
+                    continue;
+                }
+
+                // The first command seen becomes the one all later cells are compared with.
+                if (shared < 0)
+                {
+                    shared = cell.cmd;
+                    uniformSkip = cell.skip;
+                    continue;
+                }
+
+                // A different command, or a different skip when skips must match, breaks uniformity.
+                bool sameSkip = !eqSkip || uniformSkip == cell.skip;
+                if (shared != cell.cmd || !sameSkip)
+                {
+                    return -1;
+                }
+
+                uniformCnt++;
+            }
+
+            return shared;
+        }
+
+        /// <summary>
+        /// Write the contents of this <see cref="Row"/> to the <see cref="TextWriter"/>: each
+        /// cell as <c>[key:cell]</c>, followed by a line terminator.
+        /// </summary>
+        /// <param name="out">the writer that receives the text</param>
+        public virtual void Print(TextWriter @out)
+        {
+            foreach (KeyValuePair<char, Cell> entry in cells)
+            {
+                string text = "[" + entry.Key + ":" + entry.Value + "]";
+                @out.Write(text);
+            }
+            @out.WriteLine();
+        }
+
+        internal Cell At(char index) { // cell stored for index, or null when there is none
+            Cell found; // a single TryGetValue replaces the ContainsKey + indexer double lookup
+            return cells.TryGetValue(index, out found) ? found : null;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Trie.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Trie.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Trie.cs
new file mode 100644
index 0000000..905f213
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/Trie.cs
@@ -0,0 +1,472 @@
+using Lucene.Net.Support;
+using System.Collections.Generic;
+using System.IO;
+
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    /// <summary>
+    /// A <see cref="Trie"/> is used to store a dictionary of words and their stems.
+    /// <para>
+    /// Actually, what is stored are words with their respective patch commands. A
+    /// trie can be termed forward (keys read from left to right) or backward (keys
+    /// read from right to left). This property will vary depending on the language
+    /// for which a <see cref="Trie"/> is constructed.
+    /// </para>
+    /// </summary>
+    public class Trie
+    {
+        internal IList<Row> rows = new List<Row>();
+        internal IList<string> cmds = new List<string>();
+        internal int root;
+
+        internal bool forward = false;
+
+        /// <summary>
+        /// Deserializing constructor: rebuilds a <see cref="Trie"/> from the given input.
+        /// <para>
+        /// Values are consumed in this order: the forward flag, the root row index,
+        /// the number of patch commands followed by each command string, and the
+        /// number of rows followed by each serialized <see cref="Row"/>.
+        /// </para>
+        /// </summary>
+        /// <param name="is">the input to read from</param>
+        /// <exception cref="IOException">if an I/O error occurs</exception>
+        public Trie(IDataInput @is)
+        {
+            forward = @is.ReadBoolean();
+            root = @is.ReadInt();
+
+            int cmdCount = @is.ReadInt();
+            for (int n = 0; n < cmdCount; n++)
+            {
+                cmds.Add(@is.ReadUTF());
+            }
+
+            int rowCount = @is.ReadInt();
+            for (int n = 0; n < rowCount; n++)
+            {
+                rows.Add(new Row(@is));
+            }
+        }
+
+        /// <summary>
+        /// Creates an empty <see cref="Trie"/> whose only row is a new, empty root <see cref="Row"/>.
+        /// </summary>
+        /// <param name="forward"><c>true</c> if keys are read from their first character
+        /// onwards, <c>false</c> if they are read from their last character backwards</param>
+        public Trie(bool forward)
+        {
+            this.forward = forward;
+            root = 0;
+            rows.Add(new Row());
+        }
+
+        /// <summary>
+        /// Creates a <see cref="Trie"/> over already built data. The supplied lists are
+        /// stored by reference, not copied.
+        /// </summary>
+        /// <param name="forward"><c>true</c> if read left to right, <c>false</c> if read right to left</param>
+        /// <param name="root">index (into <paramref name="rows"/>) of the row that is the root node</param>
+        /// <param name="cmds">the patch commands to store</param>
+        /// <param name="rows">the rows of this <see cref="Trie"/>; each <see cref="Row"/> is one node</param>
+        public Trie(bool forward, int root, IList<string> cmds, IList<Row> rows)
+        {
+            this.forward = forward;
+            this.root = root;
+            this.cmds = cmds;
+            this.rows = rows;
+        }
+
+        /// <summary>
+        /// Collects every distinct patch command found while walking the path of the given key.
+        /// <para>
+        /// The key is consumed one character at a time, following row references from
+        /// the root row. The command of each visited cell is recorded once, in the
+        /// order in which it is first met. The walk stops early as soon as a character
+        /// has no reference to a further row.
+        /// </para>
+        /// </summary>
+        /// <param name="key">the key to look up; must not be <c>null</c></param>
+        /// <returns>the distinct patch commands on the path, or <c>null</c> if there are
+        /// none (which includes an empty <paramref name="key"/>)</returns>
+        public virtual string[] GetAll(string key)
+        {
+            // An empty key has no characters to walk; without this guard the final
+            // e.Next() below would index outside the string.
+            if (key.Length == 0)
+            {
+                return null;
+            }
+
+            // At most one command per character can be collected.
+            int[] res = new int[key.Length];
+            int resc = 0;
+            Row now = GetRow(root);
+            StrEnum e = new StrEnum(key, forward);
+            bool br = false;
+
+            // All characters but the last: record the command, then descend.
+            for (int i = 0; i < key.Length - 1; i++)
+            {
+                char ch = e.Next();
+                resc = AppendDistinct(res, resc, now.GetCmd(ch));
+
+                int w = now.GetRef(ch);
+                if (w < 0)
+                {
+                    br = true;
+                    break;
+                }
+                now = GetRow(w);
+            }
+
+            // Last character: only its command matters, there is nothing left to descend into.
+            if (!br)
+            {
+                resc = AppendDistinct(res, resc, now.GetCmd(e.Next()));
+            }
+
+            if (resc < 1)
+            {
+                return null;
+            }
+            string[] R = new string[resc];
+            for (int j = 0; j < resc; j++)
+            {
+                R[j] = cmds[res[j]];
+            }
+            return R;
+        }
+
+        /// <summary>
+        /// Stores <paramref name="cmd"/> after the first <paramref name="count"/> entries of
+        /// <paramref name="found"/>, unless it is negative or already among those entries.
+        /// </summary>
+        /// <param name="found">buffer of command indexes collected so far</param>
+        /// <param name="count">number of used entries in <paramref name="found"/></param>
+        /// <param name="cmd">the command index to add; negative values are ignored</param>
+        /// <returns>the number of used entries after the call</returns>
+        private static int AppendDistinct(int[] found, int count, int cmd)
+        {
+            if (cmd < 0)
+            {
+                return count;
+            }
+            for (int j = 0; j < count; j++)
+            {
+                if (found[j] == cmd)
+                {
+                    return count;
+                }
+            }
+            found[count] = cmd;
+            return count + 1;
+        }
+
+        /// <summary>
+        /// Return the number of cells in this <see cref="Trie"/> object, computed as the
+        /// sum of <c>GetCells()</c> over all of its rows.
+        /// </summary>
+        /// <returns>the number of cells</returns>
+        public virtual int GetCells()
+        {
+            int total = 0;
+            for (int i = 0; i < rows.Count; i++)
+            {
+                total += rows[i].GetCells();
+            }
+            return total;
+        }
+
+        /// <summary>
+        /// Return the sum of <c>GetCellsPnt()</c> over all rows of this <see cref="Trie"/>.
+        /// This is the figure reported as "pntcells" by <see cref="PrintInfo"/>
+        /// (presumably the cells that point to another row - confirm against <see cref="Row"/>).
+        /// </summary>
+        /// <returns>the summed per-row count</returns>
+        public virtual int GetCellsPnt()
+        {
+            int total = 0;
+            for (int i = 0; i < rows.Count; i++)
+            {
+                total += rows[i].GetCellsPnt();
+            }
+            return total;
+        }
+
+        /// <summary>
+        /// Return the sum of <c>GetCellsVal()</c> over all rows of this <see cref="Trie"/>.
+        /// This is the figure reported as "valcells" by <see cref="PrintInfo"/>
+        /// (presumably the cells that carry a command - confirm against <see cref="Row"/>).
+        /// </summary>
+        /// <returns>the summed per-row count</returns>
+        public virtual int GetCellsVal()
+        {
+            int total = 0;
+            for (int i = 0; i < rows.Count; i++)
+            {
+                total += rows[i].GetCellsVal();
+            }
+            return total;
+        }
+
+        /// <summary>
+        /// Return the element that is stored in a cell associated with the given key.
+        /// <para>
+        /// The key is walked from the root row. After each matched cell, as many
+        /// further key characters as that cell's skip count are consumed without being
+        /// looked up. The lookup fails if a character has no cell, if the key ends in
+        /// the middle of a skip, or if key characters remain but there is no further
+        /// row to descend into.
+        /// </para>
+        /// </summary>
+        /// <param name="key">the key; must not be <c>null</c></param>
+        /// <returns>the patch command of the last matched cell, or <c>null</c> if the
+        /// lookup fails, the key is empty, or that cell carries no command</returns>
+        public virtual string GetFully(string key)
+        {
+            Row now = GetRow(root);
+            int cmd = -1;
+            StrEnum e = new StrEnum(key, forward);
+
+            // i counts consumed key characters; it advances inside the body because
+            // skipped characters are consumed there as well.
+            for (int i = 0; i < key.Length;)
+            {
+                char ch = e.Next();
+                i++;
+
+                Cell c = now.At(ch);
+                if (c == null)
+                {
+                    return null;
+                }
+
+                cmd = c.cmd;
+
+                // Consume the characters this cell says to skip; their values are
+                // irrelevant, but the key must be long enough to supply them.
+                for (int skip = c.skip; skip > 0; skip--)
+                {
+                    if (i >= key.Length)
+                    {
+                        return null;
+                    }
+                    e.Next();
+                    i++;
+                }
+
+                int w = now.GetRef(ch);
+                if (w >= 0)
+                {
+                    now = GetRow(w);
+                }
+                else if (i < key.Length)
+                {
+                    // Characters remain but the path ends here.
+                    return null;
+                }
+            }
+            return (cmd == -1) ? null : cmds[cmd];
+        }
+
+        /// <summary>
+        /// Return the element that is stored as last on a path associated with the
+        /// given key.
+        /// <para>
+        /// The key is walked from the root row; the command of every visited cell that
+        /// has one replaces the previously remembered command. The walk stops when a
+        /// character has no reference to a further row or when the key is exhausted.
+        /// </para>
+        /// </summary>
+        /// <param name="key">the key associated with the desired element; must not be <c>null</c></param>
+        /// <returns>the last on path element, or <c>null</c> if no cell on the path
+        /// carries a command (which includes an empty <paramref name="key"/>)</returns>
+        public virtual string GetLastOnPath(string key)
+        {
+            // An empty key has no path; without this guard the final e.Next()
+            // below would index outside the string.
+            if (key.Length == 0)
+            {
+                return null;
+            }
+
+            Row now = GetRow(root);
+            int w;
+            string last = null;
+            StrEnum e = new StrEnum(key, forward);
+
+            // All characters but the last: remember the command, then descend.
+            for (int i = 0; i < key.Length - 1; i++)
+            {
+                char ch = e.Next();
+                w = now.GetCmd(ch);
+                if (w >= 0)
+                {
+                    last = cmds[w];
+                }
+                w = now.GetRef(ch);
+                if (w >= 0)
+                {
+                    now = GetRow(w);
+                }
+                else
+                {
+                    return last;
+                }
+            }
+
+            // Last character: its command wins if present, otherwise the most recent one.
+            w = now.GetCmd(e.Next());
+            return (w >= 0) ? cmds[w] : last;
+        }
+
+        /// <summary>
+        /// Return the <see cref="Row"/> at the given index.
+        /// </summary>
+        /// <param name="index">the index containing the desired <see cref="Row"/></param>
+        /// <returns>the <see cref="Row"/>, or <c>null</c> if <paramref name="index"/> is
+        /// negative or not smaller than the number of rows</returns>
+        private Row GetRow(int index)
+        {
+            bool inRange = index >= 0 && index < rows.Count;
+            return inRange ? rows[index] : null;
+        }
+
+        /// <summary>
+        /// Write this <see cref="Trie"/> to the given output.
+        /// <para>
+        /// The layout is: the forward flag, the root row index, the number of patch
+        /// commands followed by each command, and the number of rows followed by each
+        /// row as written by its own <c>Store</c> method.
+        /// </para>
+        /// </summary>
+        /// <param name="os">the output to write to</param>
+        /// <exception cref="IOException">if an I/O error occurs</exception>
+        public virtual void Store(IDataOutput os)
+        {
+            os.WriteBoolean(forward);
+            os.WriteInt(root);
+
+            os.WriteInt(cmds.Count);
+            for (int i = 0; i < cmds.Count; i++)
+            {
+                os.WriteUTF(cmds[i]);
+            }
+
+            os.WriteInt(rows.Count);
+            for (int i = 0; i < rows.Count; i++)
+            {
+                rows[i].Store(os);
+            }
+        }
+
+        /// <summary>
+        /// Add the given key associated with the given patch command. If either
+        /// parameter is null or empty this method will return without executing.
+        /// <para>
+        /// The command is registered in the command list if it is not there yet.
+        /// Rows are created as needed along the path of the key, and the command is
+        /// set on the cell of the key's final character.
+        /// </para>
+        /// </summary>
+        /// <param name="key">the key</param>
+        /// <param name="cmd">the patch command</param>
+        public virtual void Add(string key, string cmd)
+        {
+            if (key == null || cmd == null)
+            {
+                return;
+            }
+            // An empty key has no final character to attach the command to; without
+            // this check the closing e.Next() would index outside the string after
+            // the command had already been registered.
+            if (key.Length == 0 || cmd.Length == 0)
+            {
+                return;
+            }
+            int id_cmd = cmds.IndexOf(cmd);
+            if (id_cmd == -1)
+            {
+                id_cmd = cmds.Count;
+                cmds.Add(cmd);
+            }
+
+            int node = root;
+            Row r = GetRow(node);
+
+            StrEnum e = new StrEnum(key, forward);
+
+            // Walk (or build) the path for every character except the last one.
+            for (int i = 0; i < e.Length - 1; i++)
+            {
+                char ch = e.Next();
+                node = r.GetRef(ch);
+                if (node >= 0)
+                {
+                    r = GetRow(node);
+                }
+                else
+                {
+                    // No row behind this character yet: append one and link it.
+                    node = rows.Count;
+                    Row n = new Row();
+                    rows.Add(n);
+                    r.SetRef(ch, node);
+                    r = n;
+                }
+            }
+            r.SetCmd(e.Next(), id_cmd);
+        }
+
+        /// <summary>
+        /// Applies the given <see cref="Egothor.Stemmer.Reduce"/> to this <see cref="Trie"/> by calling
+        /// its <c>Optimize</c> method, and returns whatever that call produces.
+        /// </summary>
+        /// <param name="by">the reduction to run over this <see cref="Trie"/></param>
+        /// <returns>the <see cref="Trie"/> returned by <paramref name="by"/></returns>
+        public virtual Trie Reduce(Reduce by)
+        {
+            Trie reduced = by.Optimize(this);
+            return reduced;
+        }
+
+        /// <summary>
+        /// Writes one line of debugging information to the given writer: the prefix
+        /// followed by the number of rows, commands, cells, value cells and pointer cells.
+        /// </summary>
+        /// <param name="out">the writer that receives the line</param>
+        /// <param name="prefix">text placed at the start of the line</param>
+        public virtual void PrintInfo(TextWriter @out, string prefix)
+        {
+            string line = prefix + "nds " + rows.Count + " cmds " + cmds.Count;
+            line = line + " cells " + GetCells();
+            line = line + " valcells " + GetCellsVal();
+            line = line + " pntcells " + GetCellsPnt();
+            @out.WriteLine(line);
+        }
+
+        /// <summary>
+        /// Cursor over the characters of a string that moves either from the first
+        /// character forwards or from the last character backwards.
+        /// This class is part of the Egothor Project
+        /// </summary>
+        internal class StrEnum
+        {
+            private readonly string s;
+            private readonly int by;
+            private int from;
+
+            /// <summary>
+            /// Constructor for the <see cref="StrEnum"/> object
+            /// </summary>
+            /// <param name="s">the string to step through</param>
+            /// <param name="up"><c>true</c> to start at index 0 and move forwards,
+            /// <c>false</c> to start at the last index and move backwards</param>
+            internal StrEnum(string s, bool up)
+            {
+                this.s = s;
+                by = up ? 1 : -1;
+                from = up ? 0 : s.Length - 1;
+            }
+
+            /// <summary>
+            /// The length of the underlying string (not the number of characters left).
+            /// </summary>
+            internal int Length
+            {
+                get { return s.Length; }
+            }
+
+            /// <summary>
+            /// Returns the character at the current position and advances the position
+            /// by one step in the configured direction. No bounds check is made beyond
+            /// the one performed by the string indexer.
+            /// </summary>
+            internal char Next()
+            {
+                char current = s[from];
+                from += by;
+                return current;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Lucene.Net.Analysis.Stempel.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Lucene.Net.Analysis.Stempel.csproj b/src/Lucene.Net.Analysis.Stempel/Lucene.Net.Analysis.Stempel.csproj
new file mode 100644
index 0000000..0f82c21
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Lucene.Net.Analysis.Stempel.csproj
@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{A76DAD88-E3A5-40F9-9114-FACD77BD8265}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Analysis</RootNamespace>
+    <AssemblyName>Lucene.Net.Analysis.Stempel</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Net.Http" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Egothor.Stemmer\Cell.cs" />
+    <Compile Include="Egothor.Stemmer\Compile.cs" />
+    <Compile Include="Egothor.Stemmer\Diff.cs" />
+    <Compile Include="Egothor.Stemmer\DiffIt.cs" />
+    <Compile Include="Egothor.Stemmer\Gener.cs" />
+    <Compile Include="Egothor.Stemmer\Lift.cs" />
+    <Compile Include="Egothor.Stemmer\MultiTrie.cs" />
+    <Compile Include="Egothor.Stemmer\MultiTrie2.cs" />
+    <Compile Include="Egothor.Stemmer\Optimizer.cs" />
+    <Compile Include="Egothor.Stemmer\Optimizer2.cs" />
+    <Compile Include="Egothor.Stemmer\Reduce.cs" />
+    <Compile Include="Egothor.Stemmer\Row.cs" />
+    <Compile Include="Egothor.Stemmer\Trie.cs" />
+    <Compile Include="Pl\PolishAnalyzer.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="RectangularArrays.cs" />
+    <Compile Include="Stempel\StempelFilter.cs" />
+    <Compile Include="Stempel\StempelPolishStemFilterFactory.cs" />
+    <Compile Include="Stempel\StempelStemmer.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="Pl\stemmer_20000.tbl" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+      <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project>
+      <Name>Lucene.Net.Analysis.Common</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+      <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="Pl\stopwords.txt" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Pl/PolishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Pl/PolishAnalyzer.cs b/src/Lucene.Net.Analysis.Stempel/Pl/PolishAnalyzer.cs
new file mode 100644
index 0000000..7cc5773
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Pl/PolishAnalyzer.cs
@@ -0,0 +1,164 @@
+using Egothor.Stemmer;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Stempel;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using System;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Pl
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// <see cref="Analyzer"/> for Polish.
+    /// </summary>
+    public sealed class PolishAnalyzer : StopwordAnalyzerBase
+    {
+        private readonly CharArraySet stemExclusionSet;
+        private readonly Trie stemTable;
+
+        /// <summary>
+        /// File containing default Polish stopwords.
+        /// </summary>
+        public readonly static string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+        /// <summary>
+        /// File containing default Polish stemmer table.
+        /// </summary>
+        public readonly static string DEFAULT_STEMMER_FILE = "stemmer_20000.tbl";
+
+        /// <summary>
+        /// Returns the default stop word set. The same instance is handed out on every call;
+        /// it is loaded from the embedded <see cref="DEFAULT_STOPWORD_FILE"/> resource the
+        /// first time the nested defaults holder is used.
+        /// </summary>
+        /// <returns>default stop words set (described as unmodifiable by the original
+        /// documentation; that depends on the word list loader - verify before relying on it).</returns>
+        public static CharArraySet GetDefaultStopSet()
+        {
+            CharArraySet defaultStopSet = DefaultsHolder.DEFAULT_STOP_SET;
+            return defaultStopSet;
+        }
+
+        /// <summary>
+        /// Returns the default stemmer table. The same instance is handed out on every call
+        /// and is also the table used by every <see cref="PolishAnalyzer"/>.
+        /// <para>
+        /// Note that <see cref="Trie"/> exposes public mutating members, so the returned
+        /// table is not protected against modification; callers should treat it as read-only.
+        /// </para>
+        /// </summary>
+        /// <returns>the shared default stemmer table</returns>
+        public static Trie GetDefaultTable()
+        {
+            Trie defaultTable = DefaultsHolder.DEFAULT_TABLE;
+            return defaultTable;
+        }
+
+        /// <summary>
+        /// Loads <see cref="DEFAULT_STOP_SET"/> and <see cref="DEFAULT_TABLE"/> in a lazy fashion:
+        /// both are read from embedded resources by the static constructor, which runs once,
+        /// the first time the outer class accesses one of the two fields.
+        /// </summary>
+        private class DefaultsHolder
+        {
+            internal static readonly CharArraySet DEFAULT_STOP_SET;
+            internal static readonly Trie DEFAULT_TABLE;
+
+            static DefaultsHolder()
+            {
+                try
+                {
+                    // Resource names are "<namespace of PolishAnalyzer>.<file name>";
+                    // lines starting with "#" in the stop word file are comments.
+                    DEFAULT_STOP_SET = WordlistLoader.GetWordSet(IOUtils.GetDecodingReader(typeof(PolishAnalyzer),
+                        typeof(PolishAnalyzer).Namespace + "." + DEFAULT_STOPWORD_FILE, Encoding.UTF8), "#",
+#pragma warning disable 612, 618
+                        LuceneVersion.LUCENE_CURRENT);
+#pragma warning restore 612, 618
+                }
+                catch (IOException ex)
+                {
+                    // default set should always be present as it is part of the
+                    // distribution (embedded resource).
+                    // InvalidOperationException derives from SystemException, so handlers
+                    // written against the previously thrown type still match.
+                    throw new InvalidOperationException("Unable to load default stopword set", ex);
+                }
+
+                try
+                {
+                    // NOTE(review): GetManifestResourceStream returns null when the resource
+                    // is missing; that case is presumably left to StempelStemmer.Load - confirm.
+                    DEFAULT_TABLE = StempelStemmer.Load(typeof(PolishAnalyzer).Assembly.GetManifestResourceStream(
+                        typeof(PolishAnalyzer).Namespace + "." + DEFAULT_STEMMER_FILE));
+                }
+                catch (IOException ex)
+                {
+                    // default table should always be present as it is part of the
+                    // distribution (embedded resource)
+                    throw new InvalidOperationException("Unable to load default stemming tables", ex);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
+        /// Delegates to the two-argument constructor, passing the default stop word set
+        /// held by the nested defaults holder.
+        /// </summary>
+        /// <param name="matchVersion">lucene compatibility version</param>
+        public PolishAnalyzer(LuceneVersion matchVersion)
+            : this(matchVersion, DefaultsHolder.DEFAULT_STOP_SET)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words.
+        /// Delegates to the three-argument constructor with <c>CharArraySet.EMPTY_SET</c>
+        /// as the stem exclusion set.
+        /// </summary>
+        /// <param name="matchVersion">lucene compatibility version</param>
+        /// <param name="stopwords">a stopword set</param>
+        public PolishAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
+            : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+        /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
+        /// stemming.
+        /// <para>
+        /// The analyzer keeps its own copy of <paramref name="stemExclusionSet"/>, wrapped
+        /// as an unmodifiable set, and always uses the default stemmer table.
+        /// </para>
+        /// </summary>
+        /// <param name="matchVersion">lucene compatibility version</param>
+        /// <param name="stopwords">a stopword set</param>
+        /// <param name="stemExclusionSet">a set of terms not to be stemmed</param>
+        public PolishAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
+            : base(matchVersion, stopwords)
+        {
+            this.stemTable = DefaultsHolder.DEFAULT_TABLE;
+
+            CharArraySet exclusionCopy = CharArraySet.Copy(matchVersion, stemExclusionSet);
+            this.stemExclusionSet = CharArraySet.UnmodifiableSet(exclusionCopy);
+        }
+
+        /// <summary>
+        /// Creates a <see cref="Analyzer.TokenStreamComponents"/>
+        /// which tokenizes all the text in the provided <see cref="TextReader"/>.
+        /// </summary>
+        /// <param name="fieldName">name of the field being analyzed; not used by this implementation</param>
+        /// <param name="reader">the text to tokenize</param>
+        /// <returns>
+        /// A <see cref="Analyzer.TokenStreamComponents"/> built from an <see cref="StandardTokenizer"/>
+        /// filtered with <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>, 
+        /// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is provided and <see cref="StempelFilter"/>.
+        /// </returns>
+        public override TokenStreamComponents CreateComponents(string fieldName,
+            TextReader reader)
+        {
+            Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
+
+            TokenStream chain = new StandardFilter(matchVersion, tokenizer);
+            chain = new LowerCaseFilter(matchVersion, chain);
+            chain = new StopFilter(matchVersion, chain, stopwords);
+
+            // Keyword marking is only inserted when there is something to exclude from stemming.
+            bool hasExclusions = stemExclusionSet.Any();
+            if (hasExclusions)
+            {
+                chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
+            }
+
+            StempelStemmer stemmer = new StempelStemmer(stemTable);
+            chain = new StempelFilter(chain, stemmer);
+
+            return new TokenStreamComponents(tokenizer, chain);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Pl/stemmer_20000.tbl
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Pl/stemmer_20000.tbl b/src/Lucene.Net.Analysis.Stempel/Pl/stemmer_20000.tbl
new file mode 100644
index 0000000..64c89a9
Binary files /dev/null and b/src/Lucene.Net.Analysis.Stempel/Pl/stemmer_20000.tbl differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Pl/stopwords.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Pl/stopwords.txt b/src/Lucene.Net.Analysis.Stempel/Pl/stopwords.txt
new file mode 100644
index 0000000..167e9e0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Pl/stopwords.txt
@@ -0,0 +1,186 @@
+# This file was created from the carrot2 project and is distributed under the BSD license.
+# See http://project.carrot2.org/license.html
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# From trunk/core/carrot2-util-text/src-resources/stopwords.pl
+vol
+o.o.
+mgr
+godz
+zł
+www
+pl
+ul
+tel
+hab
+prof
+inż
+dr
+i
+u
+aby
+albo
+ale
+ani
+aż
+bardzo
+bez
+bo
+bowiem
+by
+byli
+bym
+był
+była
+było
+były
+być
+będzie
+będą
+chce
+choć
+co
+coraz
+coś
+czy
+czyli
+często
+dla
+do
+gdy
+gdyby
+gdyż
+gdzie
+go
+ich
+im
+inne
+iż
+ja
+jak
+jakie
+jako
+je
+jednak
+jednym
+jedynie
+jego
+jej
+jest
+jeszcze
+jeśli
+jeżeli
+już
+ją
+kiedy
+kilku
+kto
+która
+które
+którego
+której
+który
+których
+którym
+którzy
+lat
+lecz
+lub
+ma
+mają
+mamy
+mi
+miał
+mimo
+mnie
+mogą
+może
+można
+mu
+musi
+na
+nad
+nam
+nas
+nawet
+nic
+nich
+nie
+niej
+nim
+niż
+no
+nowe
+np
+nr
+o
+od
+ok
+on
+one
+oraz
+pan
+po
+pod
+ponad
+ponieważ
+poza
+przed
+przede
+przez
+przy
+raz
+razie
+roku
+również
+się
+sobie
+sposób
+swoje
+są
+ta
+tak
+takich
+takie
+także
+tam
+te
+tego
+tej
+temu
+ten
+teraz
+też
+to
+trzeba
+tu
+tych
+tylko
+tym
+tys
+tzw
+tę
+w
+we
+wie
+więc
+wszystko
+wśród
+właśnie
+z
+za
+zaś
+ze
+że
+żeby
+ii
+iii
+iv
+vi
+vii
+viii
+ix
+xi
+xii
+xiii
+xiv
+xv

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Properties/AssemblyInfo.cs b/src/Lucene.Net.Analysis.Stempel/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..56385ee
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Properties/AssemblyInfo.cs
@@ -0,0 +1,39 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Analysis.Stempel")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Analysis.Stempel")]
+[assembly: AssemblyCopyright("Copyright ©  2016")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("a76dad88-e3a5-40f9-9114-facd77bd8265")]
+
+// for testing
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.Stempel")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/RectangularArrays.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/RectangularArrays.cs b/src/Lucene.Net.Analysis.Stempel/RectangularArrays.cs
new file mode 100644
index 0000000..25acdf2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/RectangularArrays.cs
@@ -0,0 +1,52 @@
+//----------------------------------------------------------------------------------------
+//	Copyright © 2007 - 2013 Tangible Software Solutions Inc.
+//	this class can be used by anyone provided that the copyright notice remains intact.
+//
+//	this class provides the logic to simulate Java rectangular arrays, which are jagged
+//	arrays with inner arrays of the same length. A size of -1 indicates unknown length.
+//----------------------------------------------------------------------------------------
+
+using Lucene.Net.Util;
+
+internal static partial class RectangularArrays
+{
+    /// <summary>
+    /// Allocates a jagged <c>int</c> array whose rows all share the same length.
+    /// </summary>
+    /// <param name="Size1">Number of rows. A negative value produces <c>null</c>.</param>
+    /// <param name="Size2">Length of each row. A negative value leaves every row <c>null</c>.</param>
+    /// <returns>The allocated array, or <c>null</c> when <paramref name="Size1"/> is negative.</returns>
+    internal static int[][] ReturnRectangularIntArray(int Size1, int Size2)
+    {
+        // Unknown outer length: nothing can be allocated.
+        if (Size1 < 0)
+        {
+            return null;
+        }
+
+        int[][] rows = new int[Size1][];
+        if (Size2 >= 0)
+        {
+            for (int row = 0; row < rows.Length; row++)
+            {
+                rows[row] = new int[Size2];
+            }
+        }
+
+        return rows;
+    }
+
+    /// <summary>
+    /// Allocates a jagged <see cref="BytesRef"/> array whose rows all share the same length.
+    /// The row slots themselves are left <c>null</c>.
+    /// </summary>
+    /// <param name="Size1">Number of rows. A negative value produces <c>null</c>.</param>
+    /// <param name="Size2">Length of each row. A negative value leaves every row <c>null</c>.</param>
+    /// <returns>The allocated array, or <c>null</c> when <paramref name="Size1"/> is negative.</returns>
+    internal static BytesRef[][] ReturnRectangularBytesRefArray(int Size1, int Size2)
+    {
+        // Unknown outer length: nothing can be allocated.
+        if (Size1 < 0)
+        {
+            return null;
+        }
+
+        BytesRef[][] rows = new BytesRef[Size1][];
+        if (Size2 >= 0)
+        {
+            for (int row = 0; row < rows.Length; row++)
+            {
+                rows[row] = new BytesRef[Size2];
+            }
+        }
+
+        return rows;
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Stempel/StempelFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Stempel/StempelFilter.cs b/src/Lucene.Net.Analysis.Stempel/Stempel/StempelFilter.cs
new file mode 100644
index 0000000..f2964ea
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Stempel/StempelFilter.cs
@@ -0,0 +1,91 @@
+using Lucene.Net.Analysis.Tokenattributes;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Stempel
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// A <see cref="TokenFilter"/> that replaces each term with the stem produced by a
+    /// <see cref="StempelStemmer"/>.
+    /// <para>
+    /// Note: the input to this filter must already be in lower case, so a
+    /// <see cref="Analysis.Core.LowerCaseFilter"/> or <see cref="Analysis.Core.LowerCaseTokenizer"/>
+    /// has to be applied to the stream before it reaches this filter.
+    /// </para>
+    /// </summary>
+    public sealed class StempelFilter : TokenFilter
+    {
+        /// <summary>
+        /// Default length threshold: only terms strictly longer than this are stemmed,
+        /// all other terms pass through unchanged.
+        /// </summary>
+        public static readonly int DEFAULT_MIN_LENGTH = 3;
+
+        private readonly ICharTermAttribute termAtt;
+        private readonly IKeywordAttribute keywordAtt;
+        private readonly StempelStemmer stemmer;
+        private readonly int minLength;
+
+        /// <summary>
+        /// Creates a filter that uses the supplied stemmer and <see cref="DEFAULT_MIN_LENGTH"/>.
+        /// </summary>
+        /// <param name="in">input token stream</param>
+        /// <param name="stemmer">stemmer applied to each eligible term</param>
+        public StempelFilter(TokenStream @in, StempelStemmer stemmer)
+            : this(@in, stemmer, DEFAULT_MIN_LENGTH)
+        {
+        }
+
+        /// <summary>
+        /// Creates a filter that uses the supplied stemmer and length threshold.
+        /// </summary>
+        /// <param name="in">input token stream</param>
+        /// <param name="stemmer">stemmer applied to each eligible term</param>
+        /// <param name="minLength">Length threshold. For performance reasons only terms whose
+        /// length is strictly greater than this value are stemmed; the rest are returned as is.</param>
+        public StempelFilter(TokenStream @in, StempelStemmer stemmer, int minLength)
+            : base(@in)
+        {
+            this.stemmer = stemmer;
+            this.minLength = minLength;
+            // Attribute registration order is kept: term first, keyword second.
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+            this.keywordAtt = AddAttribute<IKeywordAttribute>();
+        }
+
+        /// <summary>
+        /// Advances to the next input <see cref="Token"/> and, when it is eligible, replaces
+        /// its term text with the stem.
+        /// </summary>
+        /// <returns><c>true</c> if a token is available; <c>false</c> at the end of the stream.</returns>
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+
+            // Keyword-marked terms and terms at or below the length threshold are left alone.
+            bool eligible = !keywordAtt.Keyword && termAtt.Length > minLength;
+            if (eligible)
+            {
+                StringBuilder stem = stemmer.Stem(termAtt.ToString());
+                if (stem != null) // no stem available: keep the original term
+                {
+                    termAtt.SetEmpty().Append(stem);
+                }
+            }
+
+            return true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Stempel/StempelPolishStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Stempel/StempelPolishStemFilterFactory.cs b/src/Lucene.Net.Analysis.Stempel/Stempel/StempelPolishStemFilterFactory.cs
new file mode 100644
index 0000000..759f403
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Stempel/StempelPolishStemFilterFactory.cs
@@ -0,0 +1,48 @@
+using Lucene.Net.Analysis.Pl;
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.Stempel
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Factory for <see cref="StempelFilter"/> using a Polish stemming table.
+    /// </summary>
+    public class StempelPolishStemFilterFactory : TokenFilterFactory
+    {
+        /// <summary>
+        /// Creates a new <see cref="StempelPolishStemFilterFactory"/>.
+        /// </summary>
+        /// <param name="args">Factory arguments. This factory defines no parameters of its own,
+        /// so any entry still present after the base constructor has run is rejected.</param>
+        /// <exception cref="ArgumentException">if <paramref name="args"/> still contains entries
+        /// after the base constructor has run.</exception>
+        public StempelPolishStemFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            // NOTE(review): presumably the base constructor removes the entries it recognises - confirm.
+            if (args.Count > 0)
+            {
+                // Concatenating the dictionary itself would only print its type name,
+                // so list the offending entries explicitly as {key=value, ...}.
+                string unknown = string.Join(", ", args.Select(pair => pair.Key + "=" + pair.Value));
+                throw new ArgumentException("Unknown parameters: {" + unknown + "}");
+            }
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="input"/> in a <see cref="StempelFilter"/> whose stemmer is built
+        /// from <see cref="PolishAnalyzer.GetDefaultTable()"/>.
+        /// </summary>
+        /// <param name="input">token stream to be stemmed</param>
+        /// <returns>the stemming token stream</returns>
+        public override TokenStream Create(TokenStream input)
+        {
+            return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.GetDefaultTable()));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Analysis.Stempel/Stempel/StempelStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Stempel/Stempel/StempelStemmer.cs b/src/Lucene.Net.Analysis.Stempel/Stempel/StempelStemmer.cs
new file mode 100644
index 0000000..43e544a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Stempel/Stempel/StempelStemmer.cs
@@ -0,0 +1,105 @@
+using Egothor.Stemmer;
+using Lucene.Net.Support;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Stempel
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Stemmer class is a convenient facade for other stemmer-related classes. The
+    /// core stemming algorithm and its implementation is taken verbatim from the
+    /// Egothor project ( <a href="http://www.egothor.org">www.egothor.org </a>).
+    /// <para>
+    /// Even though the stemmer tables supplied in the distribution package are built
+    /// for Polish language, there is nothing language-specific here.
+    /// </para>
+    /// <para>
+    /// Each instance owns a single reusable <see cref="StringBuilder"/>; see <see cref="Stem(string)"/>.
+    /// </para>
+    /// </summary>
+    public class StempelStemmer
+    {
+        private readonly Trie stemmer;
+        private readonly StringBuilder buffer = new StringBuilder();
+
+        /// <summary>
+        /// Create a Stemmer using selected stemmer table
+        /// </summary>
+        /// <param name="stemmerTable">stemmer table; it is read and disposed by <see cref="Load(Stream)"/>.</param>
+        public StempelStemmer(Stream stemmerTable)
+            : this(Load(stemmerTable))
+        {
+        }
+
+        /// <summary>
+        /// Create a Stemmer using pre-loaded stemmer table
+        /// </summary>
+        /// <param name="stemmer">pre-loaded stemmer table</param>
+        public StempelStemmer(Trie stemmer)
+        {
+            this.stemmer = stemmer;
+        }
+
+        /// <summary>
+        /// Load a stemmer table from a stream. The stream starts with a UTF header string;
+        /// if that string contains the letter 'M' (case-insensitive) a <see cref="MultiTrie2"/>
+        /// is read, otherwise a plain <see cref="Trie"/>.
+        /// </summary>
+        /// <param name="stemmerTable">stream containing the serialized table. It is disposed
+        /// before this method returns, whether or not loading succeeds.</param>
+        /// <returns>the loaded stemmer table</returns>
+        /// <exception cref="System.ArgumentNullException">if <paramref name="stemmerTable"/> is <c>null</c>.</exception>
+        public static Trie Load(Stream stemmerTable)
+        {
+            // Fail with a meaningful exception instead of a NullReferenceException
+            // raised from inside the reader (and again from its Dispose).
+            if (stemmerTable == null)
+            {
+                throw new System.ArgumentNullException("stemmerTable");
+            }
+
+            using (DataInputStream @in = new DataInputStream(stemmerTable))
+            {
+                string method = @in.ReadUTF().ToUpperInvariant();
+                if (method.IndexOf('M') < 0)
+                {
+                    return new Trie(@in);
+                }
+
+                return new MultiTrie2(@in);
+            }
+        }
+
+        /// <summary>
+        /// Stem a word.
+        /// </summary>
+        /// <param name="word">input word to be stemmed.</param>
+        /// <returns>stemmed word, or null if the stem could not be generated. The returned
+        /// builder is this instance's internal buffer and is overwritten by the next call.</returns>
+        public StringBuilder Stem(string word)
+        {
+            string cmd = stemmer.GetLastOnPath(word);
+            if (cmd == null)
+            {
+                return null;
+            }
+
+            // Reuse the shared buffer: reset it, seed it with the word, then apply the patch command.
+            buffer.Length = 0;
+            buffer.Append(word);
+            Diff.Apply(buffer, cmd);
+
+            return buffer.Length > 0 ? buffer : null;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Core/Lucene.Net.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Lucene.Net.csproj b/src/Lucene.Net.Core/Lucene.Net.csproj
index 75817c5..39bf69e 100644
--- a/src/Lucene.Net.Core/Lucene.Net.csproj
+++ b/src/Lucene.Net.Core/Lucene.Net.csproj
@@ -626,12 +626,16 @@
     <Compile Include="Support\ConcurrentHashMapWrapper.cs" />
     <Compile Include="Support\ConcurrentHashSet.cs" />
     <Compile Include="Support\CultureContext.cs" />
+    <Compile Include="Support\DataInputStream.cs" />
+    <Compile Include="Support\DataOutputStream.cs" />
     <Compile Include="Support\ErrorHandling.cs" />
     <Compile Include="Support\FileStreamExtensions.cs" />
     <Compile Include="Support\HashCodeMerge.cs" />
     <Compile Include="Support\ICallable.cs" />
     <Compile Include="Support\ICharSequence.cs" />
     <Compile Include="Support\ICompletionService.cs" />
+    <Compile Include="Support\IDataInput.cs" />
+    <Compile Include="Support\IDataOutput.cs" />
     <Compile Include="Support\IdentityComparer.cs" />
     <Compile Include="Support\IdentityHashMap.cs" />
     <Compile Include="Support\IdentityHashSet.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Core/Support/DataInputStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/DataInputStream.cs b/src/Lucene.Net.Core/Support/DataInputStream.cs
new file mode 100644
index 0000000..30dc6a6
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/DataInputStream.cs
@@ -0,0 +1,323 @@
+using System;
+using System.IO;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Java's DataInputStream is similar to .NET's BinaryReader. However, it reads
+    /// using a modified UTF-8 format that cannot be read using BinaryReader.
+    /// This is a port of DataInputStream that is fully compatible with Java's DataOutputStream.
+    /// <para>
+    /// Multi-byte values are read in big-endian order (most significant byte first).
+    /// </para>
+    /// </summary>
+    public class DataInputStream : IDataInput, IDisposable
+    {
+        private readonly Stream @in;
+
+        /// <summary>
+        /// Single byte pushed back by <see cref="ReadLine"/> after looking past a lone '\r',
+        /// or -1 when nothing is pending. Every read path consumes this byte first.
+        /// </summary>
+        private int pushedBackByte = -1;
+
+        /// <summary>
+        /// Working arrays reused by <see cref="ReadUTF()"/>; grown on demand.
+        /// </summary>
+        private byte[] bytearr = new byte[80];
+        private char[] chararr = new char[80];
+
+        /// <summary>
+        /// Scratch buffer used by <see cref="ReadLong"/>.
+        /// </summary>
+        private readonly byte[] readBuffer = new byte[8];
+
+        /// <summary>
+        /// Creates a DataInputStream that uses the specified
+        /// underlying stream.
+        /// </summary>
+        /// <param name="in">the specified input stream</param>
+        public DataInputStream(Stream @in)
+        {
+            this.@in = @in;
+        }
+
+        /// <summary>
+        /// Reads one raw byte, honoring a pending pushed-back byte.
+        /// </summary>
+        /// <returns>the byte as a value in 0..255, or -1 at the end of the stream</returns>
+        private int ReadRawByte()
+        {
+            int pending = pushedBackByte;
+            if (pending >= 0)
+            {
+                pushedBackByte = -1;
+                return pending;
+            }
+            return @in.ReadByte();
+        }
+
+        /// <summary>
+        /// Reads raw bytes, honoring a pending pushed-back byte. When a byte is pending it is
+        /// delivered on its own (a short read), otherwise the call goes straight to the stream.
+        /// </summary>
+        private int ReadRaw(byte[] b, int off, int len)
+        {
+            if (pushedBackByte < 0 || len <= 0)
+            {
+                return @in.Read(b, off, len);
+            }
+
+            b[off] = (byte)pushedBackByte;
+            pushedBackByte = -1;
+            return 1;
+        }
+
+        /// <summary>
+        /// Reads up to <c>b.Length</c> bytes into <paramref name="b"/>.
+        /// </summary>
+        /// <returns>the number of bytes read, as reported by the underlying stream
+        /// (0 at the end of the stream)</returns>
+        public int Read(byte[] b)
+        {
+            return ReadRaw(b, 0, b.Length);
+        }
+
+        /// <summary>
+        /// Reads up to <paramref name="len"/> bytes into <paramref name="b"/> starting at <paramref name="off"/>.
+        /// </summary>
+        /// <returns>the number of bytes read, as reported by the underlying stream
+        /// (0 at the end of the stream)</returns>
+        public int Read(byte[] b, int off, int len)
+        {
+            return ReadRaw(b, off, len);
+        }
+
+        /// <summary>
+        /// Fills the whole of <paramref name="b"/>.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">if the stream ends before the array is full</exception>
+        public void ReadFully(byte[] b)
+        {
+            ReadFully(b, 0, b.Length);
+        }
+
+        /// <summary>
+        /// Reads exactly <paramref name="len"/> bytes into <paramref name="b"/> starting at <paramref name="off"/>.
+        /// </summary>
+        /// <exception cref="IndexOutOfRangeException">if <paramref name="len"/> is negative</exception>
+        /// <exception cref="EndOfStreamException">if the stream ends before <paramref name="len"/> bytes were read</exception>
+        public void ReadFully(byte[] b, int off, int len)
+        {
+            if (len < 0)
+            {
+                throw new IndexOutOfRangeException();
+            }
+
+            int done = 0;
+            while (done < len)
+            {
+                int count = ReadRaw(b, off + done, len - done);
+                if (count == 0)
+                {
+                    throw new EndOfStreamException();
+                }
+                done += count;
+            }
+        }
+
+        /// <summary>
+        /// Skips over up to <paramref name="n"/> bytes relative to the current position.
+        /// Reaching the end of the stream is not an error; fewer bytes are skipped instead.
+        /// </summary>
+        /// <param name="n">the number of bytes to skip</param>
+        /// <returns>the number of bytes actually skipped</returns>
+        public int SkipBytes(int n)
+        {
+            if (n <= 0)
+            {
+                return 0;
+            }
+
+            int skipped = 0;
+
+            // A pushed-back byte logically precedes the stream position.
+            if (pushedBackByte >= 0)
+            {
+                pushedBackByte = -1;
+                skipped = 1;
+            }
+
+            if (@in.CanSeek)
+            {
+                // Seek relative to the current position, clamped so we never move past the end.
+                long remaining = @in.Length - @in.Position;
+                if (remaining > 0)
+                {
+                    int step = (int)Math.Min((long)(n - skipped), remaining);
+                    @in.Seek(step, SeekOrigin.Current);
+                    skipped += step;
+                }
+                return skipped;
+            }
+
+            // Non-seekable stream: read and discard.
+            byte[] scratch = new byte[Math.Min(n - skipped, 4096)];
+            while (skipped < n)
+            {
+                int count = @in.Read(scratch, 0, Math.Min(scratch.Length, n - skipped));
+                if (count <= 0)
+                {
+                    break;
+                }
+                skipped += count;
+            }
+            return skipped;
+        }
+
+        /// <summary>
+        /// Reads one byte and returns <c>true</c> if it is non-zero.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">if the stream is exhausted</exception>
+        public bool ReadBoolean()
+        {
+            return ReadUnsignedByte() != 0;
+        }
+
+        /// <summary>
+        /// Reads one byte.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">if the stream is exhausted</exception>
+        public byte ReadByte()
+        {
+            return (byte)ReadUnsignedByte();
+        }
+
+        /// <summary>
+        /// Reads one byte and returns it as a value in the range 0..255.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">if the stream is exhausted</exception>
+        public int ReadUnsignedByte()
+        {
+            int ch = ReadRawByte();
+            if (ch < 0)
+            {
+                throw new EndOfStreamException();
+            }
+            return ch;
+        }
+
+        /// <summary>
+        /// Reads two bytes as a signed big-endian 16-bit value.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">if the stream ends before two bytes were read</exception>
+        public short ReadShort()
+        {
+            // unchecked: values above 0x7FFF must wrap to negative even in a checked build.
+            return unchecked((short)ReadUnsignedShort());
+        }
+
+        /// <summary>
+        /// Reads two bytes as an unsigned big-endian 16-bit value (0..65535).
+        /// </summary>
+        /// <exception cref="EndOfStreamException">if the stream ends before two bytes were read</exception>
+        public int ReadUnsignedShort()
+        {
+            int ch1 = ReadRawByte();
+            int ch2 = ReadRawByte();
+            // Either byte being -1 makes the OR negative.
+            if ((ch1 | ch2) < 0)
+            {
+                throw new EndOfStreamException();
+            }
+            return (ch1 << 8) | ch2;
+        }
+
+        /// <summary>
+        /// Reads two bytes as a big-endian UTF-16 code unit.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">if the stream ends before two bytes were read</exception>
+        public char ReadChar()
+        {
+            return (char)ReadUnsignedShort();
+        }
+
+        /// <summary>
+        /// Reads four bytes as a signed big-endian 32-bit value.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">if the stream ends before four bytes were read</exception>
+        public int ReadInt()
+        {
+            int ch1 = ReadRawByte();
+            int ch2 = ReadRawByte();
+            int ch3 = ReadRawByte();
+            int ch4 = ReadRawByte();
+            if ((ch1 | ch2 | ch3 | ch4) < 0)
+            {
+                throw new EndOfStreamException();
+            }
+            return (ch1 << 24) | (ch2 << 16) | (ch3 << 8) | ch4;
+        }
+
+        /// <summary>
+        /// Reads eight bytes as a signed big-endian 64-bit value.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">if the stream ends before eight bytes were read</exception>
+        public long ReadLong()
+        {
+            ReadFully(readBuffer, 0, 8);
+
+            long value = 0;
+            for (int i = 0; i < 8; i++)
+            {
+                value = (value << 8) | readBuffer[i];
+            }
+            return value;
+        }
+
+        /// <summary>
+        /// Reads four bytes via <see cref="ReadInt"/> and converts the bit pattern to a <c>float</c>.
+        /// </summary>
+        public float ReadFloat()
+        {
+            return Number.IntBitsToFloat(ReadInt());
+        }
+
+        /// <summary>
+        /// Reads eight bytes via <see cref="ReadLong"/> and reinterprets the bit pattern as a <c>double</c>.
+        /// </summary>
+        public double ReadDouble()
+        {
+            return BitConverter.Int64BitsToDouble(ReadLong());
+        }
+
+        /// <summary>
+        /// Reads bytes up to the next line terminator ('\n', '\r' or "\r\n") or the end of
+        /// the stream, converting each byte directly to a <c>char</c>. The terminator is
+        /// consumed but not included in the result.
+        /// </summary>
+        /// <returns>the line, or <c>null</c> if the end of the stream was reached before any
+        /// byte was read</returns>
+        [Obsolete]
+        public string ReadLine()
+        {
+            System.Text.StringBuilder line = new System.Text.StringBuilder();
+            int c;
+
+            while (true)
+            {
+                c = ReadRawByte();
+                if (c == -1 || c == '\n')
+                {
+                    break;
+                }
+
+                if (c == '\r')
+                {
+                    // Swallow the '\n' of a "\r\n" pair; any other byte belongs to the next
+                    // read, so push it back instead of losing it. The pushback slot is
+                    // guaranteed to be empty here because ReadRawByte just drained it.
+                    int next = ReadRawByte();
+                    if ((next != '\n') && (next != -1))
+                    {
+                        pushedBackByte = next;
+                    }
+                    break;
+                }
+
+                line.Append((char)c);
+            }
+
+            if ((c == -1) && (line.Length == 0))
+            {
+                return null;
+            }
+            return line.ToString();
+        }
+
+        /// <summary>
+        /// Reads a string written in Java's modified UTF-8 format from this stream.
+        /// </summary>
+        /// <returns>the decoded string</returns>
+        public string ReadUTF()
+        {
+            return ReadUTF(this);
+        }
+
+        /// <summary>
+        /// Reads a string in Java's modified UTF-8 format: an unsigned 16-bit byte count
+        /// followed by that many bytes of one-, two- or three-byte encoded characters.
+        /// </summary>
+        /// <param name="in">the input to read from</param>
+        /// <returns>the decoded string</returns>
+        /// <exception cref="FormatException">if the bytes are not valid modified UTF-8</exception>
+        public static string ReadUTF(IDataInput @in)
+        {
+            int utflen = @in.ReadUnsignedShort();
+            byte[] bytes;
+            char[] chars;
+
+            DataInputStream dis = @in as DataInputStream;
+            if (dis != null)
+            {
+                // Reuse (and if necessary grow) the per-instance working arrays.
+                if (dis.bytearr.Length < utflen)
+                {
+                    dis.bytearr = new byte[utflen * 2];
+                    dis.chararr = new char[utflen * 2];
+                }
+                bytes = dis.bytearr;
+                chars = dis.chararr;
+            }
+            else
+            {
+                bytes = new byte[utflen];
+                chars = new char[utflen];
+            }
+
+            @in.ReadFully(bytes, 0, utflen);
+
+            int count = 0;
+            int charCount = 0;
+
+            // Fast path: copy the leading run of single-byte (ASCII) characters.
+            while (count < utflen)
+            {
+                int c = bytes[count];
+                if (c > 127)
+                {
+                    break;
+                }
+                count++;
+                chars[charCount++] = (char)c;
+            }
+
+            // General path: decode by the high nibble of the lead byte.
+            while (count < utflen)
+            {
+                int c = bytes[count];
+                int char2, char3;
+                switch (c >> 4)
+                {
+                    case 0:
+                    case 1:
+                    case 2:
+                    case 3:
+                    case 4:
+                    case 5:
+                    case 6:
+                    case 7:
+                        /* 0xxxxxxx */
+                        count++;
+                        chars[charCount++] = (char)c;
+                        break;
+
+                    case 12:
+                    case 13:
+                        /* 110x xxxx   10xx xxxx */
+                        count += 2;
+                        if (count > utflen)
+                        {
+                            throw new FormatException(
+                                "malformed input: partial character at end");
+                        }
+                        char2 = bytes[count - 1];
+                        if ((char2 & 0xC0) != 0x80)
+                        {
+                            throw new FormatException(
+                                "malformed input around byte " + count);
+                        }
+                        chars[charCount++] = (char)(((c & 0x1F) << 6) | (char2 & 0x3F));
+                        break;
+
+                    case 14:
+                        /* 1110 xxxx  10xx xxxx  10xx xxxx */
+                        count += 3;
+                        if (count > utflen)
+                        {
+                            throw new FormatException(
+                                "malformed input: partial character at end");
+                        }
+                        char2 = bytes[count - 2];
+                        char3 = bytes[count - 1];
+                        if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
+                        {
+                            throw new FormatException(
+                                "malformed input around byte " + (count - 1));
+                        }
+                        chars[charCount++] = (char)(((c & 0x0F) << 12) |
+                                                    ((char2 & 0x3F) << 6) |
+                                                    (char3 & 0x3F));
+                        break;
+
+                    default:
+                        /* 10xx xxxx,  1111 xxxx */
+                        throw new FormatException(
+                            "malformed input around byte " + count);
+                }
+            }
+
+            // The number of chars produced may be less than utflen
+            return new string(chars, 0, charCount);
+        }
+
+        /// <summary>
+        /// Disposes the underlying stream.
+        /// </summary>
+        public void Dispose()
+        {
+            @in.Dispose();
+        }
+    }
+}


Mime
View raw message