lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [47/50] [abbrv] lucenenet git commit: Ported Analysis.Stempel + tests (closes #190)
Date Sun, 23 Oct 2016 13:02:33 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Core/Support/DataOutputStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/DataOutputStream.cs b/src/Lucene.Net.Core/Support/DataOutputStream.cs
new file mode 100644
index 0000000..518dba7
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/DataOutputStream.cs
@@ -0,0 +1,256 @@
+using System;
+using System.IO;
+using System.Runtime.CompilerServices;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Java's DataOutputStream is similar to .NET's BinaryWriter. However, it writes
+    /// in a modified UTF-8 format that cannot be read (or duplicated) using BinaryWriter.
+    /// This is a port of DataOutputStream that is fully compatible with Java's DataInputStream.
+    /// </summary>
+    public class DataOutputStream : IDataOutput, IDisposable
+    {
+        /// <summary>
+        /// The number of bytes written to the data output stream so far.
+        /// If this counter overflows, it is clamped to <see cref="int.MaxValue"/>.
+        /// </summary>
+        protected int written;
+
+        /// <summary>Scratch buffer for <see cref="WriteUTF(string)"/>, allocated on demand.</summary>
+        private byte[] bytearr = null;
+
+        /// <summary>Eight-byte scratch buffer reused by <see cref="WriteLong(long)"/>.</summary>
+        private readonly byte[] writeBuffer = new byte[8];
+
+        /// <summary>The underlying stream that receives every byte this instance writes.</summary>
+        private readonly Stream @out;
+
+        /// <summary>
+        /// Creates a new data output stream to write data to the specified
+        /// underlying output stream. The counter <code>written</code> is
+        /// set to zero.
+        /// </summary>
+        /// <param name="out">the underlying output stream, to be saved for later use.</param>
+        public DataOutputStream(Stream @out)
+        {
+            this.@out = @out;
+        }
+
+        /// <summary>
+        /// Adds <paramref name="value"/> to <see cref="written"/>, saturating at
+        /// <see cref="int.MaxValue"/> instead of going negative on overflow.
+        /// </summary>
+        /// <param name="value">Number of bytes that were just written.</param>
+        private void IncCount(int value)
+        {
+            // The overflow test below relies on two's-complement wrap-around, so
+            // the addition must stay unchecked even when the assembly is built
+            // with overflow checking enabled.
+            int total = unchecked(written + value);
+            written = total < 0 ? int.MaxValue : total;
+        }
+
+        /// <summary>
+        /// Writes the specified byte (the low eight bits of the argument
+        /// <code>b</code>) to the underlying output stream. If no exception
+        /// is thrown, the counter <code>written</code> is incremented by
+        /// <code>1</code>.
+        /// </summary>
+        /// <param name="b">the <code>byte</code> to be written.</param>
+        [MethodImpl(MethodImplOptions.Synchronized)]
+        public virtual void Write(int b)
+        {
+            // Mask first so the narrowing cast cannot trap in a checked build.
+            @out.WriteByte((byte)(b & 0xFF));
+            IncCount(1);
+        }
+
+        /// <summary>
+        /// Writes <paramref name="len"/> bytes of <paramref name="b"/>, starting at
+        /// <paramref name="off"/>, and adds <paramref name="len"/> to the counter.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.Synchronized)]
+        public virtual void Write(byte[] b, int off, int len)
+        {
+            @out.Write(b, off, len);
+            IncCount(len);
+        }
+
+        /// <summary>Flushes the underlying stream.</summary>
+        public virtual void Flush()
+        {
+            @out.Flush();
+        }
+
+        /// <summary>Writes a single byte: <c>1</c> for <c>true</c>, <c>0</c> for <c>false</c>.</summary>
+        public void WriteBoolean(bool v)
+        {
+            byte flag = v ? (byte)1 : (byte)0;
+            @out.WriteByte(flag);
+            IncCount(1);
+        }
+
+        /// <summary>Writes the low eight bits of <paramref name="v"/> as a single byte.</summary>
+        public void WriteByte(int v)
+        {
+            @out.WriteByte((byte)(v & 0xFF));
+            IncCount(1);
+        }
+
+        /// <summary>Writes the low sixteen bits of <paramref name="v"/>, high byte first.</summary>
+        public void WriteShort(int v)
+        {
+            @out.WriteByte((byte)((v >> 8) & 0xFF));
+            @out.WriteByte((byte)(v & 0xFF));
+            IncCount(2);
+        }
+
+        /// <summary>Writes the low sixteen bits of <paramref name="v"/>, high byte first.</summary>
+        public void WriteChar(int v)
+        {
+            @out.WriteByte((byte)((v >> 8) & 0xFF));
+            @out.WriteByte((byte)(v & 0xFF));
+            IncCount(2);
+        }
+
+        /// <summary>Writes <paramref name="v"/> as four bytes, high byte first.</summary>
+        public void WriteInt(int v)
+        {
+            @out.WriteByte((byte)((v >> 24) & 0xFF));
+            @out.WriteByte((byte)((v >> 16) & 0xFF));
+            @out.WriteByte((byte)((v >> 8) & 0xFF));
+            @out.WriteByte((byte)(v & 0xFF));
+            IncCount(4);
+        }
+
+        /// <summary>Writes <paramref name="v"/> as eight bytes, high byte first.</summary>
+        public void WriteLong(long v)
+        {
+            // Fill the scratch buffer from the most significant byte down.
+            for (int i = 0; i < 8; i++)
+            {
+                writeBuffer[i] = (byte)((v >> (56 - (8 * i))) & 0xFF);
+            }
+            @out.Write(writeBuffer, 0, 8);
+            IncCount(8);
+        }
+
+        /// <summary>Writes the result of <c>Number.FloatToIntBits(v)</c> with <see cref="WriteInt(int)"/>.</summary>
+        public void WriteFloat(float v)
+        {
+            WriteInt(Number.FloatToIntBits(v));
+        }
+
+        /// <summary>Writes the result of <c>Number.DoubleToLongBits(v)</c> with <see cref="WriteLong(long)"/>.</summary>
+        public void WriteDouble(double v)
+        {
+            WriteLong(Number.DoubleToLongBits(v));
+        }
+
+        /// <summary>Writes the low eight bits of each character of <paramref name="s"/>, in order.</summary>
+        public void WriteBytes(string s)
+        {
+            int len = s.Length;
+            foreach (char ch in s)
+            {
+                @out.WriteByte((byte)(ch & 0xFF));
+            }
+            IncCount(len);
+        }
+
+        /// <summary>Writes each character of <paramref name="s"/> as two bytes, high byte first.</summary>
+        public void WriteChars(string s)
+        {
+            int len = s.Length;
+            foreach (char ch in s)
+            {
+                @out.WriteByte((byte)((ch >> 8) & 0xFF));
+                @out.WriteByte((byte)(ch & 0xFF));
+            }
+            IncCount(len * 2);
+        }
+
+        /// <summary>
+        /// Writes <paramref name="str"/> in the length-prefixed format produced by
+        /// <see cref="WriteUTF(string, IDataOutput)"/>.
+        /// </summary>
+        public void WriteUTF(string str)
+        {
+            WriteUTF(str, this);
+        }
+
+        /// <summary>
+        /// Encodes <paramref name="str"/> as a two-byte length (high byte first) followed by
+        /// one, two or three bytes per character, and hands the result to
+        /// <paramref name="out"/> in a single <c>Write</c> call.
+        /// </summary>
+        /// <returns>The number of bytes handed to <paramref name="out"/>.</returns>
+        /// <exception cref="FormatException">The encoded form exceeds 65535 bytes.</exception>
+        internal static int WriteUTF(string str, IDataOutput @out)
+        {
+            // First pass: count the encoded bytes. Only U+0001..U+007F fit in one byte.
+            int utflen = 0;
+            foreach (char ch in str)
+            {
+                utflen += (ch >= 0x0001 && ch <= 0x007F) ? 1 : (ch > 0x07FF) ? 3 : 2;
+            }
+
+            if (utflen > 65535)
+            {
+                throw new FormatException("encoded string too long: " + utflen + " bytes");
+            }
+
+            // Borrow the stream's own scratch buffer when there is one, growing it if needed.
+            DataOutputStream dos = @out as DataOutputStream;
+            if (dos != null && (dos.bytearr == null || dos.bytearr.Length < utflen + 2))
+            {
+                dos.bytearr = new byte[(utflen * 2) + 2];
+            }
+            byte[] buffer = dos != null ? dos.bytearr : new byte[utflen + 2];
+
+            int count = 0;
+            buffer[count++] = (byte)((utflen >> 8) & 0xFF);
+            buffer[count++] = (byte)(utflen & 0xFF);
+            // Second pass: emit one, two or three bytes per character.
+            for (int i = 0; i < str.Length; i++)
+            {
+                int c = str[i];
+                if (c >= 0x0001 && c <= 0x007F)
+                {
+                    buffer[count++] = (byte)c;
+                }
+                else if (c > 0x07FF)
+                {
+                    buffer[count++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
+                    buffer[count++] = (byte)(0x80 | ((c >> 6) & 0x3F));
+                    buffer[count++] = (byte)(0x80 | (c & 0x3F));
+                }
+                else
+                {
+                    buffer[count++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
+                    buffer[count++] = (byte)(0x80 | (c & 0x3F));
+                }
+            }
+
+            @out.Write(buffer, 0, utflen + 2);
+            return utflen + 2;
+        }
+
+        #region From FilterOutputStream
+
+        /// <summary>Writes all of <paramref name="b"/> via <see cref="Write(byte[], int, int)"/>.</summary>
+        public void Write(byte[] b)
+        {
+            Write(b, 0, b.Length);
+        }
+
+        /// <summary>Disposes the underlying stream.</summary>
+        public void Dispose()
+        {
+            @out.Dispose();
+        }
+
+        #endregion
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Core/Support/IDataInput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/IDataInput.cs b/src/Lucene.Net.Core/Support/IDataInput.cs
new file mode 100644
index 0000000..40d56cf
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/IDataInput.cs
@@ -0,0 +1,24 @@
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Equivalent to Java's DataInput interface: <c>ReadFully</c>, <c>SkipBytes</c>, one <c>Read*</c> method per primitive type, <c>ReadLine</c> and <c>ReadUTF</c>.
+    /// </summary>
+    public interface IDataInput
+    {
+        void ReadFully(byte[] b);
+        void ReadFully(byte[] b, int off, int len);
+        int SkipBytes(int n);
+        bool ReadBoolean();
+        byte ReadByte();
+        int ReadUnsignedByte();
+        short ReadShort();
+        int ReadUnsignedShort();
+        char ReadChar();
+        int ReadInt();
+        long ReadLong();
+        float ReadFloat();
+        double ReadDouble();
+        string ReadLine();
+        string ReadUTF();
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Core/Support/IDataOutput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/IDataOutput.cs b/src/Lucene.Net.Core/Support/IDataOutput.cs
new file mode 100644
index 0000000..6f81351
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/IDataOutput.cs
@@ -0,0 +1,23 @@
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Equivalent to Java's DataOutput interface: raw <c>Write</c> overloads, one <c>Write*</c> method per primitive type, <c>WriteBytes</c>, <c>WriteChars</c> and <c>WriteUTF</c>.
+    /// </summary>
+    public interface IDataOutput
+    {
+        void Write(int b);
+        void Write(byte[] b);
+        void Write(byte[] b, int off, int len);
+        void WriteBoolean(bool v);
+        void WriteByte(int v);
+        void WriteShort(int v);
+        void WriteChar(int v);
+        void WriteInt(int v);
+        void WriteLong(long v);
+        void WriteFloat(float v);
+        void WriteDouble(double v);
+        void WriteBytes(string s);
+        void WriteChars(string s);
+        void WriteUTF(string s);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestCompile.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestCompile.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestCompile.cs
new file mode 100644
index 0000000..1c0efc3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestCompile.cs
@@ -0,0 +1,211 @@
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Text;
+
+/*
+ Egothor Software License version 1.00
+ Copyright (C) 1997-2004 Leo Galambos.
+ Copyright (C) 2002-2004 "Egothor developers"
+ on behalf of the Egothor Project.
+ All rights reserved.
+
+ This  software  is  copyrighted  by  the "Egothor developers". If this
+ license applies to a single file or document, the "Egothor developers"
+ are the people or entities mentioned as copyright holders in that file
+ or  document.  If  this  license  applies  to the Egothor project as a
+ whole,  the  copyright holders are the people or entities mentioned in
+ the  file CREDITS. This file can be found in the same location as this
+ license in the distribution.
+
+ Redistribution  and  use  in  source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ following disclaimer.
+ 2. Redistributions  in binary form must reproduce the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ disclaimer  that  follows  these  conditions  in the documentation
+ and/or other materials provided with the distribution.
+ 3. The name "Egothor" must not be used to endorse or promote products
+ derived  from  this software without prior written permission. For
+ written permission, please contact Leo.G@seznam.cz
+ 4. Products  derived  from this software may not be called "Egothor",
+ nor  may  "Egothor"  appear  in  their name, without prior written
+ permission from Leo.G@seznam.cz.
+
+ In addition, we request that you include in the end-user documentation
+ provided  with  the  redistribution  and/or  in the software itself an
+ acknowledgement equivalent to the following:
+ "This product includes software developed by the Egothor Project.
+ http://egothor.sf.net/"
+
+ THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+ FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+ CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+ BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ This  software  consists  of  voluntary  contributions  made  by  many
+ individuals  on  behalf  of  the  Egothor  Project  and was originally
+ created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    public class TestCompile_ : LuceneTestCase
+    {
+        private const string RULES_FILE = "Lucene.Net.Tests.Analysis.Stempel.Egothor.Stemmer.testRules.txt";
+
+        /// <summary>Runs the compiler with <c>test</c> as its first argument.</summary>
+        [Test]
+        public void TestCompile()
+        {
+            AssertCompiles("test");
+        }
+
+        /// <summary>Runs the compiler with <c>-test</c> as its first argument.</summary>
+        [Test]
+        public void TestCompileBackwards()
+        {
+            AssertCompiles("-test");
+        }
+
+        /// <summary>Runs the compiler with <c>Mtest</c> as its first argument.</summary>
+        [Test]
+        public void TestCompileMulti()
+        {
+            AssertCompiles("Mtest");
+        }
+
+        /// <summary>
+        /// Extracts the embedded rules file into a temp directory, runs
+        /// <c>Compile.Main</c> on it and checks the trie stored in the resulting
+        /// <c>.out</c> file against the rules, then deletes that file.
+        /// </summary>
+        /// <param name="algorithm">First argument handed to <c>Compile.Main</c>.</param>
+        private void AssertCompiles(string algorithm)
+        {
+            DirectoryInfo dir = CreateTempDir("testCompile");
+            dir.Create();
+            FileInfo rules = new FileInfo(Path.Combine(dir.FullName, "testRules.txt"));
+            using (Stream input = GetType().Assembly.GetManifestResourceStream(RULES_FILE))
+            {
+                Copy(input, rules);
+            }
+
+            string path = rules.FullName;
+            Compile.Main(new string[] { algorithm, path });
+
+            string compiled = path + ".out";
+            Trie trie = LoadTrie(compiled);
+            AssertTrie(trie, path, true, true);
+            AssertTrie(trie, path, false, true);
+            new FileInfo(compiled).Delete();
+        }
+
+        /// <summary>
+        /// Reads a compiled trie from <paramref name="path"/>. The file starts with a UTF
+        /// string; when it contains an <c>M</c> (after upper-casing) a <c>MultiTrie</c> is read.
+        /// </summary>
+        /// <param name="path">Path of the compiled file.</param>
+        /// <returns>The <c>Trie</c> or <c>MultiTrie</c> read from the file.</returns>
+        internal static Trie LoadTrie(string path)
+        {
+            using (DataInputStream @is = new DataInputStream(
+                new FileStream(path, FileMode.Open, FileAccess.Read)))
+            {
+                string method = @is.ReadUTF().ToUpperInvariant();
+                if (method.IndexOf('M') >= 0)
+                {
+                    return new MultiTrie(@is);
+                }
+                return new Trie(@is);
+            }
+        }
+
+        /// <summary>
+        /// Walks the rules file line by line (first token = stem, remaining tokens =
+        /// other forms) and asserts that applying the patch command the trie returns
+        /// for each word yields the stem. Lines without any token are ignored.
+        /// </summary>
+        /// <param name="trie">The trie under test.</param>
+        /// <param name="file">Path of the rules file the trie was compiled from.</param>
+        /// <param name="usefull"><c>true</c> to look up with <c>GetFully</c>, <c>false</c> for <c>GetLastOnPath</c>.</param>
+        /// <param name="storeorig">When <c>true</c> the stem itself is checked as well.</param>
+        private static void AssertTrie(Trie trie, string file, bool usefull,
+            bool storeorig)
+        {
+            // Read-only access is all that is needed; the default would request read/write.
+            using (TextReader @in =
+                new StreamReader(new FileStream(file, FileMode.Open, FileAccess.Read), Encoding.UTF8))
+            {
+                string line;
+                while ((line = @in.ReadLine()) != null)
+                {
+                    try
+                    {
+                        StringTokenizer st = new StringTokenizer(line.ToLowerInvariant());
+                        string stem = st.NextToken();
+                        if (storeorig)
+                        {
+                            AssertStem(trie, usefull, stem, stem);
+                        }
+                        while (st.HasMoreTokens())
+                        {
+                            string token = st.NextToken();
+                            if (!token.Equals(stem))
+                            {
+                                AssertStem(trie, usefull, token, stem);
+                            }
+                        }
+                    }
+                    catch (InvalidOperationException /*x*/)
+                    {
+                        // no base token (stem) on a line
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Asserts that patching <paramref name="word"/> with the command stored in the
+        /// trie produces <paramref name="stem"/> (both sides are lower-cased first).
+        /// </summary>
+        private static void AssertStem(Trie trie, bool usefull, string word, string stem)
+        {
+            string cmd = usefull ? trie.GetFully(word) : trie.GetLastOnPath(word);
+            StringBuilder stm = new StringBuilder(word);
+            Diff.Apply(stm, cmd);
+            assertEquals(stem.ToLowerInvariant(), stm.ToString().ToLowerInvariant());
+        }
+
+        /// <summary>
+        /// Copies the remainder of <paramref name="input"/> into the file described by
+        /// <paramref name="output"/>, replacing any previous content.
+        /// </summary>
+        private static void Copy(Stream input, FileInfo output)
+        {
+            // FileMode.Create truncates an existing file; OpenOrCreate would keep stale
+            // trailing bytes if the file were already there and longer than the input.
+            using (FileStream os = new FileStream(output.FullName, FileMode.Create, FileAccess.Write))
+            {
+                byte[] buffer = new byte[1024];
+                int len;
+                while ((len = input.Read(buffer, 0, buffer.Length)) > 0)
+                {
+                    os.Write(buffer, 0, len);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestStemmer.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestStemmer.cs
new file mode 100644
index 0000000..c5bf1e9
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestStemmer.cs
@@ -0,0 +1,191 @@
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+/*
+ Egothor Software License version 1.00
+ Copyright (C) 1997-2004 Leo Galambos.
+ Copyright (C) 2002-2004 "Egothor developers"
+ on behalf of the Egothor Project.
+ All rights reserved.
+
+ This  software  is  copyrighted  by  the "Egothor developers". If this
+ license applies to a single file or document, the "Egothor developers"
+ are the people or entities mentioned as copyright holders in that file
+ or  document.  If  this  license  applies  to the Egothor project as a
+ whole,  the  copyright holders are the people or entities mentioned in
+ the  file CREDITS. This file can be found in the same location as this
+ license in the distribution.
+
+ Redistribution  and  use  in  source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ following disclaimer.
+ 2. Redistributions  in binary form must reproduce the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ disclaimer  that  follows  these  conditions  in the documentation
+ and/or other materials provided with the distribution.
+ 3. The name "Egothor" must not be used to endorse or promote products
+ derived  from  this software without prior written permission. For
+ written permission, please contact Leo.G@seznam.cz
+ 4. Products  derived  from this software may not be called "Egothor",
+ nor  may  "Egothor"  appear  in  their name, without prior written
+ permission from Leo.G@seznam.cz.
+
+ In addition, we request that you include in the end-user documentation
+ provided  with  the  redistribution  and/or  in the software itself an
+ acknowledgement equivalent to the following:
+ "This product includes software developed by the Egothor Project.
+ http://egothor.sf.net/"
+
+ THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+ FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+ CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+ BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ This  software  consists  of  voluntary  contributions  made  by  many
+ individuals  on  behalf  of  the  Egothor  Project  and was originally
+ created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+    public class TestStemmer : LuceneTestCase
+    {
+        /// <summary>Exercises <c>new Trie(true)</c> and pins its root index and row/command counts.</summary>
+        [Test]
+        public void TestTrie()
+        {
+            string[] keys = { "a", "ba", "bb", "c" };
+            string[] vals = { "1", "2", "2", "4" };
+            Trie t = new Trie(true);
+
+            Populate(t, keys, vals);
+
+            assertEquals(0, t.root);
+            assertEquals(2, t.rows.Count);
+            assertEquals(3, t.cmds.Count);
+            AssertTrieContents(t, keys, vals);
+        }
+
+        /// <summary>Exercises <c>new Trie(false)</c>.</summary>
+        [Test]
+        public void TestTrieBackwards()
+        {
+            string[] keys = { "a", "ba", "bb", "c" };
+            string[] vals = { "1", "2", "2", "4" };
+            Trie t = new Trie(false);
+
+            Populate(t, keys, vals);
+
+            AssertTrieContents(t, keys, vals);
+        }
+
+        /// <summary>Exercises <c>new MultiTrie(true)</c>.</summary>
+        [Test]
+        public void TestMultiTrie()
+        {
+            string[] keys = { "a", "ba", "bb", "c" };
+            string[] vals = { "1", "2", "2", "4" };
+            Trie t = new MultiTrie(true);
+
+            Populate(t, keys, vals);
+
+            AssertTrieContents(t, keys, vals);
+        }
+
+        /// <summary>Exercises <c>new MultiTrie(false)</c>.</summary>
+        [Test]
+        public void TestMultiTrieBackwards()
+        {
+            string[] keys = { "a", "ba", "bb", "c" };
+            string[] vals = { "1", "2", "2", "4" };
+            Trie t = new MultiTrie(false);
+
+            Populate(t, keys, vals);
+
+            AssertTrieContents(t, keys, vals);
+        }
+
+        /// <summary>Exercises <c>new MultiTrie2(true)</c>.</summary>
+        [Test]
+        public void TestMultiTrie2()
+        {
+            string[] keys = { "a", "ba", "bb", "c" };
+            /*
+             * short vals won't work, see line 155 for example
+             * the IOOBE is caught (weird), but shouldn't affect patch cmds?
+             */
+            string[] vals = { "1111", "2222", "2223", "4444" };
+            Trie t = new MultiTrie2(true);
+
+            Populate(t, keys, vals);
+
+            AssertTrieContents(t, keys, vals);
+        }
+
+        /// <summary>Exercises <c>new MultiTrie2(false)</c>.</summary>
+        [Test]
+        public void TestMultiTrie2Backwards()
+        {
+            string[] keys = { "a", "ba", "bb", "c" };
+            /*
+             * short vals won't work, see line 155 for example
+             * the IOOBE is caught (weird), but shouldn't affect patch cmds?
+             */
+            string[] vals = { "1111", "2222", "2223", "4444" };
+            Trie t = new MultiTrie2(false);
+
+            Populate(t, keys, vals);
+
+            AssertTrieContents(t, keys, vals);
+        }
+
+        /// <summary>
+        /// Adds every <c>keys[i]</c> / <c>vals[i]</c> pair to <paramref name="trie"/>, in index order.
+        /// </summary>
+        private static void Populate(Trie trie, string[] keys, string[] vals)
+        {
+            for (int i = 0; i < keys.Length; i++)
+            {
+                trie.Add(keys[i], vals[i]);
+            }
+        }
+
+        /// <summary>
+        /// Asserts that <paramref name="trie"/> and every <c>Reduce</c>d variant of it return
+        /// <c>vals[i]</c> for <c>keys[i]</c> from both <c>GetFully</c> and <c>GetLastOnPath</c>.
+        /// </summary>
+        private static void AssertTrieContents(Trie trie, string[] keys, string[] vals)
+        {
+            Trie[] tries =
+            {
+                trie,
+                trie.Reduce(new Optimizer()),
+                trie.Reduce(new Optimizer2()),
+                trie.Reduce(new Gener()),
+                trie.Reduce(new Lift(true)),
+                trie.Reduce(new Lift(false))
+            };
+            for (int t = 0; t < tries.Length; t++)
+            {
+                Trie candidate = tries[t];
+                for (int k = 0; k < keys.Length; k++)
+                {
+                    string key = keys[k];
+                    string expected = vals[k];
+                    assertEquals(expected, candidate.GetFully(key).ToString());
+                    assertEquals(expected, candidate.GetLastOnPath(key).ToString());
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/testRules.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/testRules.txt b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/testRules.txt
new file mode 100644
index 0000000..ead2823
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/testRules.txt
@@ -0,0 +1,4 @@
+act acted acting actor
+walk walked walking
+wander wandered wanderer
+want wanted wanting

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Lucene.Net.Tests.Analysis.Stempel.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Lucene.Net.Tests.Analysis.Stempel.csproj b/src/Lucene.Net.Tests.Analysis.Stempel/Lucene.Net.Tests.Analysis.Stempel.csproj
new file mode 100644
index 0000000..8be32c0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Lucene.Net.Tests.Analysis.Stempel.csproj
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Tests.Analysis.Stempel</RootNamespace>
+    <AssemblyName>Lucene.Net.Tests.Analysis.Stempel</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="nunit.framework, Version=2.6.3.13283, Culture=neutral, PublicKeyToken=96d09a1eb7f44a77, processorArchitecture=MSIL">
+      <HintPath>..\..\packages\NUnit.2.6.3\lib\nunit.framework.dll</HintPath>
+      <Private>True</Private>
+    </Reference>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Net.Http" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Egothor.Stemmer\TestCompile.cs" />
+    <Compile Include="Egothor.Stemmer\TestStemmer.cs" />
+    <Compile Include="Pl\TestPolishAnalyzer.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Stempel\TestStempelPolishStemFilterFactory.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="packages.config" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+      <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project>
+      <Name>Lucene.Net.Analysis.Common</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Stempel\Lucene.Net.Analysis.Stempel.csproj">
+      <Project>{a76dad88-e3a5-40f9-9114-facd77bd8265}</Project>
+      <Name>Lucene.Net.Analysis.Stempel</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+      <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
+      <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
+      <Name>Lucene.Net.TestFramework</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="Egothor.Stemmer\testRules.txt" />
+  </ItemGroup>
+  <ItemGroup />
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Pl/TestPolishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Pl/TestPolishAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Pl/TestPolishAnalyzer.cs
new file mode 100644
index 0000000..0bd90bf
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Pl/TestPolishAnalyzer.cs
@@ -0,0 +1,102 @@
+using Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Pl
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    public class TestPolishAnalyzer : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// This test fails with NPE when the 
+        /// stopwords file is missing in classpath
+        /// </summary>
+        [Test]
+        public void TestResourcesAvailable()
+        {
+            new PolishAnalyzer(TEST_VERSION_CURRENT);
+        }
+
+        /// <summary>
+        /// test stopwords and stemming
+        /// </summary>
+        [Test]
+        public void TestBasics()
+        {
+            Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT);
+            // stemming
+            CheckOneTerm(a, "studenta", "student");
+            CheckOneTerm(a, "studenci", "student");
+            // stopword
+            AssertAnalyzesTo(a, "był", new String[] { });
+        }
+
+        /// <summary>
+        /// test use of exclusion set
+        /// </summary>
+        [Test]
+        public void TestExclude()
+        {
+            CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, AsSet("studenta"), false); ;
+            Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT,
+                PolishAnalyzer.GetDefaultStopSet(), exclusionSet);
+            CheckOneTerm(a, "studenta", "studenta");
+            CheckOneTerm(a, "studenci", "student");
+        }
+
+        /// <summary>
+        /// blast some random strings through the analyzer
+        /// </summary>
+        [Test]
+        public void TestRandomStrings()
+        {
+            CheckRandomData(Random(), new PolishAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+        }
+
+        /// <summary>
+        /// LUCENENET specific. The original Java implementation relied on String.subSequence(int, int) to throw an IndexOutOfBoundsException 
+        /// (in .NET, it would be string.SubString(int, int) and an ArgumentOutOfRangeException). 
+        /// However, the logic was corrected for .NET to test when the argument is negative and not 
+        /// throw an exception, since exceptions are expensive and not meant for "normal"
+        /// behavior in .NET. This test case was made trying to figure out that issue (since initially an IndexOutOfRangeException,
+        /// rather than ArgumentOutOfRangeException, was in the catch block which made the TestRandomStrings test fail). 
+        /// It will trigger the behavior that cause the second substring argument to be negative 
+        /// (although that behavior no longer throws an exception).
+        /// </summary>
+        [Test]
+        public void TestOutOfRange()
+        {
+            var a = new PolishAnalyzer(TEST_VERSION_CURRENT);
+            var text = "zyaolz 96619727 p";
+            var reader = new StringReader(text);
+            int remainder = 2;
+            using (var ts = a.TokenStream("dummy", (TextReader)new MockCharFilter(reader, remainder)))
+            {
+                ts.Reset();
+
+                while (ts.IncrementToken())
+                {
+                }
+
+                ts.End();
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Properties/AssemblyInfo.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..5332d92
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Tests.Analysis.Stempel")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Tests.Analysis.Stempel")]
+[assembly: AssemblyCopyright("Copyright ©  2016")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("940a6ab1-f00a-40e2-bc1a-2898efa8c48f")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Stempel/TestStempelPolishStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Stempel/TestStempelPolishStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Stempel/TestStempelPolishStemFilterFactory.cs
new file mode 100644
index 0000000..b6be4af
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Stempel/TestStempelPolishStemFilterFactory.cs
@@ -0,0 +1,56 @@
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Stempel
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Tests for <see cref="StempelPolishStemFilterFactory"/>
+    /// </summary>
+    public class TestStempelPolishStemFilterFactory : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void TestBasics()
+        {
+            TextReader reader = new StringReader("studenta studenci");
+            StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory(new Dictionary<string, string>());
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = factory.Create(stream);
+            AssertTokenStreamContents(stream,
+                new string[] { "student", "student" });
+        }
+
+        /** Test that bogus arguments result in exception */
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                new StempelPolishStemFilterFactory(new Dictionary<string, string>() { { "bogusArg", "bogusValue" } });
+                fail();
+            }
+            catch (ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/packages.config
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/packages.config b/src/Lucene.Net.Tests.Analysis.Stempel/packages.config
new file mode 100644
index 0000000..139d513
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/packages.config
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<packages>
+  <package id="NUnit" version="2.6.3" targetFramework="net451" />
+</packages>
\ No newline at end of file


Mime
View raw message