lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject svn commit: r949519 [2/2] - in /lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net: SpellChecker.Net/ SpellChecker.Net/Spell/ Test/ Test/Test/
Date Sun, 30 May 2010 14:20:28 GMT
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLuceneDictionary.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestLuceneDictionary.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLuceneDictionary.cs
(added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLuceneDictionary.cs
Sun May 30 14:20:28 2010
@@ -0,0 +1,272 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Text;
+
+using NUnit.Framework;
+
+using Lucene.Net.Store;
+using Lucene.Net.Index;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using SpellChecker.Net.Search.Spell;
+
+namespace SpellChecker.Net.Test.Search.Spell
+{
+    [TestFixture]
+    public class TestLuceneDictionary
+    {
+
+        private Directory store = new RAMDirectory();
+
+        private IndexReader indexReader = null;
+
+        private LuceneDictionary ld;
+        private IEnumerator it;
+
+        /// <summary>
+        /// Rebuilds the index in <c>store</c> with five single-field documents:
+        /// "aaa" = foo (twice), "contents" = Tom and Jerry, "zzz" = bar.
+        /// </summary>
+        [SetUp]
+        public void SetUp()
+        {
+            // field name / value pairs, one document each, in insertion order
+            string[][] entries = new string[][]
+            {
+                new string[] { "aaa", "foo" },
+                new string[] { "aaa", "foo" },
+                new string[] { "contents", "Tom" },
+                new string[] { "contents", "Jerry" },
+                new string[] { "zzz", "bar" }
+            };
+
+            IndexWriter indexWriter = new IndexWriter(store, new WhitespaceAnalyzer(), true);
+            foreach (string[] entry in entries)
+            {
+                Document document = new Document();
+                document.Add(new Field(entry[0], entry[1], Field.Store.YES, Field.Index.ANALYZED));
+                indexWriter.AddDocument(document);
+            }
+
+            indexWriter.Optimize();
+            indexWriter.Close();
+        }
+
+        /// <summary>
+        /// A dictionary built on a field that is not in the index must yield no words.
+        /// </summary>
+        [Test]
+        public void TestFieldNonExistent()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+                ld = new LuceneDictionary(indexReader, "nonexistent_field");
+                it = ld.GetWordsIterator();
+
+                bool hasWord = it.HasNext();
+                Assert.IsFalse(hasWord, "More elements than expected");
+
+                object word = it.Next();
+                Assert.IsTrue(word == null, "Nonexistent element is really null");
+            }
+            finally
+            {
+                if (indexReader != null)
+                {
+                    indexReader.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// The "aaa" field holds the same term in two documents; the dictionary must list it once.
+        /// </summary>
+        [Test]
+        public void TestFieldAaa()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+                ld = new LuceneDictionary(indexReader, "aaa");
+                it = ld.GetWordsIterator();
+
+                bool hasFirst = it.HasNext();
+                Assert.IsTrue(hasFirst, "First element doesn't exist.");
+
+                object first = it.Next();
+                Assert.IsTrue(first.Equals("foo"), "First element isn't correct");
+
+                bool hasMore = it.HasNext();
+                Assert.IsFalse(hasMore, "More elements than expected");
+
+                object past = it.Next();
+                Assert.IsTrue(past == null, "Nonexistent element is really null");
+            }
+            finally
+            {
+                if (indexReader != null)
+                {
+                    indexReader.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Walks the "contents" dictionary element by element (Jerry, then Tom), then
+        /// re-creates the iterator and checks that a plain loop sees exactly two words.
+        /// </summary>
+        [Test]
+        public void TestFieldContents_1()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+
+                // first pass: explicit step-by-step checks
+                ld = new LuceneDictionary(indexReader, "contents");
+                it = ld.GetWordsIterator();
+
+                Assert.IsTrue(it.HasNext(), "First element doesn't exist.");
+                Assert.IsTrue(it.Next().Equals("Jerry"), "First element isn't correct");
+                Assert.IsTrue(it.HasNext(), "Second element doesn't exist.");
+                Assert.IsTrue(it.Next().Equals("Tom"), "Second element isn't correct");
+                Assert.IsFalse(it.HasNext(), "More elements than expected");
+                Assert.IsTrue(it.Next() == null, "Nonexistent element is really null");
+
+                // second pass: fresh iterator, count the words it produces
+                ld = new LuceneDictionary(indexReader, "contents");
+                it = ld.GetWordsIterator();
+
+                int seen = 0;
+                while (it.HasNext())
+                {
+                    it.Next();
+                    seen++;
+                }
+
+                Assert.IsTrue(seen == 2, "Number of words incorrect");
+            }
+            finally
+            {
+                if (indexReader != null)
+                {
+                    indexReader.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Iterates the "contents" dictionary using Next() only, then checks that repeated
+        /// HasNext()/Next() calls past the end keep reporting "no more words".
+        /// </summary>
+        [Test]
+        public void TestFieldContents_2()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+
+                ld = new LuceneDictionary(indexReader, "contents");
+                it = ld.GetWordsIterator();
+
+                // hasNext() should have no side effects. {{DIGY}}: in this port it does (the
+                // HasNext() extension calls MoveNext()), so the repeated checks stay disabled
+                // until that is fixed.
+                //AssertTrue("First element isn't where it should be.", it.HasNext());
+                //AssertTrue("First element isn't where it should be.", it.HasNext());
+                //AssertTrue("First element isn't where it should be.", it.HasNext());
+
+                // just iterate through words
+                AssertTrue("First element isn't correct", it.Next().Equals("Jerry"));
+                AssertTrue("Second element isn't correct", it.Next().Equals("Tom"));
+                AssertTrue("Nonexistent element is really null", it.Next() == null);
+
+                // hasNext() should still have no side effects ...
+                // (message fixed: the old text "There should be any more elements" said the opposite)
+                AssertFalse("There shouldn't be any more elements", it.HasNext());
+                AssertFalse("There shouldn't be any more elements", it.HasNext());
+                AssertFalse("There shouldn't be any more elements", it.HasNext());
+
+                // .. and there are really no more words
+                AssertTrue("Nonexistent element is really null", it.Next() == null);
+                AssertTrue("Nonexistent element is really null", it.Next() == null);
+                AssertTrue("Nonexistent element is really null", it.Next() == null);
+            }
+            finally
+            {
+                if (indexReader != null) { indexReader.Close(); }
+            }
+        }
+
+        /// <summary>
+        /// The "zzz" field holds a single term, "bar"; the dictionary must list exactly that.
+        /// </summary>
+        [Test]
+        public void TestFieldZzz()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+                ld = new LuceneDictionary(indexReader, "zzz");
+                it = ld.GetWordsIterator();
+
+                bool hasFirst = it.HasNext();
+                Assert.IsTrue(hasFirst, "First element doesn't exist.");
+
+                object first = it.Next();
+                Assert.IsTrue(first.Equals("bar"), "First element isn't correct");
+
+                bool hasMore = it.HasNext();
+                Assert.IsFalse(hasMore, "More elements than expected");
+
+                object past = it.Next();
+                Assert.IsTrue(past == null, "Nonexistent element is really null");
+            }
+            finally
+            {
+                if (indexReader != null)
+                {
+                    indexReader.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Feeds the "contents" terms into a spell checker backed by its own RAMDirectory and
+        /// checks the single best suggestion for two misspelled words.
+        /// </summary>
+        [Test]
+        public void TestSpellchecker()
+        {
+            SpellChecker.Net.Search.Spell.SpellChecker sc = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory());
+            indexReader = IndexReader.Open(store);
+            try
+            {
+                sc.IndexDictionary(new LuceneDictionary(indexReader, "contents"));
+
+                String[] suggestions = sc.SuggestSimilar("Tam", 1);
+                AssertEquals(1, suggestions.Length);
+                AssertEquals("Tom", suggestions[0]);
+
+                suggestions = sc.SuggestSimilar("Jarry", 1);
+                AssertEquals(1, suggestions.Length);
+                AssertEquals("Jerry", suggestions[0]);
+            }
+            finally
+            {
+                // close the reader even when an assertion fails, as the other tests here do
+                indexReader.Close();
+            }
+        }
+        
+        #region .NET 
+        /// <summary>JUnit-style wrapper (message first) around <c>Assert.IsTrue</c>.</summary>
+        private void AssertTrue(string s, bool b)
+        {
+            Assert.IsTrue(b, s);
+        }
+
+        /// <summary>JUnit-style wrapper (message first) around <c>Assert.IsFalse</c>.</summary>
+        private void AssertFalse(string s, bool b)
+        {
+            Assert.IsFalse(b, s);
+        }
+
+        /// <summary>Forwards both integers, in the same order, to <c>Assert.AreEqual</c>.</summary>
+        private void AssertEquals(int i, int j)
+        {
+            Assert.AreEqual(i, j);
+        }
+
+        /// <summary>Forwards both strings, in the same order, to <c>Assert.AreEqual</c>.</summary>
+        private void AssertEquals(string i, string j)
+        {
+            Assert.AreEqual(i, j);
+        }
+        #endregion
+    }
+}
+
+#region .NET
+namespace SpellChecker.Net.Test.Search.Spell
+{
+    public static class Extensions // Java-style HasNext()/Next() shims over IEnumerator, used by the tests in this namespace
+    {
+        public static bool HasNext(this IEnumerator a) // NOTE: not a pure query - every call advances the enumerator via MoveNext()
+        {
+            return a.MoveNext();
+        }
+
+        public static object Next(this IEnumerator a) // returns Current without advancing; callers rely on HasNext() to move forward
+        {
+            return a.Current;
+        }
+    }
+}
+
+namespace System.Runtime.CompilerServices // NOTE(review): presumably declared locally so the extension methods above compile without a System.Core reference - confirm the target framework
+{
+    [AttributeUsage(AttributeTargets.Method)]
+    public sealed class ExtensionAttribute : Attribute // marker attribute only; carries no state
+    {
+        public ExtensionAttribute() { }
+    }
+}
+#endregion

Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestNGramDistance.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestNGramDistance.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestNGramDistance.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestNGramDistance.cs Sun
May 30 14:20:28 2010
@@ -0,0 +1,148 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+using NUnit.Framework;
+
+using SpellChecker.Net.Search.Spell;
+
+namespace SpellChecker.Net.Test.Search.Spell
+{
+    [TestFixture]
+    public class TestNGramDistance
+    {
+        /// <summary>
+        /// Checks NGramDistance with n = 1 against reference scores, plus a few relative
+        /// comparisons between pairs of scores.
+        /// </summary>
+        [Test]
+        public void TestGetDistance1()
+        {
+            StringDistance nsd = new NGramDistance(1);
+            float d = nsd.GetDistance("al", "al");
+            Assert.AreEqual(d, 1.0f, 0.001);
+            d = nsd.GetDistance("a", "a");
+            Assert.AreEqual(d, 1.0f, 0.001);
+            d = nsd.GetDistance("b", "a");
+            Assert.AreEqual(d, 0.0f, 0.001);
+            d = nsd.GetDistance("martha", "marhta");
+            Assert.AreEqual(d, 0.6666, 0.001);
+            d = nsd.GetDistance("jones", "johnson");
+            Assert.AreEqual(d, 0.4285, 0.001);
+            d = nsd.GetDistance("natural", "contrary");
+            Assert.AreEqual(d, 0.25, 0.001);
+            d = nsd.GetDistance("abcvwxyz", "cabvwxyz");
+            Assert.AreEqual(d, 0.75, 0.001);
+            d = nsd.GetDistance("dwayne", "duane");
+            Assert.AreEqual(d, 0.666, 0.001);
+            d = nsd.GetDistance("dixon", "dicksonx");
+            Assert.AreEqual(d, 0.5, 0.001);
+            d = nsd.GetDistance("six", "ten");
+            Assert.AreEqual(d, 0, 0.001);
+            float d1 = nsd.GetDistance("zac ephron", "zac efron");
+            float d2 = nsd.GetDistance("zac ephron", "kai ephron");
+            Assert.AreEqual(d1, d2, 0.001);
+            d1 = nsd.GetDistance("brittney spears", "britney spears");
+            d2 = nsd.GetDistance("brittney spears", "brittney startzman");
+            Assert.IsTrue(d1 > d2);
+            d1 = nsd.GetDistance("12345678", "12890678");
+            d2 = nsd.GetDistance("12345678", "72385698");
+            // tolerance was written as 001 (= 1), which accepts any two scores in [0, 1];
+            // use the same 0.001 as every other comparison in this test
+            Assert.AreEqual(d1, d2, 0.001);
+        }
+
+        /// <summary>
+        /// Checks NGramDistance with n = 2 against reference scores, plus a few relative
+        /// comparisons between pairs of scores.
+        /// </summary>
+        [Test]
+        public void TestGetDistance2()
+        {
+            const double tolerance = 0.001;
+            StringDistance bigram = new NGramDistance(2);
+
+            // absolute scores
+            Assert.AreEqual(bigram.GetDistance("al", "al"), 1.0f, tolerance);
+            Assert.AreEqual(bigram.GetDistance("a", "a"), 1.0f, tolerance);
+            Assert.AreEqual(bigram.GetDistance("b", "a"), 0.0f, tolerance);
+            Assert.AreEqual(bigram.GetDistance("a", "aa"), 0.5f, tolerance);
+            Assert.AreEqual(bigram.GetDistance("martha", "marhta"), 0.6666, tolerance);
+            Assert.AreEqual(bigram.GetDistance("jones", "johnson"), 0.4285, tolerance);
+            Assert.AreEqual(bigram.GetDistance("natural", "contrary"), 0.25, tolerance);
+            Assert.AreEqual(bigram.GetDistance("abcvwxyz", "cabvwxyz"), 0.625, tolerance);
+            Assert.AreEqual(bigram.GetDistance("dwayne", "duane"), 0.5833, tolerance);
+            Assert.AreEqual(bigram.GetDistance("dixon", "dicksonx"), 0.5, tolerance);
+            Assert.AreEqual(bigram.GetDistance("six", "ten"), 0, tolerance);
+
+            // relative scores
+            float first = bigram.GetDistance("zac ephron", "zac efron");
+            float second = bigram.GetDistance("zac ephron", "kai ephron");
+            Assert.IsTrue(first > second);
+
+            first = bigram.GetDistance("brittney spears", "britney spears");
+            second = bigram.GetDistance("brittney spears", "brittney startzman");
+            Assert.IsTrue(first > second);
+
+            first = bigram.GetDistance("0012345678", "0012890678");
+            second = bigram.GetDistance("0012345678", "0072385698");
+            Assert.AreEqual(first, second, tolerance);
+        }
+
+        /// <summary>
+        /// Checks NGramDistance with n = 3 against reference scores, plus a few relative
+        /// comparisons between pairs of scores.
+        /// </summary>
+        [Test]
+        public void TestGetDistance3()
+        {
+            const double tolerance = 0.001;
+            StringDistance trigram = new NGramDistance(3);
+
+            // absolute scores
+            Assert.AreEqual(trigram.GetDistance("al", "al"), 1.0f, tolerance);
+            Assert.AreEqual(trigram.GetDistance("a", "a"), 1.0f, tolerance);
+            Assert.AreEqual(trigram.GetDistance("b", "a"), 0.0f, tolerance);
+            Assert.AreEqual(trigram.GetDistance("martha", "marhta"), 0.7222, tolerance);
+            Assert.AreEqual(trigram.GetDistance("jones", "johnson"), 0.4762, tolerance);
+            Assert.AreEqual(trigram.GetDistance("natural", "contrary"), 0.2083, tolerance);
+            Assert.AreEqual(trigram.GetDistance("abcvwxyz", "cabvwxyz"), 0.5625, tolerance);
+            Assert.AreEqual(trigram.GetDistance("dwayne", "duane"), 0.5277, tolerance);
+            Assert.AreEqual(trigram.GetDistance("dixon", "dicksonx"), 0.4583, tolerance);
+            Assert.AreEqual(trigram.GetDistance("six", "ten"), 0, tolerance);
+
+            // relative scores
+            float first = trigram.GetDistance("zac ephron", "zac efron");
+            float second = trigram.GetDistance("zac ephron", "kai ephron");
+            Assert.IsTrue(first > second);
+
+            first = trigram.GetDistance("brittney spears", "britney spears");
+            second = trigram.GetDistance("brittney spears", "brittney startzman");
+            Assert.IsTrue(first > second);
+
+            first = trigram.GetDistance("0012345678", "0012890678");
+            second = trigram.GetDistance("0012345678", "0072385698");
+            Assert.IsTrue(first < second);
+        }
+
+        /// <summary>
+        /// An empty string compared with a non-empty one must score 0.
+        /// </summary>
+        // [Test] was missing: NUnit discovers tests by attribute, not by the "Test" name
+        // prefix, so this method was never executed.
+        [Test]
+        public void TestEmpty()
+        {
+            StringDistance nsd = new NGramDistance(1);
+            float d = nsd.GetDistance("", "al");
+            Assert.AreEqual(d, 0.0f, 0.001);
+        }
+
+    }
+
+
+}

Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestPlainTextDictionary.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestPlainTextDictionary.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestPlainTextDictionary.cs
(added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestPlainTextDictionary.cs
Sun May 30 14:20:28 2010
@@ -0,0 +1,50 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections.Generic;
+using System.Text;
+
+using NUnit.Framework;
+
+using SpellChecker.Net.Search.Spell;
+
+using Lucene.Net.Store;
+
+namespace SpellChecker.Net.Test.Search.Spell
+{
+    [TestFixture]
+    public class TestPlainTextDictionary
+    {
+        /// <summary>
+        /// Builds a spell index from a three-word plain-text dictionary held in memory and
+        /// checks the two suggestions returned for "treeword", in order.
+        /// </summary>
+        [Test]
+        public void TestBuild()
+        {
+            String LF = System.Environment.NewLine;
+            String input = "oneword" + LF + "twoword" + LF + "threeword";
+            PlainTextDictionary ptd = new PlainTextDictionary(new MemoryStream(System.Text.Encoding.UTF8.GetBytes(input)));
+            RAMDirectory ramDir = new RAMDirectory();
+            SpellChecker.Net.Search.Spell.SpellChecker spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(ramDir);
+            spellChecker.IndexDictionary(ptd);
+            String[] similar = spellChecker.SuggestSimilar("treeword", 2);
+            Assert.AreEqual(2, similar.Length);
+            // expected value first, so a failure message reports the right "expected"/"was"
+            Assert.AreEqual("threeword", similar[0]);
+            Assert.AreEqual("twoword", similar[1]);
+        }
+    }
+}

Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestSpellChecker.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestSpellChecker.cs?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestSpellChecker.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestSpellChecker.cs Sun
May 30 14:20:28 2010
@@ -28,118 +28,469 @@ using Field = Lucene.Net.Documents.Field
 using IndexReader = Lucene.Net.Index.IndexReader;
 using Directory = Lucene.Net.Store.Directory;
 using LuceneDictionary = SpellChecker.Net.Search.Spell.LuceneDictionary;
+using System.Collections;
+using Lucene.Net.Store;
+using System.Threading;
+using SpellChecker.Net.Search.Spell;
+using Lucene.Net.Search;
 
 namespace SpellChecker.Net.Test.Search.Spell
 {
-	
-	
-	/// <summary> Test case
-	/// 
-	/// </summary>
-	/// <author>  Nicolas Maisonneuve
-	/// </author>
-	[TestFixture]
+
+
+    /// <summary> Test case
+    /// 
+    /// </summary>
+    /// <author>  Nicolas Maisonneuve
+    /// </author>
+    [TestFixture]
     public class TestSpellChecker
-	{
-		private SpellChecker.Net.Search.Spell.SpellChecker spellChecker;
-		private Directory userindex, spellindex;
-		
+    {
+        private SpellCheckerMock spellChecker;
+        private Directory userindex, spellindex;
+        public ArrayList searchers;
+        private Random random = new Random();
+
+
         [SetUp]
-        public virtual void  SetUp()
-		{
-			//create a user index
-			userindex = new RAMDirectory();
-			IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true);
-			
-			for (int i = 0; i < 1000; i++)
-			{
-				Document doc = new Document();
-				doc.Add(new Field("field1", English.IntToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
-				doc.Add(new Field("field2", English.IntToEnglish(i + 1), Field.Store.YES, Field.Index.TOKENIZED));
// + word thousand
-				writer.AddDocument(doc);
-			}
-			writer.Close();
-			
-			// create the spellChecker
-			spellindex = new RAMDirectory();
-			spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellindex);
-		}
-		
-		[Test]
-		public virtual void  TestBuild()
-		{
-			try
-			{
-				IndexReader r = IndexReader.Open(userindex);
-				
-				spellChecker.ClearIndex();
-				
-				Addwords(r, "field1");
-				int num_field1 = this.Numdoc();
-				
-				Addwords(r, "field2");
-				int num_field2 = this.Numdoc();
-				
-				Assert.AreEqual(num_field2, num_field1 + 1);
-				
-				// test small word
-				System.String[] similar = spellChecker.SuggestSimilar("fvie", 2);
-				Assert.AreEqual(1, similar.Length);
-				Assert.AreEqual(similar[0], "five");
-				
-				similar = spellChecker.SuggestSimilar("five", 2);
-				Assert.AreEqual(1, similar.Length);
-				Assert.AreEqual(similar[0], "nine"); // don't suggest a word for itself
-				
-				similar = spellChecker.SuggestSimilar("fiv", 2);
-				Assert.AreEqual(1, similar.Length);
-				Assert.AreEqual(similar[0], "five");
-				
-				similar = spellChecker.SuggestSimilar("ive", 2);
-				Assert.AreEqual(1, similar.Length);
-				Assert.AreEqual(similar[0], "five");
-				
-				similar = spellChecker.SuggestSimilar("fives", 2);
-				Assert.AreEqual(1, similar.Length);
-				Assert.AreEqual(similar[0], "five");
-				
-				similar = spellChecker.SuggestSimilar("fie", 2);
-				Assert.AreEqual(1, similar.Length);
-				Assert.AreEqual(similar[0], "five");
-				
-				similar = spellChecker.SuggestSimilar("fi", 2);
-				Assert.AreEqual(0, similar.Length);
-				
-				// test restraint to a field
-				similar = spellChecker.SuggestSimilar("tousand", 10, r, "field1", false);
-				Assert.AreEqual(0, similar.Length); // there isn't the term thousand in the field field1
-				
-				similar = spellChecker.SuggestSimilar("tousand", 10, r, "field2", false);
-				Assert.AreEqual(1, similar.Length); // there is the term thousand in the field field2
-			}
-			catch (System.IO.IOException e)
-			{
+        public virtual void SetUp()
+        {
+            // user index: 1000 documents, field1 built from i and field2 from i + 1
+            userindex = new RAMDirectory();
+            IndexWriter userWriter = new IndexWriter(userindex, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+            for (int number = 0; number < 1000; number++)
+            {
+                Field first = new Field("field1", English.IntToEnglish(number), Field.Store.YES, Field.Index.ANALYZED);
+                // + word thousand
+                Field second = new Field("field2", English.IntToEnglish(number + 1), Field.Store.YES, Field.Index.ANALYZED);
+
+                Document entry = new Document();
+                entry.Add(first);
+                entry.Add(second);
+                userWriter.AddDocument(entry);
+            }
+            userWriter.Close();
+
+            // spell index, the searcher list handed to the mock, and the checker under test
+            spellindex = new RAMDirectory();
+            searchers = ArrayList.Synchronized(new ArrayList());
+            spellChecker = new SpellCheckerMock(spellindex, this);
+        }
+
+        [Test]
+        public virtual void TestBuild()
+        {
+            try
+            {
+                IndexReader r = IndexReader.Open(userindex, true);
+
+                spellChecker.ClearIndex();
+
+                Addwords(r, "field1");
+                int num_field1 = this.Numdoc();
+
+                Addwords(r, "field2");
+                int num_field2 = this.Numdoc();
+
+                Assert.AreEqual (num_field2, num_field1 + 1);
+                
+                AssertLastSearcherOpen(4);
+
+                CheckCommonSuggestions(r);
+                CheckLevenshteinSuggestions(r);
+
+                spellChecker.setStringDistance(new JaroWinklerDistance());
+                spellChecker.SetAccuracy(0.8f);
+                CheckCommonSuggestions(r);
+                CheckJaroWinklerSuggestions();
+
+                spellChecker.setStringDistance(new NGramDistance(2));
+                spellChecker.SetAccuracy(0.5f);
+                CheckCommonSuggestions(r);
+                CheckNGramSuggestions();
+            }
+            catch (System.IO.IOException e)
+            {
                 System.Console.Error.WriteLine(e.StackTrace);
-				Assert.Fail();
-			}
-		}
-		
-		private void  Addwords(IndexReader r, System.String field)
-		{
-			long time = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
-			spellChecker.IndexDictionary(new LuceneDictionary(r, field));
-			time = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - time;
-			//System.out.println("time to build " + field + ": " + time);
-		}
-		
-		private int Numdoc()
-		{
-			IndexReader rs = IndexReader.Open(spellindex);
-			int num = rs.NumDocs();
-			Assert.IsTrue(num != 0);
-			//System.out.println("num docs: " + num);
-			rs.Close();
-			return num;
-		}
-	}
+                Assert.Fail();
+            }
+        }
+        /// <summary>
+        /// Suggestion checks run once for each string-distance configuration in TestBuild:
+        /// near-misses of "five" must suggest "five", a word must not be suggested for itself,
+        /// and field-restricted lookups must respect the field.
+        /// </summary>
+        /// <param name="r">Reader over the user index, used for the field-restricted lookups.</param>
+        private void CheckCommonSuggestions(IndexReader r)
+        {
+            String[] similar = spellChecker.SuggestSimilar("fvie", 2);
+            Assert.True(similar.Length > 0);
+            Assert.AreEqual(similar[0], "five");
+
+            similar = spellChecker.SuggestSimilar("five", 2);
+            if (similar.Length > 0)
+            {
+                Assert.False(similar[0].Equals("five")); // don't suggest a word for itself
+            }
+
+            similar = spellChecker.SuggestSimilar("fiv", 2);
+            Assert.True(similar.Length > 0);
+            Assert.AreEqual(similar[0], "five");
+
+            similar = spellChecker.SuggestSimilar("fives", 2);
+            Assert.True(similar.Length > 0);
+            Assert.AreEqual(similar[0], "five");
+
+            // the length check used to run before this query, so it re-checked the "fives"
+            // result and left similar[0] below unguarded
+            similar = spellChecker.SuggestSimilar("fie", 2);
+            Assert.True(similar.Length > 0);
+            Assert.AreEqual(similar[0], "five");
+
+            //  test restraint to a field
+            similar = spellChecker.SuggestSimilar("tousand", 10, r, "field1", false);
+            Assert.AreEqual(0, similar.Length); // there isn't the term thousand in the field field1
+
+            similar = spellChecker.SuggestSimilar("tousand", 10, r, "field2", false);
+            Assert.AreEqual(1, similar.Length); // there is the term thousand in the field field2
+        }
+
+        /// <summary>
+        /// Exact suggestion lists expected right after indexing in TestBuild, before any
+        /// string distance is set explicitly.
+        /// </summary>
+        /// <param name="r">Reader over the user index, used for the field-restricted lookups.</param>
+        private void CheckLevenshteinSuggestions(IndexReader r)
+        {
+            string[] hits;
+
+            // test small word
+            hits = spellChecker.SuggestSimilar("fvie", 2);
+            Assert.AreEqual(1, hits.Length);
+            Assert.AreEqual(hits[0], "five");
+
+            // don't suggest a word for itself
+            hits = spellChecker.SuggestSimilar("five", 2);
+            Assert.AreEqual(1, hits.Length);
+            Assert.AreEqual(hits[0], "nine");
+
+            hits = spellChecker.SuggestSimilar("fiv", 2);
+            Assert.AreEqual(1, hits.Length);
+            Assert.AreEqual(hits[0], "five");
+
+            hits = spellChecker.SuggestSimilar("ive", 2);
+            Assert.AreEqual(2, hits.Length);
+            Assert.AreEqual(hits[0], "five");
+            Assert.AreEqual(hits[1], "nine");
+
+            hits = spellChecker.SuggestSimilar("fives", 2);
+            Assert.AreEqual(1, hits.Length);
+            Assert.AreEqual(hits[0], "five");
+
+            hits = spellChecker.SuggestSimilar("fie", 2);
+            Assert.AreEqual(2, hits.Length);
+            Assert.AreEqual(hits[0], "five");
+            Assert.AreEqual(hits[1], "nine");
+
+            hits = spellChecker.SuggestSimilar("fi", 2);
+            Assert.AreEqual(1, hits.Length);
+            Assert.AreEqual(hits[0], "five");
+
+            // test restraint to a field: the term thousand is absent from field1 ...
+            hits = spellChecker.SuggestSimilar("tousand", 10, r, "field1", false);
+            Assert.AreEqual(0, hits.Length);
+
+            // ... and present in field2
+            hits = spellChecker.SuggestSimilar("tousand", 10, r, "field2", false);
+            Assert.AreEqual(1, hits.Length);
+
+            hits = spellChecker.SuggestSimilar("onety", 2);
+            Assert.AreEqual(2, hits.Length);
+            Assert.AreEqual(hits[0], "ninety");
+            Assert.AreEqual(hits[1], "one");
+
+            // a null field name must not blow up
+            try
+            {
+                hits = spellChecker.SuggestSimilar("tousand", 10, r, null, false);
+            }
+            catch (NullReferenceException)
+            {
+                Assert.True(false, "threw an NPE, and it shouldn't have");
+            }
+        }
+
+        /// <summary>
+        /// With the Jaro-Winkler distance set by TestBuild, "onety" must suggest "one" then "ninety".
+        /// </summary>
+        private void CheckJaroWinklerSuggestions()
+        {
+            string[] ranked = new string[] { "one", "ninety" };
+            string[] hits = spellChecker.SuggestSimilar("onety", 2);
+
+            Assert.AreEqual(2, hits.Length);
+            for (int i = 0; i < ranked.Length; i++)
+            {
+                Assert.AreEqual(hits[i], ranked[i]);
+            }
+        }
+
+        /// <summary>
+        /// With the n-gram distance set by TestBuild, "onety" must suggest "one" then "ninety".
+        /// </summary>
+        private void CheckNGramSuggestions()
+        {
+            string[] ranked = new string[] { "one", "ninety" };
+            string[] hits = spellChecker.SuggestSimilar("onety", 2);
+
+            Assert.AreEqual(2, hits.Length);
+            for (int i = 0; i < ranked.Length; i++)
+            {
+                Assert.AreEqual(hits[i], ranked[i]);
+            }
+        }
+
+        /// <summary>
+        /// Adds every term of <paramref name="field"/> from the user index to the spell index.
+        /// </summary>
+        /// <param name="r">Reader over the user index.</param>
+        /// <param name="field">Name of the field whose terms are indexed.</param>
+        private void Addwords(IndexReader r, System.String field)
+        {
+            // the old elapsed-time bookkeeping was dropped: its value was never read
+            // (the only consumer was a commented-out print statement)
+            spellChecker.IndexDictionary(new LuceneDictionary(r, field));
+        }
+
+        /// <summary>
+        /// Opens the spell index, asserts it is not empty and returns its document count.
+        /// </summary>
+        /// <returns>The number of documents currently in the spell index.</returns>
+        private int Numdoc()
+        {
+            IndexReader rs = IndexReader.Open(spellindex);
+            try
+            {
+                int num = rs.NumDocs();
+                Assert.IsTrue(num != 0);
+                return num;
+            }
+            finally
+            {
+                // close the reader even when the assertion above fails
+                rs.Close();
+            }
+        }
+
+        /// <summary>
+        /// Closes the spell checker after normal use and checks that every later operation on
+        /// it (Close, SuggestSimilar, ClearIndex, IndexDictionary, SetSpellIndex) throws
+        /// AlreadyClosedException, with the searcher assertions run before and after.
+        /// </summary>
+        [Test]
+        public void TestClose()
+        {
+            IndexReader r = IndexReader.Open(userindex, true);
+            try
+            {
+                spellChecker.ClearIndex();
+                String field = "field1";
+                Addwords(r, field);
+                int num_field1 = this.Numdoc();
+                Addwords(r, "field2");
+                int num_field2 = this.Numdoc();
+                Assert.AreEqual(num_field2, num_field1 + 1);
+                CheckCommonSuggestions(r);
+                AssertLastSearcherOpen(4);
+                spellChecker.Close();
+                AssertSearchersClosed();
+
+                // In each block below Assert.Fail is reached only when the call did not throw.
+                try
+                {
+                    spellChecker.Close();
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                try
+                {
+                    CheckCommonSuggestions(r);
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                try
+                {
+                    spellChecker.ClearIndex();
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                try
+                {
+                    spellChecker.IndexDictionary(new LuceneDictionary(r, field));
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                try
+                {
+                    spellChecker.SetSpellIndex(spellindex);
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                Assert.AreEqual(4, searchers.Count);
+                AssertSearchersClosed();
+            }
+            finally
+            {
+                // the user-index reader was never closed before; release it on every path
+                r.Close();
+            }
+        }
+
+        /// <summary>
+        /// Tests if the internally shared IndexSearcher is correctly closed
+        /// when the spell checker is concurrently accessed and closed.
+        /// </summary>
+        [Test]
+        public void TestConcurrentAccess()
+        {
+            Assert.AreEqual(1, searchers.Count);
+            IndexReader r = IndexReader.Open(userindex, true);
+            try
+            {
+                spellChecker.ClearIndex();
+                Assert.AreEqual(2, searchers.Count);
+                Addwords(r, "field1");
+                Assert.AreEqual(3, searchers.Count);
+                int num_field1 = this.Numdoc();
+                Addwords(r, "field2");
+                Assert.AreEqual(4, searchers.Count);
+                int num_field2 = this.Numdoc();
+                // (expected, actual) order for accurate failure messages.
+                Assert.AreEqual(num_field1 + 1, num_field2);
+
+                // Start a random number (5..9) of workers that query the spell checker in a loop.
+                int numThreads = 5 + this.random.Next(5);
+                SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
+                for (int i = 0; i < numThreads; i++)
+                {
+                    SpellCheckWorker spellCheckWorker = new SpellCheckWorker(r, this);
+                    spellCheckWorker.start();
+                    workers[i] = spellCheckWorker;
+                }
+
+                int iterations = 5 + random.Next(5);
+                for (int i = 0; i < iterations; i++)
+                {
+                    Thread.Sleep(100);
+                    // concurrently reset the spell index
+                    spellChecker.SetSpellIndex(this.spellindex);
+                    // for debug - prints the internal Open searchers 
+                    // showSearchersOpen();
+                }
+
+                // Workers leave their loop once they see AlreadyClosedException.
+                spellChecker.Close();
+                joinAll(workers, 5000);
+
+                for (int i = 0; i < workers.Length; i++)
+                {
+                    Assert.False(workers[i].failed);
+                    Assert.True(workers[i].terminated);
+                }
+                // 4 searchers more than iterations
+                // 1. at creation
+                // 2. ClearIndex()
+                // 3. and 4. during Addwords
+                Assert.AreEqual(iterations + 4, searchers.Count);
+                AssertSearchersClosed();
+            }
+            finally
+            {
+                // The user-index reader was previously never closed.
+                r.Close();
+            }
+        }
+        /// <summary>
+        /// Joins the given workers one after another, sharing a single time budget
+        /// across all of them. Stops early and prints a warning once the budget is
+        /// used up.
+        /// </summary>
+        /// <param name="workers">Workers to wait for, joined in array order.</param>
+        /// <param name="timeout">Total budget in milliseconds for all joins together.</param>
+        private void joinAll(SpellCheckWorker[] workers, long timeout)
+        {
+            // Monotonic clock: DateTime.Now.TimeOfDay wraps at midnight and follows
+            // wall-clock adjustments, which could corrupt the remaining budget.
+            System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();
+            for (int j = 0; j < workers.Length; j++)
+            {
+                long remaining = timeout - elapsed.ElapsedMilliseconds;
+                if (remaining < 0)
+                {
+                    // this could be helpful if it Assert.Fails one day
+                    Console.WriteLine("Warning: " + (workers.Length - j)
+                        + " threads have not joined but joinall timed out");
+                    break;
+                }
+                workers[j].join(remaining);
+            }
+        }
+
+        /// <summary>
+        /// Asserts that exactly <paramref name="numSearchers"/> searchers have been
+        /// recorded, that all but the most recent one have a reader reference count
+        /// of zero or less, and that the most recent one is still referenced.
+        /// </summary>
+        /// <param name="numSearchers">Expected number of recorded searchers.</param>
+        private void AssertLastSearcherOpen(int numSearchers)
+        {
+            Assert.AreEqual(numSearchers, searchers.Count);
+            Object[] snapshot = searchers.ToArray();
+            int lastIndex = snapshot.Length - 1;
+
+            // Every searcher before the last one must already be released.
+            for (int idx = 0; idx < lastIndex; idx++)
+            {
+                IndexSearcher older = (IndexSearcher)snapshot[idx];
+                bool stillOpen = older.GetIndexReader().GetRefCount() > 0;
+                Assert.False(stillOpen, "expected closed searcher but was Open - Index: " + idx);
+            }
+
+            // The newest searcher is the one currently in use.
+            if (lastIndex >= 0)
+            {
+                IndexSearcher newest = (IndexSearcher)snapshot[lastIndex];
+                bool stillOpen = newest.GetIndexReader().GetRefCount() > 0;
+                Assert.True(stillOpen, "expected last searcher Open but was closed");
+            }
+        }
+
+        /// <summary>
+        /// Asserts that the reader behind every recorded searcher has a reference
+        /// count of zero.
+        /// </summary>
+        private void AssertSearchersClosed()
+        {
+            foreach (Object entry in searchers.ToArray())
+            {
+                IndexSearcher recorded = (IndexSearcher)entry;
+                Assert.AreEqual(0, recorded.GetIndexReader().GetRefCount());
+            }
+        }
+
+        /// <summary>
+        /// Debug helper: prints how many recorded searchers still have a reader
+        /// with a positive reference count.
+        /// </summary>
+        private void ShowSearchersOpen()
+        {
+            int openSearchers = 0;
+            foreach (Object entry in searchers.ToArray())
+            {
+                bool stillOpen = ((IndexSearcher)entry).GetIndexReader().GetRefCount() > 0;
+                if (stillOpen)
+                {
+                    openSearchers++;
+                }
+            }
+            Console.WriteLine(openSearchers);
+        }
+
+
+        /// <summary>
+        /// Worker that repeatedly runs <c>CheckCommonSuggestions</c> on its own
+        /// thread until the spell checker reports <c>AlreadyClosedException</c>
+        /// or any other exception occurs.
+        /// </summary>
+        private class SpellCheckWorker
+        {
+            private readonly IndexReader reader;
+            // Set when run() has finished, whatever the reason.
+            public bool terminated = false;
+            // Set when run() stopped because of an unexpected exception.
+            public bool failed = false;
+            private readonly Thread m_thread;
+            private readonly TestSpellChecker enclosingInstance;
+
+            /// <summary>Creates the worker; the thread is not started until <c>start()</c> is called.</summary>
+            /// <param name="reader">Reader passed to each suggestion check.</param>
+            /// <param name="enclInstance">Test fixture whose checks are executed.</param>
+            public SpellCheckWorker(IndexReader reader, TestSpellChecker enclInstance)
+            {
+                this.reader = reader;
+                enclosingInstance = enclInstance;
+                m_thread = new Thread(run);
+            }
+
+            /// <summary>Thread body: loops over the suggestion check until an exception ends it.</summary>
+            public void run()
+            {
+                try
+                {
+                    while (true)
+                    {
+                        try
+                        {
+                            enclosingInstance.CheckCommonSuggestions(reader);
+                        }
+                        catch (AlreadyClosedException)
+                        {
+                            // Normal shutdown: the spell checker was closed.
+                            return;
+                        }
+                        catch (Exception e)
+                        {
+                            // Print type, message and stack trace; StackTrace alone
+                            // hides what actually went wrong.
+                            Console.WriteLine(e);
+                            failed = true;
+                            return;
+                        }
+                    }
+                }
+                finally
+                {
+                    this.terminated = true;
+                }
+            }
+
+            /// <summary>Waits up to <paramref name="timeout"/> milliseconds for the thread to finish.</summary>
+            public void join(long timeout)
+            {
+                m_thread.Join((int)timeout);
+            }
+
+            /// <summary>Starts the worker thread.</summary>
+            public void start()
+            {
+                m_thread.Start();
+            }
+        }
+
+        /// <summary>
+        /// SpellChecker subclass that records every IndexSearcher produced by
+        /// <c>CreateSearcher</c> so the tests can inspect them afterwards.
+        /// </summary>
+        public class SpellCheckerMock : SpellChecker.Net.Search.Spell.SpellChecker
+        {
+            // Synchronized list of every searcher created through this instance.
+            private ArrayList searchers = ArrayList.Synchronized(new ArrayList());
+            private TestSpellChecker enclosingInstance;
+
+            /// <summary>
+            /// Creates the mock and publishes its searcher list to the test fixture.
+            /// </summary>
+            public SpellCheckerMock(Directory spellIndex, TestSpellChecker inst)
+                : base(spellIndex)
+            {
+                // Note: this code is invoked after CreateSearcher (original author's
+                // note), so the list handed over here may already hold a searcher.
+                this.enclosingInstance = inst;
+                inst.searchers = this.searchers;
+            }
+
+            /// <summary>
+            /// Creates the mock with a custom string distance; the searcher list is
+            /// not published to any fixture by this overload.
+            /// </summary>
+            public SpellCheckerMock(Directory spellIndex, StringDistance sd)
+                : base(spellIndex, sd)
+            {
+            }
+
+            /// <summary>
+            /// Delegates to the base implementation and remembers the searcher it returns.
+            /// </summary>
+            public override IndexSearcher CreateSearcher(Directory dir)
+            {
+                IndexSearcher created = base.CreateSearcher(dir);
+                this.searchers.Add(created);
+                return created;
+            }
+        }
+
+    }
 }
\ No newline at end of file



Mime
View raw message