lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pnas...@apache.org
Subject [Lucene.Net] svn commit: r1206365 - in /incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet: SynExpand/Contrib.WordNet.SynExpand.csproj SynExpand/SynExpand.cs SynLookup/SynLookup.cs Syns2Index/Syns2Index.cs
Date Fri, 25 Nov 2011 22:39:34 GMT
Author: pnasser
Date: Fri Nov 25 22:39:33 2011
New Revision: 1206365

URL: http://svn.apache.org/viewvc?rev=1206365&view=rev
Log:
Contrib.WordNet - compiles and present files are updated, still missing a few files

Modified:
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/Contrib.WordNet.SynExpand.csproj
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/SynExpand.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynLookup/SynLookup.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/Syns2Index/Syns2Index.cs

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/Contrib.WordNet.SynExpand.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/Contrib.WordNet.SynExpand.csproj?rev=1206365&r1=1206364&r2=1206365&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/Contrib.WordNet.SynExpand.csproj
(original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/Contrib.WordNet.SynExpand.csproj
Fri Nov 25 22:39:33 2011
@@ -19,7 +19,6 @@
  under the License.
 
 -->
-
 <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"
DefaultTargets="Build">
   <PropertyGroup>
     <ProjectType>Local</ProjectType>
@@ -116,12 +115,6 @@
     <Content Include="App.ico" />
   </ItemGroup>
   <ItemGroup>
-    <ProjectReference Include="..\..\..\core\Lucene.Net.csproj">
-      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
-      <Name>Lucene.Net</Name>
-    </ProjectReference>
-  </ItemGroup>
-  <ItemGroup>
     <BootstrapperPackage Include=".NETFramework,Version=v4.0">
       <Visible>False</Visible>
       <ProductName>Microsoft .NET Framework 4 %28x86 and x64%29</ProductName>
@@ -143,6 +136,12 @@
       <Install>true</Install>
     </BootstrapperPackage>
   </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\..\core\Lucene.Net.csproj">
+      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
   <PropertyGroup>
     <PreBuildEvent />

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/SynExpand.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/SynExpand.cs?rev=1206365&r1=1206364&r2=1206365&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/SynExpand.cs
(original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynExpand/SynExpand.cs
Fri Nov 25 22:39:33 2011
@@ -16,152 +16,179 @@
  */
 
 using System;
-
-using Lucene.Net.Store;
-using Lucene.Net.Search;
-using Lucene.Net.Index;
-using Lucene.Net.Documents;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
 
 namespace WorldNet.Net
 {
-	
-	
-	/// <summary> Expand a query by looking up synonyms for every term.
-	/// You need to invoke <see cref="Syns2Index"/> first to build the synonym index.
-	/// 
-	/// </summary>
-	/// <seealso cref="Syns2Index">
-	/// </seealso>
-	public sealed class SynExpand
-	{
-		
-		/// <summary> Test driver for synonym expansion.
-		/// Uses boost factor of 0.9 for illustrative purposes.
-		/// 
-		/// If you pass in the query "big dog" then it prints out:
-		/// 
-		/// <pre>
-		/// Query: big adult^0.9 bad^0.9 bighearted^0.9 boastful^0.9 boastfully^0.9 bounteous^0.9
bountiful^0.9 braggy^0.9 crowing^0.9 freehanded^0.9 giving^0.9 grown^0.9 grownup^0.9 handsome^0.9
large^0.9 liberal^0.9 magnanimous^0.9 momentous^0.9 openhanded^0.9 prominent^0.9 swelled^0.9
vainglorious^0.9 vauntingly^0.9
-		/// dog andiron^0.9 blackguard^0.9 bounder^0.9 cad^0.9 chase^0.9 click^0.9 detent^0.9 dogtooth^0.9
firedog^0.9 frank^0.9 frankfurter^0.9 frump^0.9 heel^0.9 hotdog^0.9 hound^0.9 pawl^0.9 tag^0.9
tail^0.9 track^0.9 trail^0.9 weenie^0.9 wiener^0.9 wienerwurst^0.9
-		/// </pre>
-		/// </summary>
-		[STAThread]
-		public static void  Main(System.String[] args)
-		{
-			if (args.Length != 2)
-			{
-				System.Console.Out.WriteLine(typeof(SynExpand) + " <index path> <query>");
+
+
+    /// <summary> Expand a query by looking up synonyms for every term.
+    /// You need to invoke <see cref="Syns2Index"/> first to build the synonym index.
+    /// 
+    /// </summary>
+    /// <seealso cref="Syns2Index" />
+    public sealed class SynExpand
+    {
+        static List<String> already;
+        private static BooleanQuery tmp;
+
+        /// <summary> Test driver for synonym expansion.
+        /// Uses boost factor of 0.9 for illustrative purposes.
+        /// 
+        /// If you pass in the query "big dog" then it prints out:
+        /// 
+        /// <pre>
+        /// Query: big adult^0.9 bad^0.9 bighearted^0.9 boastful^0.9 boastfully^0.9 bounteous^0.9
bountiful^0.9 braggy^0.9 crowing^0.9 freehanded^0.9 giving^0.9 grown^0.9 grownup^0.9 handsome^0.9
large^0.9 liberal^0.9 magnanimous^0.9 momentous^0.9 openhanded^0.9 prominent^0.9 swelled^0.9
vainglorious^0.9 vauntingly^0.9
+        /// dog andiron^0.9 blackguard^0.9 bounder^0.9 cad^0.9 chase^0.9 click^0.9 detent^0.9
dogtooth^0.9 firedog^0.9 frank^0.9 frankfurter^0.9 frump^0.9 heel^0.9 hotdog^0.9 hound^0.9
pawl^0.9 tag^0.9 tail^0.9 track^0.9 trail^0.9 weenie^0.9 wiener^0.9 wienerwurst^0.9
+        /// </pre>
+        /// </summary>
+        [STAThread]
+        public static void Main(String[] args)
+        {
+            if (args.Length != 2)
+            {
+                Console.Out.WriteLine(typeof(SynExpand) + " <index path> <query>");
                 return;
-			}
-			
-			FSDirectory directory = FSDirectory.GetDirectory(args[0], false);
-			IndexSearcher searcher = new IndexSearcher(directory);
-			
-			System.String query = args[1];
-			System.String field = "contents";
-			
-			Query q = Expand(query, searcher, new StandardAnalyzer(), field, 0.9f);
-			System.Console.Out.WriteLine("Query: " + q.ToString(field));
-			
-			
-			
-			searcher.Close();
-			directory.Close();
-		}
-		
-		
-		/// <summary> Perform synonym expansion on a query.
-		/// 
-		/// </summary>
-		/// <param name="query">users query that is assumed to not have any "special" query
syntax, thus it should be just normal words, so "big dog" makes sense, but a query like "title:foo^1.2"
doesn't as this should presumably be passed directly to the default query parser.
-		/// 
-		/// </param>
-		/// <param name="syns">a opened to the Lucene index you previously created with <see
cref="Syns2Index"/>. The searcher is not closed or otherwise altered.
-		/// 
-		/// </param>
-		/// <param name="a">optional analyzer used to parse the users query else <see
cref="StandardAnalyzer"/> is used
-		/// 
-		/// </param>
-		/// <param name="field">optional field name to search in or null if you want the
default of "contents"
-		/// 
-		/// </param>
-		/// <param name="boost">optional boost applied to synonyms else no boost is applied
-		/// 
-		/// </param>
-		/// <returns> the expanded Query
-		/// </returns>
-		public static Query Expand(System.String query, Searcher syns, Analyzer a, System.String
field, float boost)
+            }
+
+            var directory = FSDirectory.Open(new DirectoryInfo(args[0]));
+            var searcher = new IndexSearcher(directory, true);
+
+            String query = args[1];
+            const string field = "contents";
+
+            Query q = Expand(query, searcher, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT),
field, 0.9f);
+            System.Console.Out.WriteLine("Query: " + q.ToString(field));
+
+            searcher.Close();
+            directory.Close();
+        }
+
+
+        /// <summary> 
+        /// Perform synonym expansion on a query.
+        /// </summary>
+        /// <param name="query">the user's query, which is assumed not to have any "special"
query syntax, thus it should be just normal words, so "big dog" makes sense, but a query like
"title:foo^1.2" doesn't as this should presumably be passed directly to the default query
parser </param>
+        /// <param name="syns">an open searcher on the Lucene index you previously created
with <see cref="Syns2Index"/>. The searcher is not closed or otherwise altered. </param>
+        /// <param name="a">optional analyzer used to parse the user's query; if null, <see
cref="StandardAnalyzer"/> is used </param>
+        /// <param name="field">optional field name to search in or null if you want
the default of "contents" </param>
+        /// <param name="boost">optional boost applied to synonyms else no boost is
applied </param>
+        /// <returns>the expanded Query </returns>
+        public static Query Expand(String query,
+            Searcher syns,
+            Analyzer a,
+            String field,
+            float boost)
 		{
-			System.Collections.Hashtable already = new System.Collections.Hashtable(); // avoid dups

-			System.Collections.IList top = new System.Collections.ArrayList(); // needs to be separately
listed..
+			already = new List<String>(); // avoid dups 
+			var top = new List<String>(); // needs to be separately listed..
 			if (field == null)
 				field = "contents";
-			if (a == null)
-				a = new StandardAnalyzer();
+			
+            if (a == null)
+				a = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
 			
 			// [1] Parse query into separate words so that when we expand we can avoid dups
-			TokenStream ts = a.TokenStream(field, new System.IO.StringReader(query));
-			Lucene.Net.Analysis.Token t;
-			while ((t = ts.Next()) != null)
+			var ts = a.TokenStream(field, new StringReader(query));
+            var termAtt = ts.AddAttribute<TermAttribute>();
+		    
+            while (ts.IncrementToken())
 			{
-				System.String word = t.TermText();
-				if (already.Contains(word) == false)
+				var word = termAtt.Term();
+				
+                if (!already.Contains(word))
 				{
-					already.Add(word, word);
+					already.Add(word);
 					top.Add(word);
 				}
 			}
-			BooleanQuery tmp = new BooleanQuery();
+
+			tmp = new BooleanQuery();
 			
 			// [2] form query
 			System.Collections.IEnumerator it = top.GetEnumerator();
 			while (it.MoveNext())
 			{
 				// [2a] add to level words in
-				System.String word = (System.String) it.Current;
-				TermQuery tq = new TermQuery(new Term(field, word));
+				var word = (String) it.Current;
+				var tq = new TermQuery(new Term(field, word));
 				tmp.Add(tq, BooleanClause.Occur.SHOULD);
-				
-				// [2b] add in unique synonums
-				Hits hits = syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
-				for (int i = 0; i < hits.Length(); i++)
-				{
-					Document doc = hits.Doc(i);
-					System.String[] values = doc.GetValues(Syns2Index.F_SYN);
-					for (int j = 0; j < values.Length; j++)
-					{
-						System.String syn = values[j];
-						if (already.Contains(syn) == false)
-						// avoid dups of top level words and synonyms
-						{
-							already.Add(syn, syn);
-							tq = new TermQuery(new Term(field, syn));
-							if (boost > 0)
-							// else keep normal 1.0
-								tq.SetBoost(boost);
-							tmp.Add(tq, BooleanClause.Occur.SHOULD);
-						}
-					}
-				}
+
+			    var c = new CollectorImpl(field, boost);
+                syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)), c);
 			}
 			
-			
 			return tmp;
 		}
-	}
+	
+
+        /// <summary>
+        /// From project WordNet.Net.Syns2Index
+        /// </summary>
+        public class Syns2Index
+        {
+            /// <summary> </summary>
+            public const String F_SYN = "syn";
+
+            /// <summary> </summary>
+            public const String F_WORD = "word";
+        }
+
+        /// <summary>
+        /// CollectorImpl
+        /// </summary>
+        internal sealed class CollectorImpl : Collector
+        {
+            private IndexReader reader;
+            private readonly string field;
+            private readonly float boost;
+           
+            public CollectorImpl(string field, float boost)
+            {
+                this.field = field;
+                this.boost = boost;
+            }
+
+            public override void SetScorer(Scorer scorer)
+            {
+                // Ignore
+            }
+
+            public override void Collect(int doc)
+            {
+                var d = reader.Document(doc);
+                var values = d.GetValues(Syns2Index.F_SYN);
+                foreach (var syn in values.Where(syn => !already.Contains(syn)))
+                {
+                    already.Add(syn);
+
+                    var tq = new TermQuery(new Term(field, syn));
+                    if (boost > 0) // else keep normal 1.0
+                        tq.SetBoost(boost);
+
+                    tmp.Add(tq, BooleanClause.Occur.SHOULD);
+                }
+            }
+
+            public override void SetNextReader(IndexReader reader, int docBase)
+            {
+                this.reader = reader;
+            }
+
+            public override bool AcceptsDocsOutOfOrder()
+            {
+                return true;
+            }
 
+        }
 
-	/// <summary>
-	/// From project WordNet.Net.Syns2Index
-	/// </summary>
-	public class Syns2Index
-	{
-		/// <summary> </summary>
-		public const System.String F_SYN = "syn";
-
-		/// <summary> </summary>
-		public const System.String F_WORD = "word";
-	}
+    }
 }
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynLookup/SynLookup.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynLookup/SynLookup.cs?rev=1206365&r1=1206364&r2=1206365&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynLookup/SynLookup.cs
(original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynLookup/SynLookup.cs
Fri Nov 25 22:39:33 2011
@@ -16,140 +16,193 @@
  */
 
 using System;
-
-using Lucene.Net.Store;
-using Lucene.Net.Search;
-using Lucene.Net.Index;
-using Lucene.Net.Documents;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
 using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
 
 namespace WorldNet.Net
 {
-	
-	
 	/// <summary> Test program to look up synonyms.</summary>
 	public class SynLookup
 	{
-		
+		static List<String> already;
+		private static BooleanQuery tmp;
+
 		[STAThread]
 		public static void  Main(System.String[] args)
 		{
 			if (args.Length != 2)
 			{
 				System.Console.Out.WriteLine(typeof(SynLookup) + " <index path> <word>");
-                return;
+				return;
 			}
 			
-			FSDirectory directory = FSDirectory.GetDirectory(args[0], false);
-			IndexSearcher searcher = new IndexSearcher(directory);
-			
-			System.String word = args[1];
-			Hits hits = searcher.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
-			
-			if (hits.Length() == 0)
+			using (var directory = FSDirectory.Open(new DirectoryInfo(args[0])))
 			{
-				System.Console.Out.WriteLine("No synonyms found for " + word);
-			}
-			else
-			{
-				System.Console.Out.WriteLine("Synonyms found for \"" + word + "\":");
-			}
-			
-			for (int i = 0; i < hits.Length(); i++)
-			{
-				Document doc = hits.Doc(i);
-				
-				System.String[] values = doc.GetValues(Syns2Index.F_SYN);
-				
-				for (int j = 0; j < values.Length; j++)
+				using (var searcher = new IndexSearcher(directory, true))
 				{
-					System.Console.Out.WriteLine(values[j]);
+
+					String word = args[1];
+					Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));
+					var countingCollector = new CountingCollector();
+					searcher.Search(query, countingCollector);
+
+					if (countingCollector.numHits == 0)
+					{
+						Console.Out.WriteLine("No synonyms found for " + word);
+					}
+					else
+					{
+						Console.Out.WriteLine("Synonyms found for \"" + word + "\":");
+					}
+
+					var hits = searcher.Search(query, countingCollector.numHits).ScoreDocs;
+
+					foreach (var v in
+						hits.Select(t => searcher.Doc(t.doc)).Select(doc => doc.GetValues(Syns2Index.F_SYN)).SelectMany(values
=> values))
+					{
+						Console.Out.WriteLine(v);
+					}
+
 				}
 			}
-			
-			searcher.Close();
-			directory.Close();
 		}
 		
-		
-		/// <summary> Perform synonym expansion on a query.
-		/// 
+		/// <summary> 
+		/// Perform synonym expansion on a query.
 		/// </summary>
-		/// <param name="">query
-		/// </param>
-		/// <param name="">syns
-		/// </param>
-		/// <param name="">a
-		/// </param>
-		/// <param name="">field
-		/// </param>
-		/// <param name="">boost
-		/// </param>
-		public static Query Expand(System.String query, Searcher syns, Analyzer a, System.String
field, float boost)
+		/// <param name="query">query</param>
+		/// <param name="syns">syns</param>
+		/// <param name="a">a</param>
+		/// <param name="field">field</param>
+		/// <param name="boost">boost</param>
+		public static Query Expand(String query, 
+			Searcher syns, 
+			Analyzer a, 
+			String field, 
+			float boost)
 		{
-			System.Collections.Hashtable already = new System.Collections.Hashtable(); // avoid dups
	
-			System.Collections.IList top = new System.Collections.ArrayList(); // needs to be separately
listed..
-			
-			// [1] Parse query into separate words so that when we expand we can avoid dups
-			TokenStream ts = a.TokenStream(field, new System.IO.StringReader(query));
-			Lucene.Net.Analysis.Token t;
-			while ((t = ts.Next()) != null)
+			already = new List<String>(); // avoid dups		
+			var top = new List<String>(); // needs to be separately listed..
+
+			var ts = a.TokenStream(field, new StringReader(query));
+			var termAtt = ts.AddAttribute<TermAttribute>();
+
+			while (ts.IncrementToken())
 			{
-				System.String word = t.TermText();
-				if (already.Contains(word) == false)
+				var word = termAtt.Term();
+
+				if (!already.Contains(word))
 				{
-					already.Add(word, word);
+					already.Add(word);
 					top.Add(word);
 				}
 			}
-			BooleanQuery tmp = new BooleanQuery();
-			
+
+			tmp = new BooleanQuery();
+
 			// [2] form query
 			System.Collections.IEnumerator it = top.GetEnumerator();
 			while (it.MoveNext())
 			{
 				// [2a] add to level words in
-				System.String word = (System.String) it.Current;
-				TermQuery tq = new TermQuery(new Term(field, word));
+				var word = (String)it.Current;
+				var tq = new TermQuery(new Term(field, word));
 				tmp.Add(tq, BooleanClause.Occur.SHOULD);
-				
-				// [2b] add in unique synonums
-				Hits hits = syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
-				for (int i = 0; i < hits.Length(); i++)
-				{
-					Document doc = hits.Doc(i);
-					System.String[] values = doc.GetValues(Syns2Index.F_SYN);
-					for (int j = 0; j < values.Length; j++)
-					{
-						System.String syn = values[j];
-						if (already.Contains(syn) == false)
-						{
-							already.Add(syn, syn);
-							tq = new TermQuery(new Term(field, syn));
-							if (boost > 0)
-							// else keep normal 1.0
-								tq.SetBoost(boost);
-							tmp.Add(tq, BooleanClause.Occur.SHOULD);
-						}
-					}
-				}
+
+				var c = new CollectorImpl(field, boost);
+				syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)), c);
 			}
-			
-			
+
 			return tmp;
 		}
-	}
 
+        internal sealed class CountingCollector : Collector
+        {
+            public int numHits;
+
+            public override void SetScorer(Scorer scorer)
+            { }
+
+            public override void Collect(int doc)
+            {
+                numHits++;
+            }
+
+            public override void SetNextReader(IndexReader reader, int docBase)
+            { }
+
+            public override bool AcceptsDocsOutOfOrder()
+            {
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// CollectorImpl
+        /// </summary>
+        internal sealed class CollectorImpl : Collector
+        {
+            private IndexReader reader;
+            private readonly string field;
+            private readonly float boost;
+
+            public CollectorImpl(string field, float boost)
+            {
+                this.field = field;
+                this.boost = boost;
+            }
+
+            public override void SetScorer(Scorer scorer)
+            {
+                // Ignore
+            }
+
+            public override void Collect(int doc)
+            {
+                var d = reader.Document(doc);
+                var values = d.GetValues(Syns2Index.F_SYN);
+                foreach (var syn in values.Where(syn => !already.Contains(syn)))
+                {
+                    already.Add(syn);
+
+                    var tq = new TermQuery(new Term(field, syn));
+                    if (boost > 0) // else keep normal 1.0
+                        tq.SetBoost(boost);
+
+                    tmp.Add(tq, BooleanClause.Occur.SHOULD);
+                }
+            }
+
+            public override void SetNextReader(IndexReader reader, int docBase)
+            {
+                this.reader = reader;
+            }
+
+            public override bool AcceptsDocsOutOfOrder()
+            {
+                return true;
+            }
+
+        }
+
+        /// <summary>
+        /// From project WordNet.Net.Syns2Index
+        /// </summary>
+        public class Syns2Index
+        {
+            /// <summary> </summary>
+            public const String F_SYN = "syn";
+
+            /// <summary> </summary>
+            public const String F_WORD = "word";
+        }
 
-    /// <summary>
-    /// From project WordNet.Net.Syns2Index
-    /// </summary>
-    public class Syns2Index
-    {
-        /// <summary> </summary>
-        public const System.String F_SYN = "syn";
-
-        /// <summary> </summary>
-        public const System.String F_WORD = "word";
     }
+
 }
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/Syns2Index/Syns2Index.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/Syns2Index/Syns2Index.cs?rev=1206365&r1=1206364&r2=1206365&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/Syns2Index/Syns2Index.cs
(original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/Syns2Index/Syns2Index.cs
Fri Nov 25 22:39:33 2011
@@ -16,8 +16,12 @@
  */
 
 using System;
-
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Lucene.Net.Store;
 using Analyzer = Lucene.Net.Analysis.Analyzer;
+using Directory = System.IO.Directory;
 using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
 using Document = Lucene.Net.Documents.Document;
 using Field = Lucene.Net.Documents.Field;
@@ -44,18 +48,12 @@ namespace WorldNet.Net
 	/// While the WordNet file distinguishes groups of synonyms with
 	/// related meanings we don't do that here.
 	/// </p>
-	/// 
 	/// This can take 4 minutes to execute and build an index on a "fast" system and the index
takes up almost 3 MB.
-	/// 
 	/// </summary>
-	/// <author>  Dave Spencer, dave&#064;searchmorph.com
-	/// </author>
-	/// <seealso cref="href="http://www.cogsci.princeton.edu/~wn/">WordNet home page</a>">
-	/// </seealso>
-	/// <seealso cref="href="http://www.cogsci.princeton.edu/~wn/man/prologdb.5WN.html">prologdb
man page</a>">
-	/// </seealso>
-	/// <seealso cref="href="http://www.hostmon.com/rfc/advanced.jsp">sample site that
uses it</a>">
-	/// </seealso>
+	/// 
+	/// <seealso cref="http://www.cogsci.princeton.edu/~wn/"></seealso>
+	/// <seealso cref="http://www.cogsci.princeton.edu/~wn/man/prologdb.5WN.html"></seealso>
+	/// <seealso cref="http://www.hostmon.com/rfc/advanced.jsp"> </seealso>
 	public class Syns2Index
 	{
 		/// <summary> </summary>
@@ -71,15 +69,17 @@ namespace WorldNet.Net
 		public const System.String F_WORD = "word";
 		
 		/// <summary> </summary>
-		private static readonly Analyzer ana = new StandardAnalyzer();
+		private static readonly Analyzer ana = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
 		
-		/// <summary> Takes arg of prolog file name and index directory.</summary>
+		/// <summary> 
+		/// Takes arg of prolog file name and index directory.
+		/// </summary>
 		[STAThread]
 		public static void  Main(System.String[] args)
 		{
 			// get command line arguments
-			System.String prologFilename = null; // name of file "wn_s.pl"
-			System.String indexDir = null;
+			String prologFilename = null; // name of file "wn_s.pl"
+			String indexDir = null;
 			if (args.Length == 2)
 			{
 				prologFilename = args[0];
@@ -88,45 +88,44 @@ namespace WorldNet.Net
 			else
 			{
 				Usage();
-				System.Environment.Exit(1);
+				Environment.Exit(1);
 			}
 			
 			// ensure that the prolog file is readable
-			if (!(new System.IO.FileInfo(prologFilename)).Exists)
+			if (!(new FileInfo(prologFilename)).Exists)
 			{
 				err.WriteLine("Error: cannot read Prolog file: " + prologFilename);
-				System.Environment.Exit(1);
+				Environment.Exit(1);
 			}
 			// exit if the target index directory already exists
-			if (System.IO.Directory.Exists((new System.IO.FileInfo(indexDir)).FullName))
+			if (Directory.Exists((new FileInfo(indexDir)).FullName))
 			{
 				err.WriteLine("Error: index directory already exists: " + indexDir);
 				err.WriteLine("Please specify a name of a non-existent directory");
-				System.Environment.Exit(1);
+				Environment.Exit(1);
 			}
 			
 			o.WriteLine("Opening Prolog file " + prologFilename);
-			System.IO.FileStream fis = new System.IO.FileStream(prologFilename, System.IO.FileMode.Open,
System.IO.FileAccess.Read);
-			System.IO.StreamReader br = new System.IO.StreamReader(new System.IO.StreamReader(fis,
System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(fis, System.Text.Encoding.Default).CurrentEncoding);
-			System.String line;
+			var fis = new FileStream(prologFilename, FileMode.Open, FileAccess.Read);
+			var br = new StreamReader(new StreamReader(fis, System.Text.Encoding.Default).BaseStream,
new StreamReader(fis, System.Text.Encoding.Default).CurrentEncoding);
+			String line;
 			
 			// maps a word to all the "groups" it's in
 			System.Collections.IDictionary word2Nums = new System.Collections.SortedList();
 			// maps a group to all the words in it
 			System.Collections.IDictionary num2Words = new System.Collections.SortedList();
 			// number of rejected words
-			int ndecent = 0;
+			var ndecent = 0;
 			
 			// status output
-			int mod = 1;
-			int row = 1;
+			var mod = 1;
+			var row = 1;
 			// parse prolog file
 			o.WriteLine("[1/2] Parsing " + prologFilename);
 			while ((line = br.ReadLine()) != null)
 			{
 				// occasional progress
-				if ((++row) % mod == 0)
-				// periodically print out line we read in
+				if ((++row) % mod == 0) // periodically print out line we read in
 				{
 					mod *= 2;
 					o.WriteLine("\t" + row + " " + line + " " + word2Nums.Count + " " + num2Words.Count
+ " ndecent=" + ndecent);
@@ -136,17 +135,17 @@ namespace WorldNet.Net
 				if (!line.StartsWith("s("))
 				{
 					err.WriteLine("OUCH: " + line);
-					System.Environment.Exit(1);
+					Environment.Exit(1);
 				}
 				
 				// parse line
 				line = line.Substring(2);
-				int comma = line.IndexOf((System.Char) ',');
-				System.String num = line.Substring(0, (comma) - (0));
-				int q1 = line.IndexOf((System.Char) '\'');
+				var comma = line.IndexOf(',');
+				var num = line.Substring(0, comma);
+				var q1 = line.IndexOf('\'');
 				line = line.Substring(q1 + 1);
-				int q2 = line.IndexOf((System.Char) '\'');
-				System.String word = line.Substring(0, (q2) - (0)).ToLower();
+				var q2 = line.IndexOf('\'');
+				var word = line.Substring(0, q2).ToLower().Replace("''", "'");
 				
 				// make sure is a normal word
 				if (!IsDecent(word))
@@ -157,11 +156,10 @@ namespace WorldNet.Net
 				
 				// 1/2: word2Nums map
 				// append to entry or add new one
-				System.Collections.IList lis = (System.Collections.IList) word2Nums[word];
+				var lis = (System.Collections.IList) word2Nums[word];
 				if (lis == null)
 				{
-					lis = new System.Collections.ArrayList();
-					lis.Add(num);
+					lis = new List<String> {num};
 					word2Nums[word] = lis;
 				}
 				else
@@ -171,8 +169,7 @@ namespace WorldNet.Net
 				lis = (System.Collections.IList) num2Words[num];
 				if (lis == null)
 				{
-					lis = new System.Collections.ArrayList();
-					lis.Add(word);
+					lis = new List<String> { word };
 					num2Words[num] = lis;
 				}
 				else
@@ -188,20 +185,18 @@ namespace WorldNet.Net
 			Index(indexDir, word2Nums, num2Words);
 		}
 		
-		/// <summary> Checks to see if a word contains only alphabetic characters by
+		/// <summary> 
+		/// Checks to see if a word contains only alphabetic characters by
 		/// checking it one character at a time.
-		/// 
 		/// </summary>
-		/// <param name="s">string to check
-		/// </param>
-		/// <returns> <c>true</c> if the string is decent
-		/// </returns>
-		private static bool IsDecent(System.String s)
+		/// <param name="s">string to check </param>
+		/// <returns> <c>true</c> if the string is decent</returns>
+		private static bool IsDecent(String s)
 		{
-			int len = s.Length;
-			for (int i = 0; i < len; i++)
+			var len = s.Length;
+			for (var i = 0; i < len; i++)
 			{
-				if (!System.Char.IsLetter(s[i]))
+				if (!Char.IsLetter(s[i]))
 				{
 					return false;
 				}
@@ -209,75 +204,73 @@ namespace WorldNet.Net
 			return true;
 		}
 		
-		/// <summary> Forms a Lucene index based on the 2 maps.
-		/// 
+		/// <summary> 
+		/// Forms a Lucene index based on the 2 maps.
 		/// </summary>
-		/// <param name="indexDir">the direcotry where the index should be created
-		/// </param>
-		/// <param name="">word2Nums
-		/// </param>
-		/// <param name="">num2Words
-		/// </param>
-		private static void  Index(System.String indexDir, System.Collections.IDictionary word2Nums,
System.Collections.IDictionary num2Words)
+		/// <param name="indexDir">the directory where the index should be created</param>
+		/// <param name="word2Nums">word2Nums</param>
+		/// <param name="num2Words">num2Words</param>
+		private static void  Index(String indexDir, System.Collections.IDictionary word2Nums, System.Collections.IDictionary
num2Words)
 		{
-			int row = 0;
-			int mod = 1;
+			var row = 0;
+			var mod = 1;
 			
-			// override the specific index if it already exists
-			IndexWriter writer = new IndexWriter(indexDir, ana, true);
-			writer.SetUseCompoundFile(true); // why?
-			// blindly up these parameters for speed
-			writer.SetMergeFactor(writer.GetMergeFactor() * 2);
-			writer.SetMaxBufferedDocs(writer.GetMaxBufferedDocs() * 2);
-			System.Collections.IEnumerator i1 = word2Nums.Keys.GetEnumerator();
-			while (i1.MoveNext())
-			// for each word
+			using (var dir = FSDirectory.Open(new DirectoryInfo(indexDir)))
 			{
-				System.String g = (System.String) i1.Current;
-				Document doc = new Document();
-				
-				int n = Index(word2Nums, num2Words, g, doc);
-				if (n > 0)
+				var writer = new IndexWriter(dir, ana, true, IndexWriter.MaxFieldLength.LIMITED);
+				writer.SetUseCompoundFile(true); // why?
+
+				var i1 = word2Nums.Keys.GetEnumerator();
+				while (i1.MoveNext())
 				{
-					doc.Add(new Field(F_WORD, g, Field.Store.YES, Field.Index.UN_TOKENIZED));
-					if ((++row % mod) == 0)
+					var g = (String)i1.Current;
+					var doc = new Document();
+
+					var n = Index(word2Nums, num2Words, g, doc);
+					if (n > 0)
 					{
-						o.WriteLine("\trow=" + row + "/" + word2Nums.Count + " doc= " + doc);
-						mod *= 2;
+						doc.Add(new Field(F_WORD, g, Field.Store.YES, Field.Index.NOT_ANALYZED));
+						if ((++row % mod) == 0)
+						{
+							o.WriteLine("\trow=" + row + "/" + word2Nums.Count + " doc= " + doc);
+							mod *= 2;
+						}
+						writer.AddDocument(doc);
 					}
-					writer.AddDocument(doc);
-				} // else degenerate
+				}
+				o.WriteLine("Optimizing..");
+				writer.Optimize();
+				writer.Close();
 			}
-			o.WriteLine("Optimizing..");
-			writer.Optimize();
-			writer.Close();
+			
 		}
 
-		/// <summary> Given the 2 maps fills a document for 1 word.</summary>
+		/// <summary> 
+		/// Given the 2 maps fills a document for 1 word.
+		/// </summary>
 		private static int Index(System.Collections.IDictionary word2Nums, System.Collections.IDictionary
num2Words, System.String g, Document doc)
 		{
-			System.Collections.IList keys = (System.Collections.IList) word2Nums[g]; // get list of
key#'s
-			System.Collections.IEnumerator i2 = keys.GetEnumerator();
+			var keys = (System.Collections.IList) word2Nums[g]; // get list of key#'s
+			var i2 = keys.GetEnumerator();
 			
-			System.Collections.SortedList already = new System.Collections.SortedList(); // keep them
sorted
+			var already = new System.Collections.SortedList(); // keep them sorted
 			
 			// pass 1: fill up 'already' with all words
 			while (i2.MoveNext()) // for each key#
 			{
-				foreach (object item in (System.Collections.IList) num2Words[i2.Current]) // get list
of words
+				foreach (var item in
+					((System.Collections.IList) num2Words[i2.Current]).Cast<object>().Where(item =>
already.Contains(item) == false))
 				{
-					if (already.Contains(item) == false)
-					{
-						already.Add(item, item); 
-					}
+					already.Add(item, item);
 				}
 			}
-			int num = 0;
+
+			var num = 0;
 			already.Remove(g); // of course a word is it's own syn
-			System.Collections.IDictionaryEnumerator it = already.GetEnumerator();
+			var it = already.GetEnumerator();
 			while (it.MoveNext())
 			{
-				System.String cur = (System.String) it.Key;
+				var cur = (String) it.Key;
 				// don't store things like 'pit bull' -> 'american pit bull'
 				if (!IsDecent(cur))
 				{
@@ -295,16 +288,5 @@ namespace WorldNet.Net
 			o.WriteLine("\n\n" + typeof(Syns2Index) + " <prolog file> <index dir>\n\n");
 		}
 
-		static Syns2Index()
-		{
-			System.IO.StreamWriter temp_writer;
-			temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
-			temp_writer.AutoFlush = true;
-			o = temp_writer;
-			System.IO.StreamWriter temp_writer2;
-			temp_writer2 = new System.IO.StreamWriter(System.Console.OpenStandardError(), System.Console.Error.Encoding);
-			temp_writer2.AutoFlush = true;
-			err = temp_writer2;
-		}
 	}
 }
\ No newline at end of file



Mime
View raw message