- ///
- [STAThread]
- public static void Main(System.String[] args)
- {
- if (args.Length != 2)
- {
- System.Console.Out.WriteLine(typeof(SynExpand) + " ");
+
+
+ /// Expand a query by looking up synonyms for every term.
+ /// You need to run Syns2Index first to build the synonym index.
+ ///
+ ///
+ ///
+ public sealed class SynExpand
+ {
+ static List already;
+ private static BooleanQuery tmp;
+
+ /// <summary>Test driver for synonym expansion: expects two arguments, the index directory (args[0]) and the query text (args[1]).
+ /// Uses boost factor of 0.9 for illustrative purposes.
+ /// The query is expanded against the "contents" field using a StandardAnalyzer.
+ /// If you pass in the query "big dog" then it prints out a line starting with "Query: " followed by the expanded query
+ /// (the sample output that used to follow this sentence is missing from the comment).
+ /// When the argument count is not 2, only the type name is printed and the method returns.
+ /// </summary>
+ [STAThread]
+ public static void Main(String[] args)
+ {
+ if (args.Length != 2)
+ {
+ Console.Out.WriteLine(typeof(SynExpand) + " ");
return;
- }
-
- FSDirectory directory = FSDirectory.GetDirectory(args[0], false);
- IndexSearcher searcher = new IndexSearcher(directory);
-
- System.String query = args[1];
- System.String field = "contents";
-
- Query q = Expand(query, searcher, new StandardAnalyzer(), field, 0.9f);
- System.Console.Out.WriteLine("Query: " + q.ToString(field));
-
-
-
- searcher.Close();
- directory.Close();
- }
-
-
- /// Perform synonym expansion on a query.
- ///
- ///
- /// users query that is assumed to not have any "special" query syntax, thus it should be just normal words, so "big dog" makes sense, but a query like "title:foo^1.2" doesn't as this should presumably be passed directly to the default query parser.
- ///
- ///
- /// a opened to the Lucene index you previously created with . The searcher is not closed or otherwise altered.
- ///
- ///
- /// optional analyzer used to parse the users query else is used
- ///
- ///
- /// optional field name to search in or null if you want the default of "contents"
- ///
- ///
- /// optional boost applied to synonyms else no boost is applied
- ///
- ///
- /// the expanded Query
- ///
- public static Query Expand(System.String query, Searcher syns, Analyzer a, System.String field, float boost)
+ }
+
+ var directory = FSDirectory.Open(new DirectoryInfo(args[0])); // NOTE(review): directory and searcher are closed manually at the end; an exception in between skips both Close calls
+ var searcher = new IndexSearcher(directory, true); // second argument presumably requests a read-only searcher - TODO confirm
+
+ String query = args[1];
+ const string field = "contents";
+
+ Query q = Expand(query, searcher, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), field, 0.9f); // 0.9f is the illustrative synonym boost
+ System.Console.Out.WriteLine("Query: " + q.ToString(field));
+
+ searcher.Close();
+ directory.Close();
+ }
+
+
+ /// <summary>
+ /// Perform synonym expansion on a query: every distinct analyzed word becomes a SHOULD clause, and every not-yet-seen synonym found through <paramref name="syns"/> is added as a further SHOULD clause.
+ /// </summary>
+ /// <param name="query">users query that is assumed to not have any "special" query syntax, thus it should be just normal words, so "big dog" makes sense, but a query like "title:foo^1.2" doesn't as this should presumably be passed directly to the default query parser</param>
+ /// <param name="syns">an opened searcher over the synonym index (presumably the one built by Syns2Index - confirm). The searcher is not closed here.</param>
+ /// <param name="a">optional analyzer used to parse the users query; when null a StandardAnalyzer is used</param>
+ /// <param name="field">optional field name to search in or null if you want the default of "contents"</param>
+ /// <param name="boost">optional boost applied to synonyms; values that are not greater than 0 leave the default boost untouched</param>
+ /// <returns>the expanded Query (the same BooleanQuery instance that is stored in the static tmp field)</returns>
+ public static Query Expand(String query,
+ Searcher syns,
+ Analyzer a,
+ String field,
+ float boost)
{
- System.Collections.Hashtable already = new System.Collections.Hashtable(); // avoid dups
- System.Collections.IList top = new System.Collections.ArrayList(); // needs to be separately listed..
+ already = new List(); // avoid dups; NOTE(review): static field reassigned on every call and read by CollectorImpl, so overlapping Expand calls would share it
+ var top = new List(); // needs to be separately listed.. (keeps the query's own words in first-seen order)
if (field == null)
field = "contents";
- if (a == null)
- a = new StandardAnalyzer();
+
+ if (a == null)
+ a = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
// [1] Parse query into separate words so that when we expand we can avoid dups
- TokenStream ts = a.TokenStream(field, new System.IO.StringReader(query));
- Lucene.Net.Analysis.Token t;
- while ((t = ts.Next()) != null)
+ var ts = a.TokenStream(field, new StringReader(query));
+ var termAtt = ts.AddAttribute(); // attribute through which each token's term text is read below
+
+ while (ts.IncrementToken())
{
- System.String word = t.TermText();
- if (already.Contains(word) == false)
+ var word = termAtt.Term();
+
+ if (!already.Contains(word)) // NOTE(review): linear scan of a list; a set would avoid the repeated lookups
{
- already.Add(word, word);
+ already.Add(word);
top.Add(word);
}
}
- BooleanQuery tmp = new BooleanQuery();
+
+ tmp = new BooleanQuery(); // static field: CollectorImpl appends the synonym clauses to this instance
// [2] form query
System.Collections.IEnumerator it = top.GetEnumerator();
while (it.MoveNext())
{
// [2a] add top level words in
- System.String word = (System.String) it.Current;
- TermQuery tq = new TermQuery(new Term(field, word));
+ var word = (String) it.Current;
+ var tq = new TermQuery(new Term(field, word));
tmp.Add(tq, BooleanClause.Occur.SHOULD);
-
- // [2b] add in unique synonums
- Hits hits = syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
- for (int i = 0; i < hits.Length(); i++)
- {
- Document doc = hits.Doc(i);
- System.String[] values = doc.GetValues(Syns2Index.F_SYN);
- for (int j = 0; j < values.Length; j++)
- {
- System.String syn = values[j];
- if (already.Contains(syn) == false)
- // avoid dups of top level words and synonyms
- {
- already.Add(syn, syn);
- tq = new TermQuery(new Term(field, syn));
- if (boost > 0)
- // else keep normal 1.0
- tq.SetBoost(boost);
- tmp.Add(tq, BooleanClause.Occur.SHOULD);
- }
- }
- }
+
+ var c = new CollectorImpl(field, boost); // [2b] the collector adds the unique synonyms of this word to tmp
+ syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)), c);
}
-
return tmp;
}
- }
+
+
+ /// <summary>
+ /// Field-name constants copied from project WordNet.Net.Syns2Index.
+ /// </summary>
+ public class Syns2Index
+ {
+ /// <summary>Name of the field whose values are read as synonyms (see GetValues in CollectorImpl).</summary>
+ public const String F_SYN = "syn";
+
+ /// <summary>Name of the field the looked-up word is matched against in the synonym index.</summary>
+ public const String F_WORD = "word";
+ }
+
+ /// <summary>
+ /// Collector that, for each collected document, reads its F_SYN values and adds every synonym not yet in the static already list to the static tmp query as a SHOULD clause.
+ /// </summary>
+ internal sealed class CollectorImpl : Collector
+ {
+ private IndexReader reader; // most recent reader handed to SetNextReader; Collect loads documents from it
+ private readonly string field; // field name used for the generated TermQuery instances
+ private readonly float boost; // boost for synonym clauses; only applied when greater than 0
+
+ public CollectorImpl(string field, float boost)
+ {
+ this.field = field;
+ this.boost = boost;
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ // Ignore: scores are not used by this collector
+ }
+
+ public override void Collect(int doc)
+ {
+ var d = reader.Document(doc); // presumably doc is relative to the current reader - TODO confirm against the Collector contract
+ var values = d.GetValues(Syns2Index.F_SYN);
+ foreach (var syn in values.Where(syn => !already.Contains(syn))) // the filter is evaluated lazily, so synonyms added in the loop body are seen by later checks
+ {
+ already.Add(syn);
+
+ var tq = new TermQuery(new Term(field, syn));
+ if (boost > 0) // else keep normal 1.0
+ tq.SetBoost(boost);
+
+ tmp.Add(tq, BooleanClause.Occur.SHOULD); // NOTE(review): writes to a static field of the enclosing class that Expand reassigns on each call
+ }
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ this.reader = reader; // docBase is not needed because documents are fetched from this reader directly
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return true; // nothing in Collect depends on the order in which documents arrive
+ }
+ }
- ///
- /// From project WordNet.Net.Syns2Index
- ///
- public class Syns2Index
- {
- ///
- public const System.String F_SYN = "syn";
-
- ///
- public const System.String F_WORD = "word";
- }
+ }
}
\ No newline at end of file
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynLookup/SynLookup.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynLookup/SynLookup.cs?rev=1206365&r1=1206364&r2=1206365&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynLookup/SynLookup.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/SynLookup/SynLookup.cs Fri Nov 25 22:39:33 2011
@@ -16,140 +16,193 @@
*/
using System;
-
-using Lucene.Net.Store;
-using Lucene.Net.Search;
-using Lucene.Net.Index;
-using Lucene.Net.Documents;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
namespace WorldNet.Net
{
-
-
/// Test program to look up synonyms.
public class SynLookup
{
-
+ static List already;
+ private static BooleanQuery tmp;
+
[STAThread]
public static void Main(System.String[] args) // Test entry point: args[0] is the index directory, args[1] the word whose synonyms are printed
{
if (args.Length != 2)
{
System.Console.Out.WriteLine(typeof(SynLookup) + " ");
- return;
+ return;
}
- FSDirectory directory = FSDirectory.GetDirectory(args[0], false);
- IndexSearcher searcher = new IndexSearcher(directory);
-
- System.String word = args[1];
- Hits hits = searcher.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
-
- if (hits.Length() == 0)
+ using (var directory = FSDirectory.Open(new DirectoryInfo(args[0])))
{
- System.Console.Out.WriteLine("No synonyms found for " + word);
- }
- else
- {
- System.Console.Out.WriteLine("Synonyms found for \"" + word + "\":");
- }
-
- for (int i = 0; i < hits.Length(); i++)
- {
- Document doc = hits.Doc(i);
-
- System.String[] values = doc.GetValues(Syns2Index.F_SYN);
-
- for (int j = 0; j < values.Length; j++)
+ using (var searcher = new IndexSearcher(directory, true))
{
- System.Console.Out.WriteLine(values[j]);
+
+ String word = args[1];
+ Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));
+ var countingCollector = new CountingCollector();
+ searcher.Search(query, countingCollector); // first pass: only counts the matching documents
+
+ if (countingCollector.numHits == 0)
+ {
+ Console.Out.WriteLine("No synonyms found for " + word);
+ }
+ else
+ {
+ Console.Out.WriteLine("Synonyms found for \"" + word + "\":");
+ }
+
+ var hits = searcher.Search(query, countingCollector.numHits).ScoreDocs; // second pass fetches all hits; NOTE(review): still reached when numHits is 0 - confirm Search accepts a top-N of 0, otherwise guard this with the check above
+
+ foreach (var v in
+ hits.Select(t => searcher.Doc(t.doc)).Select(doc => doc.GetValues(Syns2Index.F_SYN)).SelectMany(values => values))
+ { // one output line per F_SYN value of every matching document
+ Console.Out.WriteLine(v);
+ }
+
}
}
-
- searcher.Close();
- directory.Close();
}
-
- /// Perform synonym expansion on a query.
- ///
+ /// <summary>
+ /// Perform synonym expansion on a query: each distinct analyzed word and each of its not-yet-seen synonyms becomes a SHOULD clause of the returned query.
/// </summary>
- /// query
- ///
- /// syns
- ///
- /// a
- ///
- /// field
- ///
- /// boost
- ///
- public static Query Expand(System.String query, Searcher syns, Analyzer a, System.String field, float boost)
+ /// <param name="query">text that is split into words by the analyzer</param>
+ /// <param name="syns">searcher over the synonym index; it is queried on F_WORD and not closed here</param>
+ /// <param name="a">analyzer used to tokenize the query; no default is substituted here, so it must not be null</param>
+ /// <param name="field">field name used for the analyzer and for every generated TermQuery; no default is substituted here</param>
+ /// <param name="boost">boost applied to synonym clauses when greater than 0</param>
+ public static Query Expand(String query,
+ Searcher syns,
+ Analyzer a,
+ String field,
+ float boost)
{
- System.Collections.Hashtable already = new System.Collections.Hashtable(); // avoid dups
- System.Collections.IList top = new System.Collections.ArrayList(); // needs to be separately listed..
-
- // [1] Parse query into separate words so that when we expand we can avoid dups
- TokenStream ts = a.TokenStream(field, new System.IO.StringReader(query));
- Lucene.Net.Analysis.Token t;
- while ((t = ts.Next()) != null)
+ already = new List(); // avoid dups; NOTE(review): static field reassigned on every call and read by CollectorImpl, so overlapping Expand calls would share it
+ var top = new List(); // needs to be separately listed.. (keeps the query's own words in first-seen order)
+
+ var ts = a.TokenStream(field, new StringReader(query)); // [1] parse query into separate words so that duplicates can be avoided when expanding
+ var termAtt = ts.AddAttribute(); // attribute through which each token's term text is read below
+
+ while (ts.IncrementToken())
{
- System.String word = t.TermText();
- if (already.Contains(word) == false)
+ var word = termAtt.Term();
+
+ if (!already.Contains(word)) // NOTE(review): linear scan of a list; a set would avoid the repeated lookups
{
- already.Add(word, word);
+ already.Add(word);
top.Add(word);
}
}
- BooleanQuery tmp = new BooleanQuery();
-
+
+ tmp = new BooleanQuery(); // static field: CollectorImpl appends the synonym clauses to this instance
+
// [2] form query
System.Collections.IEnumerator it = top.GetEnumerator();
while (it.MoveNext())
{
// [2a] add top level words in
- System.String word = (System.String) it.Current;
- TermQuery tq = new TermQuery(new Term(field, word));
+ var word = (String)it.Current;
+ var tq = new TermQuery(new Term(field, word));
tmp.Add(tq, BooleanClause.Occur.SHOULD);
-
- // [2b] add in unique synonums
- Hits hits = syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
- for (int i = 0; i < hits.Length(); i++)
- {
- Document doc = hits.Doc(i);
- System.String[] values = doc.GetValues(Syns2Index.F_SYN);
- for (int j = 0; j < values.Length; j++)
- {
- System.String syn = values[j];
- if (already.Contains(syn) == false)
- {
- already.Add(syn, syn);
- tq = new TermQuery(new Term(field, syn));
- if (boost > 0)
- // else keep normal 1.0
- tq.SetBoost(boost);
- tmp.Add(tq, BooleanClause.Occur.SHOULD);
- }
- }
- }
+
+ var c = new CollectorImpl(field, boost); // [2b] the collector adds the unique synonyms of this word to tmp
+ syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)), c);
}
-
-
+
return tmp;
}
- }
+ internal sealed class CountingCollector : Collector // collector that only counts how many documents were collected
+ {
+ public int numHits; // incremented once per Collect call; read by the caller after the search
+
+ public override void SetScorer(Scorer scorer)
+ { } // scores are not needed for counting
+
+ public override void Collect(int doc)
+ {
+ numHits++;
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ { } // the reader is never consulted, so nothing is stored
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return true; // a plain count does not depend on document order
+ }
+ }
+
+ /// <summary>
+ /// Collector that, for each collected document, reads its F_SYN values and adds every synonym not yet in the static already list to the static tmp query as a SHOULD clause.
+ /// </summary>
+ internal sealed class CollectorImpl : Collector
+ {
+ private IndexReader reader; // most recent reader handed to SetNextReader; Collect loads documents from it
+ private readonly string field; // field name used for the generated TermQuery instances
+ private readonly float boost; // boost for synonym clauses; only applied when greater than 0
+
+ public CollectorImpl(string field, float boost)
+ {
+ this.field = field;
+ this.boost = boost;
+ }
+
+ public override void SetScorer(Scorer scorer)
+ {
+ // Ignore: scores are not used by this collector
+ }
+
+ public override void Collect(int doc)
+ {
+ var d = reader.Document(doc); // presumably doc is relative to the current reader - TODO confirm against the Collector contract
+ var values = d.GetValues(Syns2Index.F_SYN);
+ foreach (var syn in values.Where(syn => !already.Contains(syn))) // the filter is evaluated lazily, so synonyms added in the loop body are seen by later checks
+ {
+ already.Add(syn);
+
+ var tq = new TermQuery(new Term(field, syn));
+ if (boost > 0) // else keep normal 1.0
+ tq.SetBoost(boost);
+
+ tmp.Add(tq, BooleanClause.Occur.SHOULD); // NOTE(review): writes to a static field of the enclosing class that Expand reassigns on each call
+ }
+ }
+
+ public override void SetNextReader(IndexReader reader, int docBase)
+ {
+ this.reader = reader; // docBase is not needed because documents are fetched from this reader directly
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return true; // nothing in Collect depends on the order in which documents arrive
+ }
+
+ }
+
+ /// <summary>
+ /// Field-name constants copied from project WordNet.Net.Syns2Index.
+ /// </summary>
+ public class Syns2Index
+ {
+ /// <summary>Name of the field whose values are read as synonyms (see the GetValues calls in this file).</summary>
+ public const String F_SYN = "syn";
+
+ /// <summary>Name of the field the looked-up word is matched against in the synonym index.</summary>
+ public const String F_WORD = "word";
+ }
- ///
- /// From project WordNet.Net.Syns2Index
- ///
- public class Syns2Index
- {
- ///
- public const System.String F_SYN = "syn";
-
- ///
- public const System.String F_WORD = "word";
}
+
}
\ No newline at end of file
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/Syns2Index/Syns2Index.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/Syns2Index/Syns2Index.cs?rev=1206365&r1=1206364&r2=1206365&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/Syns2Index/Syns2Index.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/WordNet/Syns2Index/Syns2Index.cs Fri Nov 25 22:39:33 2011
@@ -16,8 +16,12 @@
*/
using System;
-
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Lucene.Net.Store;
using Analyzer = Lucene.Net.Analysis.Analyzer;
+using Directory = System.IO.Directory;
using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
@@ -44,18 +48,12 @@ namespace WorldNet.Net
/// While the WordNet file distinguishes groups of synonyms with
/// related meanings we don't do that here.
///
- ///
/// This can take 4 minutes to execute and build an index on a "fast" system and the index takes up almost 3 MB.
- ///
///
- /// Dave Spencer, dave@searchmorph.com
- ///
- /// WordNet home page">
- ///
- /// prologdb man page">
- ///
- /// sample site that uses it">
- ///
+ ///
+ ///
+ ///
+ ///
public class Syns2Index
{
///
@@ -71,15 +69,17 @@ namespace WorldNet.Net
public const System.String F_WORD = "word";
///
- private static readonly Analyzer ana = new StandardAnalyzer();
+ private static readonly Analyzer ana = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
- /// Takes arg of prolog file name and index directory.
+ ///
+ /// Takes arg of prolog file name and index directory.
+ ///
[STAThread]
public static void Main(System.String[] args)
{
// get command line arguments
- System.String prologFilename = null; // name of file "wn_s.pl"
- System.String indexDir = null;
+ String prologFilename = null; // name of file "wn_s.pl"
+ String indexDir = null;
if (args.Length == 2)
{
prologFilename = args[0];
@@ -88,45 +88,44 @@ namespace WorldNet.Net
else
{
Usage();
- System.Environment.Exit(1);
+ Environment.Exit(1);
}
// ensure that the prolog file is readable
- if (!(new System.IO.FileInfo(prologFilename)).Exists)
+ if (!(new FileInfo(prologFilename)).Exists)
{
err.WriteLine("Error: cannot read Prolog file: " + prologFilename);
- System.Environment.Exit(1);
+ Environment.Exit(1);
}
// exit if the target index directory already exists
- if (System.IO.Directory.Exists((new System.IO.FileInfo(indexDir)).FullName))
+ if (Directory.Exists((new FileInfo(indexDir)).FullName))
{
err.WriteLine("Error: index directory already exists: " + indexDir);
err.WriteLine("Please specify a name of a non-existent directory");
- System.Environment.Exit(1);
+ Environment.Exit(1);
}
o.WriteLine("Opening Prolog file " + prologFilename);
- System.IO.FileStream fis = new System.IO.FileStream(prologFilename, System.IO.FileMode.Open, System.IO.FileAccess.Read);
- System.IO.StreamReader br = new System.IO.StreamReader(new System.IO.StreamReader(fis, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(fis, System.Text.Encoding.Default).CurrentEncoding);
- System.String line;
+ var fis = new FileStream(prologFilename, FileMode.Open, FileAccess.Read);
+ var br = new StreamReader(new StreamReader(fis, System.Text.Encoding.Default).BaseStream, new StreamReader(fis, System.Text.Encoding.Default).CurrentEncoding);
+ String line;
// maps a word to all the "groups" it's in
System.Collections.IDictionary word2Nums = new System.Collections.SortedList();
// maps a group to all the words in it
System.Collections.IDictionary num2Words = new System.Collections.SortedList();
// number of rejected words
- int ndecent = 0;
+ var ndecent = 0;
// status output
- int mod = 1;
- int row = 1;
+ var mod = 1;
+ var row = 1;
// parse prolog file
o.WriteLine("[1/2] Parsing " + prologFilename);
while ((line = br.ReadLine()) != null)
{
// occasional progress
- if ((++row) % mod == 0)
- // periodically print out line we read in
+ if ((++row) % mod == 0) // periodically print out line we read in
{
mod *= 2;
o.WriteLine("\t" + row + " " + line + " " + word2Nums.Count + " " + num2Words.Count + " ndecent=" + ndecent);
@@ -136,17 +135,17 @@ namespace WorldNet.Net
if (!line.StartsWith("s("))
{
err.WriteLine("OUCH: " + line);
- System.Environment.Exit(1);
+ Environment.Exit(1);
}
// parse line
line = line.Substring(2);
- int comma = line.IndexOf((System.Char) ',');
- System.String num = line.Substring(0, (comma) - (0));
- int q1 = line.IndexOf((System.Char) '\'');
+ var comma = line.IndexOf(',');
+ var num = line.Substring(0, comma);
+ var q1 = line.IndexOf('\'');
line = line.Substring(q1 + 1);
- int q2 = line.IndexOf((System.Char) '\'');
- System.String word = line.Substring(0, (q2) - (0)).ToLower();
+ var q2 = line.IndexOf('\'');
+ var word = line.Substring(0, q2).ToLower().Replace("''", "'");
// make sure is a normal word
if (!IsDecent(word))
@@ -157,11 +156,10 @@ namespace WorldNet.Net
// 1/2: word2Nums map
// append to entry or add new one
- System.Collections.IList lis = (System.Collections.IList) word2Nums[word];
+ var lis = (System.Collections.IList) word2Nums[word];
if (lis == null)
{
- lis = new System.Collections.ArrayList();
- lis.Add(num);
+ lis = new List {num};
word2Nums[word] = lis;
}
else
@@ -171,8 +169,7 @@ namespace WorldNet.Net
lis = (System.Collections.IList) num2Words[num];
if (lis == null)
{
- lis = new System.Collections.ArrayList();
- lis.Add(word);
+ lis = new List { word };
num2Words[num] = lis;
}
else
@@ -188,20 +185,18 @@ namespace WorldNet.Net
Index(indexDir, word2Nums, num2Words);
}
- /// Checks to see if a word contains only alphabetic characters by
+ ///
+ /// Checks to see if a word contains only alphabetic characters by
/// checking it one character at a time.
- ///
///
- /// string to check
- ///
- /// true if the string is decent
- ///
- private static bool IsDecent(System.String s)
+ /// string to check
+ /// true if the string is decent
+ private static bool IsDecent(String s)
{
- int len = s.Length;
- for (int i = 0; i < len; i++)
+ var len = s.Length;
+ for (var i = 0; i < len; i++)
{
- if (!System.Char.IsLetter(s[i]))
+ if (!Char.IsLetter(s[i]))
{
return false;
}
@@ -209,75 +204,73 @@ namespace WorldNet.Net
return true;
}
- /// Forms a Lucene index based on the 2 maps.
- ///
+ /// <summary>
+ /// Forms a Lucene index based on the 2 maps: one document is written per word of word2Nums for which the per-word Index overload returns a count greater than 0.
/// </summary>
- /// the direcotry where the index should be created
- ///
- /// word2Nums
- ///
- /// num2Words
- ///
- private static void Index(System.String indexDir, System.Collections.IDictionary word2Nums, System.Collections.IDictionary num2Words)
+ /// <param name="indexDir">the directory where the index should be created</param>
+ /// <param name="word2Nums">map whose keys are the words to index; passed through to the per-word Index overload</param>
+ /// <param name="num2Words">second map, passed through to the per-word Index overload</param>
+ private static void Index(String indexDir, System.Collections.IDictionary word2Nums, System.Collections.IDictionary num2Words)
{
- int row = 0;
- int mod = 1;
+ var row = 0; // number of documents that received the F_WORD field so far
+ var mod = 1; // progress is printed whenever row is a multiple of mod; mod doubles after each print
- // override the specific index if it already exists
- IndexWriter writer = new IndexWriter(indexDir, ana, true);
- writer.SetUseCompoundFile(true); // why?
- // blindly up these parameters for speed
- writer.SetMergeFactor(writer.GetMergeFactor() * 2);
- writer.SetMaxBufferedDocs(writer.GetMaxBufferedDocs() * 2);
- System.Collections.IEnumerator i1 = word2Nums.Keys.GetEnumerator();
- while (i1.MoveNext())
- // for each word
+ using (var dir = FSDirectory.Open(new DirectoryInfo(indexDir)))
{
- System.String g = (System.String) i1.Current;
- Document doc = new Document();
-
- int n = Index(word2Nums, num2Words, g, doc);
- if (n > 0)
+ var writer = new IndexWriter(dir, ana, true, IndexWriter.MaxFieldLength.LIMITED); // NOTE(review): writer is only closed by the explicit Close below; an exception in the loop would leave it open. The 'true' argument presumably means create/overwrite - confirm
+ writer.SetUseCompoundFile(true); // why?
+
+ var i1 = word2Nums.Keys.GetEnumerator();
+ while (i1.MoveNext())
{
- doc.Add(new Field(F_WORD, g, Field.Store.YES, Field.Index.UN_TOKENIZED));
- if ((++row % mod) == 0)
+ var g = (String)i1.Current; // current word
+ var doc = new Document();
+
+ var n = Index(word2Nums, num2Words, g, doc); // fills doc for this word; documents with n == 0 are not written
+ if (n > 0)
{
- o.WriteLine("\trow=" + row + "/" + word2Nums.Count + " doc= " + doc);
- mod *= 2;
+ doc.Add(new Field(F_WORD, g, Field.Store.YES, Field.Index.NOT_ANALYZED));
+ if ((++row % mod) == 0)
+ {
+ o.WriteLine("\trow=" + row + "/" + word2Nums.Count + " doc= " + doc);
+ mod *= 2;
+ }
+ writer.AddDocument(doc);
}
- writer.AddDocument(doc);
- } // else degenerate
+ }
+ o.WriteLine("Optimizing..");
+ writer.Optimize();
+ writer.Close();
}
- o.WriteLine("Optimizing..");
- writer.Optimize();
- writer.Close();
+
}
- /// Given the 2 maps fills a document for 1 word.
+ ///
+ /// Given the 2 maps fills a document for 1 word.
+ ///
private static int Index(System.Collections.IDictionary word2Nums, System.Collections.IDictionary num2Words, System.String g, Document doc)
{
- System.Collections.IList keys = (System.Collections.IList) word2Nums[g]; // get list of key#'s
- System.Collections.IEnumerator i2 = keys.GetEnumerator();
+ var keys = (System.Collections.IList) word2Nums[g]; // get list of key#'s
+ var i2 = keys.GetEnumerator();
- System.Collections.SortedList already = new System.Collections.SortedList(); // keep them sorted
+ var already = new System.Collections.SortedList(); // keep them sorted
// pass 1: fill up 'already' with all words
while (i2.MoveNext()) // for each key#
{
- foreach (object item in (System.Collections.IList) num2Words[i2.Current]) // get list of words
+ foreach (var item in
+ ((System.Collections.IList) num2Words[i2.Current]).Cast