Return-Path:
Delivered-To: apmail-incubator-lucene-net-commits-archive@locus.apache.org
Received: (qmail 28027 invoked from network); 4 Jun 2006 02:43:37 -0000
Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199)
by minotaur.apache.org with SMTP; 4 Jun 2006 02:43:37 -0000
Received: (qmail 88898 invoked by uid 500); 4 Jun 2006 02:43:37 -0000
Delivered-To: apmail-incubator-lucene-net-commits-archive@incubator.apache.org
Received: (qmail 88848 invoked by uid 500); 4 Jun 2006 02:43:36 -0000
Mailing-List: contact lucene-net-commits-help@incubator.apache.org;
run by ezmlm
Precedence: bulk
List-Help:
List-Unsubscribe:
List-Post:
List-Id:
Reply-To: lucene-net-dev@incubator.apache.org
Delivered-To: mailing list lucene-net-commits@incubator.apache.org
Received: (qmail 88812 invoked by uid 99); 4 Jun 2006 02:43:36 -0000
Received: from asf.osuosl.org (HELO asf.osuosl.org) (140.211.166.49)
by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 03 Jun 2006 19:43:36 -0700
X-ASF-Spam-Status: No, hits=-9.4 required=10.0
tests=ALL_TRUSTED,NO_REAL_NAME
X-Spam-Check-By: apache.org
Received-SPF: pass (asf.osuosl.org: local policy)
Received: from [140.211.166.113] (HELO eris.apache.org) (140.211.166.113)
by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 03 Jun 2006 19:43:32 -0700
Received: by eris.apache.org (Postfix, from userid 65534)
id 0DE8C1A9859; Sat, 3 Jun 2006 19:42:26 -0700 (PDT)
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: svn commit: r411501 [19/30] - in /incubator/lucene.net/trunk/C#/src:
./
Demo/DeleteFiles/ Demo/DemoLib/ Demo/DemoLib/HTML/ Demo/IndexFiles/
Demo/IndexHtml/ Demo/SearchFiles/ Lucene.Net/ Lucene.Net/Analysis/
Lucene.Net/Analysis/Standard/ Lucene.Net/Docu...
Date: Sun, 04 Jun 2006 02:41:25 -0000
To: lucene-net-commits@incubator.apache.org
From: aroush@apache.org
X-Mailer: svnmailer-1.0.8
Message-Id: <20060604024226.0DE8C1A9859@eris.apache.org>
X-Virus-Checked: Checked by ClamAV on apache.org
X-Spam-Rating: minotaur.apache.org 1.6.2 0/1000/N
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/FuzzyTermEnum.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/FuzzyTermEnum.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/FuzzyTermEnum.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/FuzzyTermEnum.cs Sat Jun 3 19:41:13 2006
@@ -13,123 +13,147 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using IndexReader = Lucene.Net.Index.IndexReader;
using Term = Lucene.Net.Index.Term;
+
namespace Lucene.Net.Search
{
- /// Subclass of FilteredTermEnum for enumerating all terms that are similiar to the specified filter term.
+ /// Subclass of FilteredTermEnum for enumerating all terms that are similar
+ /// to the specified filter term.
+ ///
/// Term enumerations are always ordered by Term.compareTo(). Each term in
- /// the enumeration is greater than all that precede it.
+ /// the enumeration is greater than all that precede it.
///
- public sealed class FuzzyTermEnum:FilteredTermEnum
+ public sealed class FuzzyTermEnum : FilteredTermEnum
{
- private void InitBlock()
+
+ /* This should be somewhere around the average long word.
+ * If it is longer, we waste time and space. If it is shorter, we waste a
+ * little bit of time growing the array as we encounter longer words.
+ */
+ private const int TYPICAL_LONGEST_WORD_IN_INDEX = 19;
+
+ /* Allows us to save the time required to create a new array
+ * every time similarity is called.
+ */
+ private int[][] d;
+
+ private float similarity;
+ private bool endEnum = false;
+
+ private Term searchTerm = null;
+ private System.String field;
+ private System.String text;
+ private System.String prefix;
+
+ private float minimumSimilarity;
+ private float scale_factor;
+ private int[] maxDistances = new int[TYPICAL_LONGEST_WORD_IN_INDEX];
+
+ /// Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f.
+ ///
+ /// After calling the constructor the enumeration is already pointing to the first
+ /// valid term if such a term exists.
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ /// IOException
+ ///
+ ///
+ public FuzzyTermEnum(IndexReader reader, Term term) : this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength)
+ {
+ }
+
+ /// Creates a FuzzyTermEnum with an empty prefix.
+ ///
+ /// After calling the constructor the enumeration is already pointing to the first
+ /// valid term if such a term exists.
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ /// IOException
+ ///
+ ///
+ public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) : this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength)
+ {
+ }
+
+ /// Constructor for enumeration of all terms from specified reader
which share a prefix of
+ /// length prefixLength
with term
and which have a fuzzy similarity >
+ /// minSimilarity
.
+ ///
+ /// After calling the constructor the enumeration is already pointing to the first
+ /// valid term if such a term exists.
+ ///
+ ///
+ /// Delivers terms.
+ ///
+ /// Pattern term.
+ ///
+ /// Minimum required similarity for terms from the reader. Default value is 0.5f.
+ ///
+ /// Length of required common prefix. Default value is 0.
+ ///
+ /// IOException
+ public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength) : base()
+ {
+
+ if (minSimilarity >= 1.0f)
+ throw new System.ArgumentException("minimumSimilarity cannot be greater than or equal to 1");
+ else if (minSimilarity < 0.0f)
+ throw new System.ArgumentException("minimumSimilarity cannot be less than 0");
+ if (prefixLength < 0)
+ throw new System.ArgumentException("prefixLength cannot be less than 0");
+
+ this.minimumSimilarity = minSimilarity;
+ this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
+ this.searchTerm = term;
+ this.field = searchTerm.Field();
+
+ //The prefix could be longer than the word.
+ //It's kind of silly though. It means we must match the entire word.
+ int fullSearchTermLength = searchTerm.Text().Length;
+ int realPrefixLength = prefixLength > fullSearchTermLength?fullSearchTermLength:prefixLength;
+
+ this.text = searchTerm.Text().Substring(realPrefixLength);
+ this.prefix = searchTerm.Text().Substring(0, (realPrefixLength) - (0));
+
+ InitializeMaxDistances();
+ this.d = InitDistanceArray();
+
+ SetEnum(reader.Terms(new Term(searchTerm.Field(), prefix)));
+ }
+
+ /// The termCompare method in FuzzyTermEnum uses Levenshtein distance to
+ /// calculate the distance between the given term and the comparing term.
+ ///
+ protected internal override bool TermCompare(Term term)
{
- for (int i = 0; i < 1; i++)
+ if (field == term.Field() && term.Text().StartsWith(prefix))
{
- e[i] = new int[1];
+ System.String target = term.Text().Substring(prefix.Length);
+ this.similarity = Similarity(target);
+ return (similarity > minimumSimilarity);
}
+ endEnum = true;
+ return false;
}
- internal double distance;
- internal bool endEnum = false;
- internal Term searchTerm = null;
- internal System.String field = "";
- internal System.String text = "";
- internal int textlen;
- internal System.String prefix = "";
- internal int prefixLength = 0;
- internal float minimumSimilarity;
- internal double scale_factor;
-
-
- /// Empty prefix and minSimilarity of 0.5f are used.
- ///
- ///
- /// reader
- ///
- /// term
- ///
- /// IOException
- ///
- ///
- public FuzzyTermEnum(IndexReader reader, Term term):this(reader, term, FuzzyQuery.defaultMinSimilarity, 0)
- {
- }
-
- /// This is the standard FuzzyTermEnum with an empty prefix.
- ///
- ///
- /// reader
- ///
- /// term
- ///
- /// minSimilarity
- ///
- /// IOException
- ///
- ///
- public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity):this(reader, term, minSimilarity, 0)
- {
- }
-
- /// Constructor for enumeration of all terms from specified reader
which share a prefix of
- /// length prefixLength
with term
and which have a fuzzy similarity >
- /// minSimilarity
.
- ///
- ///
- /// Delivers terms.
- ///
- /// Pattern term.
- ///
- /// Minimum required similarity for terms from the reader. Default value is 0.5f.
- ///
- /// Length of required common prefix. Default value is 0.
- ///
- /// IOException
- public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength):base()
- {
- InitBlock();
- minimumSimilarity = minSimilarity;
- scale_factor = 1.0f / (1.0f - minimumSimilarity);
- searchTerm = term;
- field = searchTerm.Field();
- text = searchTerm.Text();
- textlen = text.Length;
- if (prefixLength > 0 && prefixLength < textlen)
- {
- this.prefixLength = prefixLength;
- prefix = text.Substring(0, (prefixLength) - (0));
- text = text.Substring(prefixLength);
- textlen = text.Length;
- }
- SetEnum(reader.Terms(new Term(searchTerm.Field(), prefix)));
- }
-
- /// The termCompare method in FuzzyTermEnum uses Levenshtein distance to
- /// calculate the distance between the given term and the comparing term.
- ///
- protected internal override bool TermCompare(Term term)
- {
- System.String termText = term.Text();
- if (field == term.Field() && termText.StartsWith(prefix))
- {
- System.String target = termText.Substring(prefixLength);
- int targetlen = target.Length;
- int dist = EditDistance(text, target, textlen, targetlen);
- distance = 1 - ((double) dist / (double) System.Math.Min(textlen, targetlen));
- return (distance > minimumSimilarity);
- }
- endEnum = true;
- return false;
- }
-
- public override float Difference()
- {
- return (float) ((distance - minimumSimilarity) * scale_factor);
- }
+ public override float Difference()
+ {
+ return (float) ((similarity - minimumSimilarity) * scale_factor);
+ }
public override bool EndEnum()
{
@@ -141,76 +165,184 @@
/// ****************************
///
- /// Finds and returns the smallest of three integers
- private static int Min(int a, int b, int c)
+ /// Finds and returns the smallest of three integers
+ private static int min(int a, int b, int c)
{
int t = (a < b) ? a : b;
return (t < c) ? t : c;
}
- /// This static array saves us from the time required to create a new array
- /// everytime editDistance is called.
- ///
- private int[][] e = new int[1][];
+ private int[][] InitDistanceArray()
+ {
+ int[][] tmpArray = new int[this.text.Length + 1][];
+ for (int i = 0; i < this.text.Length + 1; i++)
+ {
+ tmpArray[i] = new int[TYPICAL_LONGEST_WORD_IN_INDEX];
+ }
+ return tmpArray;
+ }
- /// Levenshtein distance also known as edit distance is a measure of similiarity
- /// between two strings where the distance is measured as the number of character
- /// deletions, insertions or substitutions required to transform one string to
- /// the other string.
- /// This method takes in four parameters; two strings and their respective
- /// lengths to compute the Levenshtein distance between the two strings.
- /// The result is returned as an integer.
+ /// Similarity returns a number that is 1.0f or less (including negative numbers)
+ /// based on how similar the Term is compared to a target term. It returns
+ /// exactly 0.0f when
+ ///
+ /// editDistance < maximumEditDistance
+ /// Otherwise it returns:
+ ///
+ /// 1 - (editDistance / length)
+ /// where length is the length of the shortest term (text or target) including a
+ /// prefix that are identical and editDistance is the Levenshtein distance for
+ /// the two words.
+ ///
+ /// Embedded within this algorithm is a fail-fast Levenshtein distance
+ /// algorithm. The fail-fast algorithm differs from the standard Levenshtein
+ /// distance algorithm in that it is aborted if it is discovered that the
+ /// minimum distance between the words is greater than some threshold.
+ ///
+ ///
To calculate the maximum distance threshold we use the following formula:
+ ///
+ /// (1 - minimumSimilarity) * length
+ /// where length is the shortest term including any prefix that is not part of the
+ /// similarity comparison. This formula was derived by solving for what maximum value
+ /// of distance returns false for the following statements:
+ ///
+ /// similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
+ /// return (similarity > minimumSimilarity);
+ /// where distance is the Levenshtein distance for the two words.
+ ///
+ /// Levenshtein distance (also known as edit distance) is a measure of similarity
+ /// between two strings where the distance is measured as the number of character
+ /// deletions, insertions or substitutions required to transform one string to
+ /// the other string.
///
- private int EditDistance(System.String s, System.String t, int n, int m)
+ /// the target word or phrase
+ ///
+ /// the similarity, 0.0 or less indicates that it matches less than the required
+ /// threshold and 1.0 indicates that the text and target are identical
+ ///
+ private float Similarity(System.String target)
{
- if (e.Length <= n || e[0].Length <= m)
+ lock (this)
{
- int[][] tmpArray = new int[System.Math.Max(e.Length, n + 1)][];
- for (int i = 0; i < System.Math.Max(e.Length, n + 1); i++)
+ int m = target.Length;
+ int n = text.Length;
+ if (n == 0)
{
- tmpArray[i] = new int[System.Math.Max(e[0].Length, m + 1)];
+ //we don't have anything to compare. That means if we just add
+ //the letters for m we get the new word
+ return prefix.Length == 0 ? 0.0f : 1.0f - ((float) m / prefix.Length);
}
- e = tmpArray;
- }
- int[][] d = e; // matrix
- int i2; // iterates through s
- int j; // iterates through t
- char s_i; // ith character of s
-
- if (n == 0)
- return m;
- if (m == 0)
- return n;
-
- // init matrix d
- for (i2 = 0; i2 <= n; i2++)
- d[i2][0] = i2;
- for (j = 0; j <= m; j++)
- d[0][j] = j;
-
- // start computing edit distance
- for (i2 = 1; i2 <= n; i2++)
- {
- s_i = s[i2 - 1];
- for (j = 1; j <= m; j++)
+ if (m == 0)
+ {
+ return prefix.Length == 0 ? 0.0f : 1.0f - ((float) n / prefix.Length);
+ }
+
+ int maxDistance = GetMaxDistance(m);
+
+ if (maxDistance < System.Math.Abs(m - n))
+ {
+ //just adding the characters of m to n or vice-versa results in
+ //too many edits
+ //for example "pre" length is 3 and "prefixes" length is 8. We can see that
+ //given this optimal circumstance, the edit distance cannot be less than 5.
+ //which is 8-3 or more precisely Math.abs(3-8).
+ //if our maximum edit distance is 4, then we can discard this word
+ //without looking at it.
+ return 0.0f;
+ }
+
+ //let's make sure we have enough room in our array to do the distance calculations.
+ if (d[0].Length <= m)
+ {
+ GrowDistanceArray(m);
+ }
+
+ // init matrix d
+ for (int i = 0; i <= n; i++)
+ d[i][0] = i;
+ for (int j = 0; j <= m; j++)
+ d[0][j] = j;
+
+ // start computing edit distance
+ for (int i = 1; i <= n; i++)
{
- if (s_i != t[j - 1])
- d[i2][j] = Min(d[i2 - 1][j], d[i2][j - 1], d[i2 - 1][j - 1]) + 1;
- else
- d[i2][j] = Min(d[i2 - 1][j] + 1, d[i2][j - 1] + 1, d[i2 - 1][j - 1]);
+ int bestPossibleEditDistance = m;
+ char s_i = text[i - 1];
+ for (int j = 1; j <= m; j++)
+ {
+ if (s_i != target[j - 1])
+ {
+ d[i][j] = min(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1]) + 1;
+ }
+ else
+ {
+ d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1]);
+ }
+ bestPossibleEditDistance = System.Math.Min(bestPossibleEditDistance, d[i][j]);
+ }
+
+ //After calculating row i, the best possible edit distance
+ //can be found by finding the smallest value in a given column.
+ //If the bestPossibleEditDistance is greater than the max distance, abort.
+
+ if (i > maxDistance && bestPossibleEditDistance > maxDistance)
+ {
+ //equal is okay, but not greater
+ //the closest the target can be to the text is just too far away.
+ //this target is leaving the party early.
+ return 0.0f;
+ }
}
+
+ // this will return less than 0.0 when the edit distance is
+ // greater than the number of characters in the shorter word.
+ // but this was the formula that was previously used in FuzzyTermEnum,
+ // so it has not been changed (even though minimumSimilarity must be
+ // greater than 0.0)
+ return 1.0f - ((float) d[n][m] / (float) (prefix.Length + System.Math.Min(n, m)));
}
-
- // we got the result!
- return d[n][m];
+ }
+
+ /// Grow the second dimension of the array, so that we can calculate the
+ /// Levenshtein difference.
+ ///
+ private void GrowDistanceArray(int m)
+ {
+ for (int i = 0; i < d.Length; i++)
+ {
+ d[i] = new int[m + 1];
+ }
+ }
+
+ /// The max Distance is the maximum Levenshtein distance for the text
+ /// compared to some other value that results in score that is
+ /// better than the minimum similarity.
+ ///
+ /// the length of the "other value"
+ ///
+ /// the maximum levenshtein distance that we care about
+ ///
+ private int GetMaxDistance(int m)
+ {
+ return (m < maxDistances.Length)?maxDistances[m]:CalculateMaxDistance(m);
+ }
+
+ private void InitializeMaxDistances()
+ {
+ for (int i = 0; i < maxDistances.Length; i++)
+ {
+ maxDistances[i] = CalculateMaxDistance(i);
+ }
+ }
+
+ private int CalculateMaxDistance(int m)
+ {
+ return (int) ((1 - minimumSimilarity) * (System.Math.Min(text.Length, m) + prefix.Length));
}
public override void Close()
{
- base.Close();
- searchTerm = null;
- field = null;
- text = null;
+ base.Close(); //call super.close() and let the garbage collector do its work.
}
}
}
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/Hit.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/Hit.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/Hit.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/Hit.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Document = Lucene.Net.Documents.Document;
+
+namespace Lucene.Net.Search
+{
+
+ /// Wrapper used by {@link HitIterator} to provide a lazily loaded hit
+ /// from {@link Hits}.
+ ///
+ ///
+ /// Jeremy Rayner
+ ///
+ [Serializable]
+ public class Hit
+ {
+
+ private Document doc = null;
+
+ private bool resolved = false;
+
+ private Hits hits = null;
+ private int hitNumber;
+
+ /// Constructed from {@link HitIterator}
+ /// Hits returned from a search
+ ///
+ /// Hit index in Hits
+ ///
+ internal Hit(Hits hits, int hitNumber)
+ {
+ this.hits = hits;
+ this.hitNumber = hitNumber;
+ }
+
+ /// Returns document for this hit.
+ ///
+ ///
+ ///
+ ///
+ public virtual Document GetDocument()
+ {
+ if (!resolved)
+ FetchTheHit();
+ return doc;
+ }
+
+ /// Returns score for this hit.
+ ///
+ ///
+ ///
+ ///
+ public virtual float GetScore()
+ {
+ return hits.Score(hitNumber);
+ }
+
+ /// Returns id for this hit.
+ ///
+ ///
+ ///
+ ///
+ public virtual int GetId()
+ {
+ return hits.Id(hitNumber);
+ }
+
+ private void FetchTheHit()
+ {
+ doc = hits.Doc(hitNumber);
+ resolved = true;
+ }
+
+ // provide some of the Document style interface (the simple stuff)
+
+ /// Returns the boost factor for this hit on any field of the underlying document.
+ ///
+ ///
+ ///
+ ///
+ public virtual float GetBoost()
+ {
+ return GetDocument().GetBoost();
+ }
+
+ /// Returns the string value of the field with the given name if any exist in
+ /// this document, or null. If multiple fields exist with this name, this
+ /// method returns the first value added. If only binary fields with this name
+ /// exist, returns null.
+ ///
+ ///
+ ///
+ ///
+ public virtual System.String Get(System.String name)
+ {
+ return GetDocument().Get(name);
+ }
+
+ /// Prints the parameters to be used to discover the promised result.
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("Hit<");
+ buffer.Append(hits.ToString());
+ buffer.Append(" [");
+ buffer.Append(hitNumber);
+ buffer.Append("] ");
+ if (resolved)
+ {
+ buffer.Append("resolved");
+ }
+ else
+ {
+ buffer.Append("unresolved");
+ }
+ buffer.Append(">");
+ return buffer.ToString();
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/HitCollector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/HitCollector.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/HitCollector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/HitCollector.cs Sat Jun 3 19:41:13 2006
@@ -13,13 +13,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Search
{
- /// Lower-level search API.
- ///
+
+ /// Lower-level search API.
+ ///
HitCollectors are primarily meant to be used to implement queries,
+ /// sorting and filtering.
+ ///
+ ///
///
- /// $Id: HitCollector.java,v 1.6 2004/03/29 22:48:03 cutting Exp $
+ /// $Id: HitCollector.java 155607 2005-02-27 01:29:53Z otis $
///
public abstract class HitCollector
{
@@ -40,7 +46,7 @@
/// Note: This is called in an inner search loop. For good search
/// performance, implementations of this method should not call
/// {@link Searcher#Doc(int)} or
- /// {@link Lucene.Net.Index.IndexReader#Document(int)} on every
+ /// {@link Lucene.Net.index.IndexReader#Document(int)} on every
/// document number encountered. Doing so can slow searches by an order
/// of magnitude or more.
///
Note: The score
passed to this method is a raw score.
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/HitIterator.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/HitIterator.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/HitIterator.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/HitIterator.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// An iterator over {@link Hits} that provides lazy fetching of each document.
+ /// {@link Hits#Iterator()} returns an instance of this class. Calls to {@link #next()}
+ /// return a {@link Hit} instance.
+ ///
+ ///
+ /// Jeremy Rayner
+ ///
+ public class HitIterator : System.Collections.IEnumerator
+ {
+ /// Returns a {@link Hit} instance representing the next hit in {@link Hits}.
+ ///
+ ///
+ /// Next {@link Hit}.
+ ///
+ public virtual System.Object Current
+ {
+ get
+ {
+ if (hitNumber == hits.Length())
+ throw new System.ArgumentOutOfRangeException();
+
+ System.Object next = new Hit(hits, hitNumber);
+ hitNumber++;
+ return next;
+ }
+
+ }
+ private Hits hits;
+ private int hitNumber = 0;
+
+ /// Constructed from {@link Hits#Iterator()}.
+ internal HitIterator(Hits hits)
+ {
+ this.hits = hits;
+ }
+
+ /// true if current hit is less than the total number of {@link Hits}.
+ ///
+ public virtual bool MoveNext()
+ {
+ return hitNumber < hits.Length();
+ }
+
+ /// Unsupported operation.
+ ///
+ ///
+ /// UnsupportedOperationException
+ public virtual void Remove()
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// Returns the total number of hits.
+ public virtual int Length()
+ {
+ return hits.Length();
+ }
+ //UPGRADE_TODO: The following method was automatically generated and it must be implemented in order to preserve the class logic. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1232'"
+ virtual public void Reset()
+ {
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/HitQueue.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/HitQueue.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/HitQueue.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/HitQueue.cs Sat Jun 3 19:41:13 2006
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using PriorityQueue = Lucene.Net.Util.PriorityQueue;
+
namespace Lucene.Net.Search
{
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/Hits.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/Hits.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/Hits.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/Hits.cs Sat Jun 3 19:41:13 2006
@@ -13,15 +13,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Document = Lucene.Net.Documents.Document;
+
namespace Lucene.Net.Search
{
/// A ranked list of documents, used to hold search results.
public sealed class Hits
{
- private Query query;
+ private Weight weight;
private Searcher searcher;
private Filter filter = null;
private Sort sort = null;
@@ -36,7 +38,7 @@
internal Hits(Searcher s, Query q, Filter f)
{
- query = q;
+ weight = q.Weight(s);
searcher = s;
filter = f;
GetMoreDocs(50); // retrieve 100 initially
@@ -44,7 +46,7 @@
internal Hits(Searcher s, Query q, Filter f, Sort o)
{
- query = q;
+ weight = q.Weight(s);
searcher = s;
filter = f;
sort = o;
@@ -62,14 +64,15 @@
}
int n = min * 2; // double # retrieved
- TopDocs topDocs = (sort == null) ? searcher.Search(query, filter, n) : searcher.Search(query, filter, n, sort);
+ TopDocs topDocs = (sort == null) ? searcher.Search(weight, filter, n) : searcher.Search(weight, filter, n, sort);
length = topDocs.totalHits;
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
float scoreNorm = 1.0f;
- if (length > 0 && scoreDocs[0].score > 1.0f)
+
+ if (length > 0 && topDocs.GetMaxScore() > 1.0f)
{
- scoreNorm = 1.0f / scoreDocs[0].score;
+ scoreNorm = 1.0f / topDocs.GetMaxScore();
}
int end = scoreDocs.Length < length?scoreDocs.Length:length;
@@ -124,6 +127,18 @@
return HitDoc(n).id;
}
+ /// Returns a {@link HitIterator} to navigate the Hits. Each item returned
+ /// from {@link Iterator#next()} is a {@link Hit}.
+ ///
+ /// Caution: Iterate only over the hits needed. Iterating over all
+ /// hits is generally not desirable and may be the source of
+ /// performance issues.
+ ///
+ ///
+ public System.Collections.IEnumerator Iterator()
+ {
+ return new HitIterator(this);
+ }
private HitDoc HitDoc(int n)
{
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/IndexSearcher.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/IndexSearcher.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/IndexSearcher.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/IndexSearcher.cs Sat Jun 3 19:41:13 2006
@@ -13,18 +13,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Document = Lucene.Net.Documents.Document;
using IndexReader = Lucene.Net.Index.IndexReader;
using Term = Lucene.Net.Index.Term;
using Directory = Lucene.Net.Store.Directory;
+
namespace Lucene.Net.Search
{
/// Implements search over a single IndexReader.
///
/// Applications usually need only call the inherited {@link #Search(Query)}
- /// or {@link #Search(Query,Filter)} methods.
+ /// or {@link #Search(Query,Filter)} methods. For performance reasons it is
+ /// recommended to open only one IndexSearcher and use it for all of your searches.
+ ///
+ ///
Note that you can only access Hits from an IndexSearcher as long as it is
+ /// not yet closed, otherwise an IOException will be thrown.
///
public class IndexSearcher : Searcher
{
@@ -39,13 +45,13 @@
this.bits = bits;
this.totalHits = totalHits;
this.hq = hq;
- this.nDocs = nDocs;
- this.enclosingInstance = enclosingInstance;
+ this.nDocs = nDocs;
+ this.enclosingInstance = enclosingInstance;
}
private System.Collections.BitArray bits;
private int[] totalHits;
private Lucene.Net.Search.HitQueue hq;
- private int nDocs;
+ private int nDocs;
private IndexSearcher enclosingInstance;
public IndexSearcher Enclosing_Instance
{
@@ -55,19 +61,19 @@
}
}
- private float minScore = 0.0f;
+ private float minScore = 0.0f;
public override void Collect(int doc, float score)
{
if (score > 0.0f && (bits == null || bits.Get(doc)))
{
// skip docs not in bits
totalHits[0]++;
- if (hq.Size() < nDocs || score >= minScore)
- {
- hq.Insert(new ScoreDoc(doc, score));
- minScore = ((ScoreDoc) hq.Top()).score; // maintain minScore
- }
- }
+ if (hq.Size() < nDocs || score >= minScore)
+ {
+ hq.Insert(new ScoreDoc(doc, score));
+ minScore = ((ScoreDoc) hq.Top()).score; // maintain minScore
+ }
+ }
}
}
private class AnonymousClassHitCollector1 : HitCollector
@@ -137,9 +143,14 @@
}
}
}
- public /*internal*/ IndexReader reader;
+ internal IndexReader reader;
private bool closeReader;
+ public IndexReader Reader
+ {
+ get { return reader; }
+ }
+
/// Creates a searcher searching the index in the named directory.
public IndexSearcher(System.String path) : this(IndexReader.Open(path), true)
{
@@ -161,12 +172,18 @@
this.closeReader = closeReader;
}
+ /// Return the {@link IndexReader} this searches.
+ public virtual IndexReader GetIndexReader()
+ {
+ return reader;
+ }
+
/// Note that the underlying IndexReader is not closed, if
/// IndexSearcher was constructed with IndexSearcher(IndexReader r).
/// If the IndexReader was supplied implicitly by specifying a directory, then
/// the IndexReader gets closed.
///
- public override void Close()
+ public override void Close()
{
if (closeReader)
reader.Close();
@@ -191,31 +208,38 @@
}
// inherit javadoc
- public override TopDocs Search(Query query, Filter filter, int nDocs)
+ public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
- Scorer scorer = query.Weight(this).Scorer(reader);
+
+ if (nDocs <= 0)
+ // null might be returned from hq.top() below.
+ throw new System.ArgumentException("nDocs must be > 0");
+
+ Scorer scorer = weight.Scorer(reader);
if (scorer == null)
- return new TopDocs(0, new ScoreDoc[0]);
+ return new TopDocs(0, new ScoreDoc[0], System.Single.NegativeInfinity);
- System.Collections.BitArray bits = filter != null ? filter.Bits(reader) : null;
+ System.Collections.BitArray bits = filter != null?filter.Bits(reader):null;
HitQueue hq = new HitQueue(nDocs);
int[] totalHits = new int[1];
scorer.Score(new AnonymousClassHitCollector(bits, totalHits, hq, nDocs, this));
ScoreDoc[] scoreDocs = new ScoreDoc[hq.Size()];
for (int i = hq.Size() - 1; i >= 0; i--)
- // put docs in array
+ // put docs in array
scoreDocs[i] = (ScoreDoc) hq.Pop();
- return new TopDocs(totalHits[0], scoreDocs);
+ float maxScore = (totalHits[0] == 0) ? System.Single.NegativeInfinity : scoreDocs[0].score;
+
+ return new TopDocs(totalHits[0], scoreDocs, maxScore);
}
// inherit javadoc
- public override TopFieldDocs Search(Query query, Filter filter, int nDocs, Sort sort)
+ public override TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort)
{
- Scorer scorer = query.Weight(this).Scorer(reader);
+ Scorer scorer = weight.Scorer(reader);
if (scorer == null)
- return new TopFieldDocs(0, new ScoreDoc[0], sort.fields);
+ return new TopFieldDocs(0, new ScoreDoc[0], sort.fields, System.Single.NegativeInfinity);
System.Collections.BitArray bits = filter != null ? filter.Bits(reader) : null;
FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, sort.fields, nDocs);
@@ -227,12 +251,11 @@
// put docs in array
scoreDocs[i] = hq.FillFields((FieldDoc) hq.Pop());
- return new TopFieldDocs(totalHits[0], scoreDocs, hq.GetFields());
+ return new TopFieldDocs(totalHits[0], scoreDocs, hq.GetFields(), hq.GetMaxScore());
}
-
// inherit javadoc
- public override void Search(Query query, Filter filter, HitCollector results)
+ public override void Search(Weight weight, Filter filter, HitCollector results)
{
HitCollector collector = results;
if (filter != null)
@@ -241,7 +264,7 @@
collector = new AnonymousClassHitCollector2(bits, results, this);
}
- Scorer scorer = query.Weight(this).Scorer(reader);
+ Scorer scorer = weight.Scorer(reader);
if (scorer == null)
return ;
scorer.Score(collector);
@@ -257,9 +280,9 @@
return query;
}
- public override Explanation Explain(Query query, int doc)
+ public override Explanation Explain(Weight weight, int doc)
{
- return query.Weight(this).Explain(reader, doc);
+ return weight.Explain(reader, doc);
}
}
}
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MatchAllDocsQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/MatchAllDocsQuery.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MatchAllDocsQuery.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MatchAllDocsQuery.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,203 @@
+/*
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// A query that matches all documents.
+ ///
+ ///
+ /// John Wang
+ ///
+ [Serializable]
+ public class MatchAllDocsQuery : Query
+ {
+
+ public MatchAllDocsQuery()
+ {
+ }
+
+ private class MatchAllScorer:Scorer
+ {
+ private void InitBlock(MatchAllDocsQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private MatchAllDocsQuery enclosingInstance;
+ public MatchAllDocsQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal IndexReader reader;
+ internal int count;
+ internal int maxDoc;
+
+ internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity) : base(similarity)
+ {
+ InitBlock(enclosingInstance);
+ this.reader = reader;
+ count = - 1;
+ maxDoc = reader.MaxDoc();
+ }
+
+ public override int Doc()
+ {
+ return count;
+ }
+
+ public override Explanation Explain(int doc)
+ {
+ Explanation explanation = new Explanation();
+ explanation.SetValue(1.0f);
+ explanation.SetDescription("MatchAllDocsQuery");
+ return explanation;
+ }
+
+ public override bool Next()
+ {
+ while (count < (maxDoc - 1))
+ {
+ count++;
+ if (!reader.IsDeleted(count))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ public override float Score()
+ {
+ return 1.0f;
+ }
+
+ public override bool SkipTo(int target)
+ {
+ count = target - 1;
+ return Next();
+ }
+ }
+
+ [Serializable]
+ private class MatchAllDocsWeight : Weight
+ {
+ private void InitBlock(MatchAllDocsQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private MatchAllDocsQuery enclosingInstance;
+ public MatchAllDocsQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Searcher searcher;
+
+ public MatchAllDocsWeight(MatchAllDocsQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.searcher = searcher;
+ }
+
+ public override System.String ToString()
+ {
+ return "weight(" + Enclosing_Instance + ")";
+ }
+
+ public virtual Query GetQuery()
+ {
+ return Enclosing_Instance;
+ }
+
+ public virtual float GetValue()
+ {
+ return 1.0f;
+ }
+
+ public virtual float SumOfSquaredWeights()
+ {
+ return 1.0f;
+ }
+
+ public virtual void Normalize(float queryNorm)
+ {
+ }
+
+ public virtual Scorer Scorer(IndexReader reader)
+ {
+ return new MatchAllScorer(enclosingInstance, reader, Enclosing_Instance.GetSimilarity(searcher));
+ }
+
+ public virtual Explanation Explain(IndexReader reader, int doc)
+ {
+ // explain query weight
+ Explanation queryExpl = new Explanation();
+ queryExpl.SetDescription("MatchAllDocsQuery:");
+
+ Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
+ if (Enclosing_Instance.GetBoost() != 1.0f)
+ queryExpl.AddDetail(boostExpl);
+ queryExpl.SetValue(boostExpl.GetValue());
+
+ return queryExpl;
+ }
+ }
+
+ protected internal override Weight CreateWeight(Searcher searcher)
+ {
+ return new MatchAllDocsWeight(this, searcher);
+ }
+
+ public override System.String ToString(System.String field)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("MatchAllDocsQuery");
+ buffer.Append(ToStringUtils.Boost(GetBoost()));
+ return buffer.ToString();
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (!(o is MatchAllDocsQuery))
+ return false;
+ MatchAllDocsQuery other = (MatchAllDocsQuery) o;
+ return this.GetBoost() == other.GetBoost();
+ }
+
+ public override int GetHashCode()
+ {
+ return BitConverter.ToInt32(BitConverter.GetBytes(GetBoost()), 0);
+ }
+
+ // {{Aroush-1.9}} Do we need this?!
+ override public System.Object Clone()
+ {
+ return null;
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MultiPhraseQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/MultiPhraseQuery.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MultiPhraseQuery.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MultiPhraseQuery.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using MultipleTermPositions = Lucene.Net.Index.MultipleTermPositions;
+using Term = Lucene.Net.Index.Term;
+using TermPositions = Lucene.Net.Index.TermPositions;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
+namespace Lucene.Net.Search
+{
+
+ /// MultiPhraseQuery is a generalized version of PhraseQuery, with an added
+ /// method {@link #Add(Term[])}.
+ /// To use this class, to search for the phrase "Microsoft app*" first use
+ /// add(Term) on the term "Microsoft", then find all terms that have "app" as
+ /// prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[]
+ /// terms) to add them to the query.
+ ///
+ ///
+ /// Anders Nielsen
+ ///
+ /// 1.0
+ ///
+ [Serializable]
+ public class MultiPhraseQuery : Query
+ {
+ private System.String field;
+ private System.Collections.ArrayList termArrays = new System.Collections.ArrayList();
+ private System.Collections.ArrayList positions = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
+
+ private int slop = 0;
+
+ /// Sets the phrase slop for this query.
+ ///
+ ///
+ public virtual void SetSlop(int s)
+ {
+ slop = s;
+ }
+
+ /// Sets the phrase slop for this query.
+ ///
+ ///
+ public virtual int GetSlop()
+ {
+ return slop;
+ }
+
+ /// Add a single term at the next position in the phrase.
+ ///
+ ///
+ public virtual void Add(Term term)
+ {
+ Add(new Term[]{term});
+ }
+
+ /// Add multiple terms at the next position in the phrase. Any of the terms
+ /// may match.
+ ///
+ ///
+ ///
+ ///
+ public virtual void Add(Term[] terms)
+ {
+ int position = 0;
+ if (positions.Count > 0)
+ position = ((System.Int32) positions[positions.Count - 1]) + 1;
+
+ Add(terms, position);
+ }
+
+ /// Allows to specify the relative position of terms within the phrase.
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ public virtual void Add(Term[] terms, int position)
+ {
+ if (termArrays.Count == 0)
+ field = terms[0].Field();
+
+ for (int i = 0; i < terms.Length; i++)
+ {
+ if ((System.Object) terms[i].Field() != (System.Object) field)
+ {
+ throw new System.ArgumentException("All phrase terms must be in the same field (" + field + "): " + terms[i]);
+ }
+ }
+
+ termArrays.Add(terms);
+ positions.Add((System.Int32) position);
+ }
+
+ /// Returns the relative positions of terms in this phrase.
+ public virtual int[] GetPositions()
+ {
+ int[] result = new int[positions.Count];
+ for (int i = 0; i < positions.Count; i++)
+ result[i] = ((System.Int32) positions[i]);
+ return result;
+ }
+
+ [Serializable]
+ private class MultiPhraseWeight : Weight
+ {
+ private void InitBlock(MultiPhraseQuery enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private MultiPhraseQuery enclosingInstance;
+ public MultiPhraseQuery Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private Similarity similarity;
+ private float value_Renamed;
+ private float idf;
+ private float queryNorm;
+ private float queryWeight;
+
+ public MultiPhraseWeight(MultiPhraseQuery enclosingInstance, Searcher searcher)
+ {
+ InitBlock(enclosingInstance);
+ this.similarity = Enclosing_Instance.GetSimilarity(searcher);
+
+ // compute idf
+ System.Collections.IEnumerator i = Enclosing_Instance.termArrays.GetEnumerator();
+ while (i.MoveNext())
+ {
+ Term[] terms = (Term[]) i.Current;
+ for (int j = 0; j < terms.Length; j++)
+ {
+ idf += Enclosing_Instance.GetSimilarity(searcher).Idf(terms[j], searcher);
+ }
+ }
+ }
+
+ public virtual Query GetQuery()
+ {
+ return Enclosing_Instance;
+ }
+ public virtual float GetValue()
+ {
+ return value_Renamed;
+ }
+
+ public virtual float SumOfSquaredWeights()
+ {
+ queryWeight = idf * Enclosing_Instance.GetBoost(); // compute query weight
+ return queryWeight * queryWeight; // square it
+ }
+
+ public virtual void Normalize(float queryNorm)
+ {
+ this.queryNorm = queryNorm;
+ queryWeight *= queryNorm; // normalize query weight
+ value_Renamed = queryWeight * idf; // idf for document
+ }
+
+ public virtual Scorer Scorer(IndexReader reader)
+ {
+ if (Enclosing_Instance.termArrays.Count == 0)
+ // optimize zero-term case
+ return null;
+
+ TermPositions[] tps = new TermPositions[Enclosing_Instance.termArrays.Count];
+ for (int i = 0; i < tps.Length; i++)
+ {
+ Term[] terms = (Term[]) Enclosing_Instance.termArrays[i];
+
+ TermPositions p;
+ if (terms.Length > 1)
+ p = new MultipleTermPositions(reader, terms);
+ else
+ p = reader.TermPositions(terms[0]);
+
+ if (p == null)
+ return null;
+
+ tps[i] = p;
+ }
+
+ if (Enclosing_Instance.slop == 0)
+ return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
+ else
+ return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
+ }
+
+ public virtual Explanation Explain(IndexReader reader, int doc)
+ {
+ Explanation result = new Explanation();
+ result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");
+
+ Explanation idfExpl = new Explanation(idf, "idf(" + GetQuery() + ")");
+
+ // explain query weight
+ Explanation queryExpl = new Explanation();
+ queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");
+
+ Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
+ if (Enclosing_Instance.GetBoost() != 1.0f)
+ queryExpl.AddDetail(boostExpl);
+
+ queryExpl.AddDetail(idfExpl);
+
+ Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
+ queryExpl.AddDetail(queryNormExpl);
+
+ queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());
+
+ result.AddDetail(queryExpl);
+
+ // explain field weight
+ Explanation fieldExpl = new Explanation();
+ fieldExpl.SetDescription("fieldWeight(" + GetQuery() + " in " + doc + "), product of:");
+
+ Explanation tfExpl = Scorer(reader).Explain(doc);
+ fieldExpl.AddDetail(tfExpl);
+ fieldExpl.AddDetail(idfExpl);
+
+ Explanation fieldNormExpl = new Explanation();
+ byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
+ float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNormExpl.SetValue(fieldNorm);
+ fieldNormExpl.SetDescription("fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")");
+ fieldExpl.AddDetail(fieldNormExpl);
+
+ fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());
+
+ result.AddDetail(fieldExpl);
+
+ // combine them
+ result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());
+
+ if (queryExpl.GetValue() == 1.0f)
+ return fieldExpl;
+
+ return result;
+ }
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ if (termArrays.Count == 1)
+ {
+ // optimize one-term case
+ Term[] terms = (Term[]) termArrays[0];
+ BooleanQuery boq = new BooleanQuery(true);
+ for (int i = 0; i < terms.Length; i++)
+ {
+ boq.Add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
+ }
+ boq.SetBoost(GetBoost());
+ return boq;
+ }
+ else
+ {
+ return this;
+ }
+ }
+
+ protected internal override Weight CreateWeight(Searcher searcher)
+ {
+ return new MultiPhraseWeight(this, searcher);
+ }
+
+ /// Prints a user-readable version of this query.
+ public override System.String ToString(System.String f)
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ if (!field.Equals(f))
+ {
+ buffer.Append(field);
+ buffer.Append(":");
+ }
+
+ buffer.Append("\"");
+ System.Collections.IEnumerator i = termArrays.GetEnumerator();
+ while (i.MoveNext())
+ {
+ Term[] terms = (Term[]) i.Current;
+ if (terms.Length > 1)
+ {
+ buffer.Append("(");
+ for (int j = 0; j < terms.Length; j++)
+ {
+ buffer.Append(terms[j].Text());
+ if (j < terms.Length - 1)
+ buffer.Append(" ");
+ }
+ buffer.Append(")");
+ }
+ else
+ {
+ buffer.Append(terms[0].Text());
+ }
+ if (i.MoveNext())
+ buffer.Append(" ");
+ }
+ buffer.Append("\"");
+
+ if (slop != 0)
+ {
+ buffer.Append("~");
+ buffer.Append(slop);
+ }
+
+ buffer.Append(ToStringUtils.Boost(GetBoost()));
+
+ return buffer.ToString();
+ }
+
+ // {{Aroush-1.9}} Do we need this?!
+ override public System.Object Clone()
+ {
+ return null;
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MultiSearcher.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/MultiSearcher.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MultiSearcher.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MultiSearcher.cs Sat Jun 3 19:41:13 2006
@@ -13,9 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Document = Lucene.Net.Documents.Document;
using Term = Lucene.Net.Index.Term;
+
namespace Lucene.Net.Search
{
@@ -54,6 +56,92 @@
results.Collect(doc + start, score);
}
}
+ /// Document Frequency cache acting as a Dummy-Searcher.
+ /// This class is no full-fledged Searcher, but only supports
+ /// the methods necessary to initialize Weights.
+ ///
+ private class CachedDfSource:Searcher
+ {
+ private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
+ private int maxDoc; // document count
+
+ public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
+ {
+ this.dfMap = dfMap;
+ this.maxDoc = maxDoc;
+ }
+
+ public override int DocFreq(Term term)
+ {
+ int df;
+ try
+ {
+ df = ((System.Int32) dfMap[term]);
+ }
+ catch (System.NullReferenceException)
+ {
+ throw new System.ArgumentException("df for term " + term.Text() + " not available");
+ }
+ return df;
+ }
+
+ public override int[] DocFreqs(Term[] terms)
+ {
+ int[] result = new int[terms.Length];
+ for (int i = 0; i < terms.Length; i++)
+ {
+ result[i] = DocFreq(terms[i]);
+ }
+ return result;
+ }
+
+ public override int MaxDoc()
+ {
+ return maxDoc;
+ }
+
+ public override Query Rewrite(Query query)
+ {
+ // this is a bit of a hack. We know that a query which
+ // creates a Weight based on this Dummy-Searcher is
+ // always already rewritten (see preparedWeight()).
+ // Therefore we just return the unmodified query here
+ return query;
+ }
+
+ public override void Close()
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override Document Doc(int i)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override Explanation Explain(Weight weight, int doc)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override void Search(Weight weight, Filter filter, HitCollector results)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override TopDocs Search(Weight weight, Filter filter, int n)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
+ {
+ throw new System.NotSupportedException();
+ }
+ }
+
+
+
private Lucene.Net.Search.Searchable[] searchables;
private int[] starts;
private int maxDoc = 0;
@@ -72,6 +160,12 @@
starts[searchables.Length] = maxDoc;
}
+ /// Return the array of {@link Searchable}s this searches.
+ public virtual Lucene.Net.Search.Searchable[] GetSearchables()
+ {
+ return searchables;
+ }
+
protected internal virtual int[] GetStarts()
{
return starts;
@@ -151,15 +245,16 @@
return maxDoc;
}
- public override TopDocs Search(Query query, Filter filter, int nDocs)
+ public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
+
HitQueue hq = new HitQueue(nDocs);
int totalHits = 0;
for (int i = 0; i < searchables.Length; i++)
{
// search each searcher
- TopDocs docs = searchables[i].Search(query, filter, nDocs);
+ TopDocs docs = searchables[i].Search(weight, filter, nDocs);
totalHits += docs.totalHits; // update totalHits
ScoreDoc[] scoreDocs = docs.scoreDocs;
for (int j = 0; j < scoreDocs.Length; j++)
@@ -174,25 +269,30 @@
ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
for (int i = hq.Size() - 1; i >= 0; i--)
- // put docs in array
+ // put docs in array
scoreDocs2[i] = (ScoreDoc) hq.Pop();
- return new TopDocs(totalHits, scoreDocs2);
+ float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].score;
+
+ return new TopDocs(totalHits, scoreDocs2, maxScore);
}
-
- public override TopFieldDocs Search(Query query, Filter filter, int n, Sort sort)
+ public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
FieldDocSortedHitQueue hq = null;
int totalHits = 0;
+ float maxScore = System.Single.NegativeInfinity;
+
for (int i = 0; i < searchables.Length; i++)
{
// search each searcher
- TopFieldDocs docs = searchables[i].Search(query, filter, n, sort);
+ TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
+
if (hq == null)
hq = new FieldDocSortedHitQueue(docs.fields, n);
totalHits += docs.totalHits; // update totalHits
+ maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
ScoreDoc[] scoreDocs = docs.scoreDocs;
for (int j = 0; j < scoreDocs.Length; j++)
{
@@ -206,22 +306,22 @@
ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
for (int i = hq.Size() - 1; i >= 0; i--)
- // put docs in array
+ // put docs in array
scoreDocs2[i] = (ScoreDoc) hq.Pop();
- return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields());
+ return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
}
// inherit javadoc
- public override void Search(Query query, Filter filter, HitCollector results)
+ public override void Search(Weight weight, Filter filter, HitCollector results)
{
for (int i = 0; i < searchables.Length; i++)
{
int start = starts[i];
- searchables[i].Search(query, filter, new AnonymousClassHitCollector(results, start, this));
+ searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, start, this));
}
}
@@ -232,13 +332,66 @@
{
queries[i] = searchables[i].Rewrite(original);
}
- return original.Combine(queries);
+ return queries[0].Combine(queries);
}
- public override Explanation Explain(Query query, int doc)
+ public override Explanation Explain(Weight weight, int doc)
{
int i = SubSearcher(doc); // find searcher index
- return searchables[i].Explain(query, doc - starts[i]); // dispatch to searcher
+ return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
+ }
+
+ /// Create weight in multiple index scenario.
+ ///
+ /// Distributed query processing is done in the following steps:
+ /// 1. rewrite query
+ /// 2. extract necessary terms
+ /// 3. collect dfs for these terms from the Searchables
+ /// 4. create query weight using aggregate dfs.
+ /// 5. distribute that weight to Searchables
+ /// 6. merge results
+ ///
+ /// Steps 1-4 are done here, 5+6 in the search() methods
+ ///
+ ///
+ /// rewritten queries
+ ///
+ protected internal override Weight CreateWeight(Query original)
+ {
+ // step 1
+ Query rewrittenQuery = Rewrite(original);
+
+ // step 2
+ System.Collections.Hashtable terms = new System.Collections.Hashtable();
+ rewrittenQuery.ExtractTerms(terms);
+
+ // step3
+ Term[] allTermsArray = new Term[terms.Count];
+ int index = 0;
+ System.Collections.IEnumerator e = terms.GetEnumerator();
+ while (e.MoveNext())
+ allTermsArray[index++] = e.Current as Term;
+ int[] aggregatedDfs = new int[terms.Count];
+ for (int i = 0; i < searchables.Length; i++)
+ {
+ int[] dfs = searchables[i].DocFreqs(allTermsArray);
+ for (int j = 0; j < aggregatedDfs.Length; j++)
+ {
+ aggregatedDfs[j] += dfs[j];
+ }
+ }
+
+ System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
+ for (int i = 0; i < allTermsArray.Length; i++)
+ {
+ dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
+ }
+
+ // step4
+ int numDocs = MaxDoc();
+ CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);
+
+ return rewrittenQuery.Weight(cacheSim);
}
}
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MultiTermQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/MultiTermQuery.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MultiTermQuery.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/MultiTermQuery.cs Sat Jun 3 19:41:13 2006
@@ -13,9 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using IndexReader = Lucene.Net.Index.IndexReader;
using Term = Lucene.Net.Index.Term;
+using ToStringUtils = Lucene.Net.Util.ToStringUtils;
+
namespace Lucene.Net.Search
{
@@ -32,7 +35,7 @@
/// {@link FuzzyTermEnum}, respectively.
///
[Serializable]
- public abstract class MultiTermQuery:Query
+ public abstract class MultiTermQuery : Query
{
private Term term;
@@ -54,7 +57,7 @@
public override Query Rewrite(IndexReader reader)
{
FilteredTermEnum enumerator = GetEnum(reader);
- BooleanQuery query = new BooleanQuery();
+ BooleanQuery query = new BooleanQuery(true);
try
{
do
@@ -64,7 +67,7 @@
{
TermQuery tq = new TermQuery(t); // found a match
tq.SetBoost(GetBoost() * enumerator.Difference()); // set the boost
- query.Add(tq, false, false); // add to query
+ query.Add(tq, BooleanClause.Occur.SHOULD); // add to query
}
}
while (enumerator.Next());
@@ -76,12 +79,6 @@
return query;
}
- public override Query Combine(Query[] queries)
- {
- return Query.MergeBooleanQueries(queries);
- }
-
-
/// Prints a user-readable version of this query.
public override System.String ToString(System.String field)
{
@@ -92,15 +89,28 @@
buffer.Append(":");
}
buffer.Append(term.Text());
- if (GetBoost() != 1.0f)
- {
- System.Globalization.NumberFormatInfo nfi = new System.Globalization.CultureInfo("en-US", false).NumberFormat;
- nfi.NumberDecimalDigits = 1;
-
- buffer.Append("^");
- buffer.Append(GetBoost().ToString("N", nfi));
- }
+ buffer.Append(ToStringUtils.Boost(GetBoost()));
return buffer.ToString();
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is MultiTermQuery))
+ return false;
+
+ MultiTermQuery multiTermQuery = (MultiTermQuery) o;
+
+ if (!term.Equals(multiTermQuery.term))
+ return false;
+
+ return GetBoost() == multiTermQuery.GetBoost();
+ }
+
+ public override int GetHashCode()
+ {
+ return term.GetHashCode();
}
}
}
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/NonMatchingScorer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/NonMatchingScorer.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/NonMatchingScorer.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/NonMatchingScorer.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Search
+{
+
+ /// A scorer that matches no document at all.
+ class NonMatchingScorer : Scorer
+ {
+ public NonMatchingScorer() : base(null)
+ {
+ } // no similarity used
+
+ public override int Doc()
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override bool Next()
+ {
+ return false;
+ }
+
+ public override float Score()
+ {
+ throw new System.NotSupportedException();
+ }
+
+ public override bool SkipTo(int target)
+ {
+ return false;
+ }
+
+ public override Explanation Explain(int doc)
+ {
+ Explanation e = new Explanation();
+ e.SetDescription("No document matches.");
+ return e;
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/ParallelMultiSearcher.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/ParallelMultiSearcher.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/ParallelMultiSearcher.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/ParallelMultiSearcher.cs Sat Jun 3 19:41:13 2006
@@ -13,9 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Term = Lucene.Net.Index.Term;
using PriorityQueue = Lucene.Net.Util.PriorityQueue;
+
namespace Lucene.Net.Search
{
@@ -24,9 +26,9 @@
///
Applications usually need only call the inherited {@link #Search(Query)}
/// or {@link #Search(Query,Filter)} methods.
///
- public class ParallelMultiSearcher:MultiSearcher
+ public class ParallelMultiSearcher : MultiSearcher
{
- private class AnonymousClassHitCollector1:HitCollector
+ private class AnonymousClassHitCollector1 : HitCollector
{
public AnonymousClassHitCollector1(Lucene.Net.Search.HitCollector results, int start, ParallelMultiSearcher enclosingInstance)
{
@@ -59,7 +61,7 @@
private int[] starts;
/// Creates a searcher which searches searchables.
- public ParallelMultiSearcher(Lucene.Net.Search.Searchable[] searchables):base(searchables)
+ public ParallelMultiSearcher(Lucene.Net.Search.Searchable[] searchables) : base(searchables)
{
this.searchables = searchables;
this.starts = GetStarts();
@@ -68,17 +70,14 @@
/// TODO: parallelize this one too
public override int DocFreq(Term term)
{
- int docFreq = 0;
- for (int i = 0; i < searchables.Length; i++)
- docFreq += searchables[i].DocFreq(term);
- return docFreq;
+ return base.DocFreq(term);
}
/// A search implementation which spans a new thread for each
/// Searchable, waits for each search to complete and merge
/// the results back together.
///
- public override TopDocs Search(Query query, Filter filter, int nDocs)
+ public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
HitQueue hq = new HitQueue(nDocs);
int totalHits = 0;
@@ -87,7 +86,7 @@
{
// search each searcher
// Assume not too many searchables and cost of creating a thread is by far inferior to a search
- msta[i] = new MultiSearcherThread(searchables[i], query, filter, nDocs, hq, i, starts, "MultiSearcher thread #" + (i + 1));
+ msta[i] = new MultiSearcherThread(searchables[i], weight, filter, nDocs, hq, i, starts, "MultiSearcher thread #" + (i + 1));
msta[i].Start();
}
@@ -97,7 +96,7 @@
{
msta[i].Join();
}
- catch (System.Threading.ThreadInterruptedException ie)
+ catch (System.Threading.ThreadInterruptedException)
{
; // TODO: what should we do with this???
}
@@ -118,14 +117,16 @@
// put docs in array
scoreDocs[i] = (ScoreDoc) hq.Pop();
- return new TopDocs(totalHits, scoreDocs);
+ float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs[0].score;
+
+ return new TopDocs(totalHits, scoreDocs, maxScore);
}
/// A search implementation allowing sorting which spans a new thread for each
/// Searchable, waits for each search to complete and merges
/// the results back together.
///
- public override TopFieldDocs Search(Query query, Filter filter, int nDocs, Sort sort)
+ public override TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort)
{
// don't specify the fields - we'll wait to do this until we get results
FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue(null, nDocs);
@@ -135,17 +136,19 @@
{
// search each searcher
// Assume not too many searchables and cost of creating a thread is by far inferior to a search
- msta[i] = new MultiSearcherThread(searchables[i], query, filter, nDocs, hq, sort, i, starts, "MultiSearcher thread #" + (i + 1));
+ msta[i] = new MultiSearcherThread(searchables[i], weight, filter, nDocs, hq, sort, i, starts, "MultiSearcher thread #" + (i + 1));
msta[i].Start();
}
+ float maxScore = System.Single.NegativeInfinity;
+
for (int i = 0; i < searchables.Length; i++)
{
try
{
msta[i].Join();
}
- catch (System.Threading.ThreadInterruptedException ie)
+ catch (System.Threading.ThreadInterruptedException)
{
; // TODO: what should we do with this???
}
@@ -153,6 +156,7 @@
if (ioe == null)
{
totalHits += msta[i].Hits();
+ maxScore = System.Math.Max(maxScore, msta[i].GetMaxScore());
}
else
{
@@ -166,7 +170,7 @@
// put docs in array
scoreDocs[i] = (ScoreDoc) hq.Pop();
- return new TopFieldDocs(totalHits, scoreDocs, hq.GetFields());
+ return new TopFieldDocs(totalHits, scoreDocs, hq.GetFields(), maxScore);
}
/// Lower-level search API.
@@ -180,37 +184,32 @@
/// non-high-scoring hits.
///
///
- /// to match documents
+ /// to match documents
///
/// if non-null, a bitset used to eliminate some documents
///
/// to receive hits
///
- /// TODO: parallelize this one too
///
- public override void Search(Query query, Filter filter, HitCollector results)
+ /// parallelize this one too
+ public override void Search(Weight weight, Filter filter, HitCollector results)
{
for (int i = 0; i < searchables.Length; i++)
{
int start = starts[i];
- searchables[i].Search(query, filter, new AnonymousClassHitCollector1(results, start, this));
+ searchables[i].Search(weight, filter, new AnonymousClassHitCollector1(results, start, this));
}
}
/*
* TODO: this one could be parallelized too
- * @see Lucene.Net.Search.Searchable#rewrite(Lucene.Net.Search.Query)
+ * @see Lucene.Net.search.Searchable#rewrite(Lucene.Net.search.Query)
*/
public override Query Rewrite(Query original)
{
- Query[] queries = new Query[searchables.Length];
- for (int i = 0; i < searchables.Length; i++)
- {
- queries[i] = searchables[i].Rewrite(original);
- }
- return original.Combine(queries);
+ return base.Rewrite(original);
}
}
@@ -219,7 +218,7 @@
{
private Lucene.Net.Search.Searchable searchable;
- private Query query;
+ private Weight weight;
private Filter filter;
private int nDocs;
private TopDocs docs;
@@ -229,10 +228,10 @@
private System.IO.IOException ioe;
private Sort sort;
- public MultiSearcherThread(Lucene.Net.Search.Searchable searchable, Query query, Filter filter, int nDocs, HitQueue hq, int i, int[] starts, System.String name):base(name)
+ public MultiSearcherThread(Lucene.Net.Search.Searchable searchable, Weight weight, Filter filter, int nDocs, HitQueue hq, int i, int[] starts, System.String name):base(name)
{
this.searchable = searchable;
- this.query = query;
+ this.weight = weight;
this.filter = filter;
this.nDocs = nDocs;
this.hq = hq;
@@ -240,10 +239,10 @@
this.starts = starts;
}
- public MultiSearcherThread(Lucene.Net.Search.Searchable searchable, Query query, Filter filter, int nDocs, FieldDocSortedHitQueue hq, Sort sort, int i, int[] starts, System.String name):base(name)
+ public MultiSearcherThread(Lucene.Net.Search.Searchable searchable, Weight weight, Filter filter, int nDocs, FieldDocSortedHitQueue hq, Sort sort, int i, int[] starts, System.String name):base(name)
{
this.searchable = searchable;
- this.query = query;
+ this.weight = weight;
this.filter = filter;
this.nDocs = nDocs;
this.hq = hq;
@@ -256,7 +255,7 @@
{
try
{
- docs = (sort == null)?searchable.Search(query, filter, nDocs):searchable.Search(query, filter, nDocs, sort);
+ docs = (sort == null)?searchable.Search(weight, filter, nDocs):searchable.Search(weight, filter, nDocs, sort);
}
// Store the IOException for later use by the caller of this thread
catch (System.IO.IOException ioe)
@@ -265,7 +264,7 @@
}
if (this.ioe == null)
{
- // if we are sorting by fields, we need to tell the Field sorted hit queue
+ // if we are sorting by fields, we need to tell the field sorted hit queue
// the actual type of fields, in case the original list contained AUTO.
// if the searchable returns null for fields, we'll have problems.
if (sort != null)
@@ -291,6 +290,11 @@
public virtual int Hits()
{
return docs.totalHits;
+ }
+
+ public virtual float GetMaxScore()
+ {
+ return docs.GetMaxScore();
}
public virtual System.IO.IOException GetIOException()
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/PhrasePositions.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Search/PhrasePositions.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/PhrasePositions.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Search/PhrasePositions.cs Sat Jun 3 19:41:13 2006
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Lucene.Net.Index;
+
namespace Lucene.Net.Search
{