lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dor...@apache.org
Subject svn commit: r560372 [1/2] - in /lucene/java/trunk: ./ contrib/benchmark/ contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/ contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/ contrib/benchmark/src/java/org/apache/lucene/b...
Date Fri, 27 Jul 2007 20:24:53 GMT
Author: doronc
Date: Fri Jul 27 13:24:52 2007
New Revision: 560372

URL: http://svn.apache.org/viewvc?view=rev&rev=560372
Log:
LUCENE-836: Add support for search quality benchmarking.

Added:
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html   (with props)
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java   (with props)
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecQRels.txt   (with props)
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt   (with props)
Modified:
    lucene/java/trunk/common-build.xml
    lucene/java/trunk/contrib/benchmark/CHANGES.txt
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java

Modified: lucene/java/trunk/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/common-build.xml?view=diff&rev=560372&r1=560371&r2=560372
==============================================================================
--- lucene/java/trunk/common-build.xml (original)
+++ lucene/java/trunk/common-build.xml Fri Jul 27 13:24:52 2007
@@ -284,6 +284,8 @@
     </copy>
   </target>
 
+  <property name="tests.verbose" value="false"/>
+
   <target name="test" depends="compile-test" description="Runs unit tests">
     <fail unless="junit.present">
       ##################################################################
@@ -299,6 +301,10 @@
       <assertions>
         <enable package="org.apache.lucene"/>
       </assertions>
+
+      <!-- allow tests to control debug prints -->
+      <sysproperty key="tests.verbose" value="${tests.verbose}"/>
+
       <!-- TODO: create propertyset for test properties, so each project can have its own set -->
       <sysproperty key="dataDir" file="src/test"/>
       <sysproperty key="tempDir" file="${build.dir}/test"/>

Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?view=diff&rev=560372&r1=560371&r2=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Fri Jul 27 13:24:52 2007
@@ -4,6 +4,14 @@
 
 $Id:$
 
+7/27/07
+  LUCENE-836: Add support for search quality benchmarking: run
+  a set of queries against a searcher, optionally produce a submission
+  report, and, if query judgements are available, compute quality measures:
+  recall, precision_at_N, average_precision, MAP. TREC specific Judge (based 
+  on TREC QRels) and TREC Topics reader are included in o.a.l.benchmark.quality.trec
+  but any other format of queries and judgements can be implemented and used.
+  
 7/24/07
   LUCENE-947: Add support for creating and index "one document per
   line" from a large text file, which reduces per-document overhead of

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.PrintWriter;
+
+/**
+ * Decides whether a document is relevant for a quality query.
+ */
+public interface Judge {
+
+  /**
+   * Tell whether the document named <code>docName</code> is relevant for the
+   * given quality query.
+   * @param docName name of the document tested for relevancy.
+   * @param query the quality query the document is tested against.
+   * @return true if relevant, false if not.
+   */
+  boolean isRelevant(String docName, QualityQuery query);
+
+  /**
+   * Check that the given queries and this Judge match each other.
+   * A perfect match means this Judge has some data for each and every
+   * input quality query, and no data on any other quality query.
+   * <b>Note</b>: the quality benchmark run would not fail in case of imperfect
+   * validity, just a warning message would be logged.
+   * @param qq quality queries to be validated.
+   * @param logger if not null, validation issues are logged.
+   * @return true if perfectly valid, false if not.
+   */
+  boolean validateData(QualityQuery[] qq, PrintWriter logger);
+
+  /**
+   * Return the maximal recall for the input quality query.
+   * It is the number of relevant docs this Judge "knows" for the query.
+   * @param query the query whose maximal recall is needed.
+   * @return number of relevant docs known to this Judge for <code>query</code>.
+   */
+  int maxRecall(QualityQuery query);
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
+import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.TopDocs;
+
+/**
+ * Main entry point for running a quality benchmark.
+ * <p>
+ * There are two main configurations for running a quality benchmark: <ul>
+ * <li>Against existing judgements.</li>
+ * <li>For submission (e.g. for a contest).</li>
+ * </ul>
+ * The first configuration requires a non null
+ * {@link org.apache.lucene.benchmark.quality.Judge Judge}. 
+ * The second configuration requires a non null 
+ * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionReport}.
+ */
+public class QualityBenchmark {
+
+  /** Quality Queries that this quality benchmark would execute. */
+  protected QualityQuery qualityQueries[];
+  
+  /** Parser for turning QualityQueries into Lucene Queries. */
+  protected QualityQueryParser qqParser;
+  
+  /** Index to be searched. */
+  protected Searcher searcher;
+
+  /** index field to extract doc name for each search result; used for judging the results. */  
+  protected String docNameField;
+
+  /**
+   * Create a QualityBenchmark.
+   * @param qqs quality queries to run.
+   * @param qqParser parser for turning QualityQueries into Lucene Queries. 
+   * @param searcher index to be searched.
+   * @param docNameField name of field containing the document name.
+   *        This allows to extract the doc name for search results,
+   *        and is important for judging the results.  
+   */
+  public QualityBenchmark(QualityQuery qqs[], QualityQueryParser qqParser, 
+      Searcher searcher, String docNameField) {
+    this.qualityQueries = qqs;
+    this.qqParser = qqParser;
+    this.searcher = searcher;
+    this.docNameField = docNameField;
+  }
+
+  /**
+   * Run the quality benchmark.
+   * @param maxResults how many results to collect for each quality query.
+   * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query. 
+   *        If null, no judgements would be made. Usually null for a submission run. 
+   * @param submitRep submission report is created if non null.
+   * @param qualityLog If not null, quality run data would be printed for each query.
+   * @return QualityStats of each quality query that was executed, in the order of the
+   *         quality queries. Entries are null when <code>judge</code> is null, since
+   *         stats are only computed while judging.
+   * @throws Exception if quality benchmark failed to run.
+   */
+  public  QualityStats [] execute(int maxResults, Judge judge, SubmissionReport submitRep, 
+                                  PrintWriter qualityLog) throws Exception {
+    QualityStats stats[] = new QualityStats[qualityQueries.length]; 
+    for (int i=0; i<qualityQueries.length; i++) {
+      QualityQuery qq = qualityQueries[i];
+      // generate query
+      Query q = qqParser.parse(qq);
+      // search with this query; only the search call itself is timed
+      long t1 = System.currentTimeMillis();
+      TopDocs td = searcher.search(q,null,maxResults);
+      long searchTime = System.currentTimeMillis()-t1;
+      //most likely we either submit or judge, but check both 
+      if (judge!=null) {
+        stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
+      }
+      if (submitRep!=null) {
+        submitRep.report(qq,td,docNameField,searcher);
+      }
+    } 
+    return stats;
+  }
+  
+  /*
+   * Analyze/judge results for a single quality query; optionally log them.
+   * Each result's doc name is extracted, judged for relevance and added to the
+   * returned stats together with the time the extraction took.
+   */
+  private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException {
+    QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime);
+    ScoreDoc sd[] = td.scoreDocs;
+    // for the first doc name we measure also the construction of the doc name extractor, just in case.
+    long t1 = System.currentTimeMillis();
+    DocNameExtractor xt = new DocNameExtractor(docNameField);
+    for (int i=0; i<sd.length; i++) {
+      String docName = xt.docName(searcher,sd[i].doc);
+      long docNameExtractTime = System.currentTimeMillis() - t1;
+      boolean isRelevant = judge.isRelevant(docName,qq);
+      stts.addResult(i+1,isRelevant, docNameExtractTime);
+      // restart the timer only after judging, so that the next extraction time
+      // does not include the time spent in the judge and in the stats update.
+      t1 = System.currentTimeMillis();
+    }
+    if (logger!=null) {
+      logger.println(qq.getQueryID()+"  -  "+q);
+      stts.log(qq.getQueryID()+" Stats:",1,logger,"  ");
+    }
+    return stts;
+  }
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.util.Map;
+
+/**
+ * A QualityQuery has an ID and some name-value pairs.
+ * <p> 
+ * The ID allows to map the quality query with its judgements.
+ * <p>
+ * The name-value pairs are used by a 
+ * {@link org.apache.lucene.benchmark.quality.QualityQueryParser}
+ * to create a Lucene {@link org.apache.lucene.search.Query}.
+ * <p>
+ * It is very likely that name-value-pairs would be mapped into fields in a Lucene query,
+ * but it is up to the QualityQueryParser how to map - e.g. all values in a single field, 
+ * or each pair as its own field, etc., - and this of course must match the way the 
+ * searched index was constructed.
+ */
+public class QualityQuery implements Comparable {
+  private String queryID;
+  private Map nameValPairs;
+
+  /**
+   * Create a QualityQuery with given ID and name-value pairs.
+   * @param queryID ID of this quality query.
+   * @param nameValPairs the contents of this quality query.
+   */
+  public QualityQuery(String queryID, Map nameValPairs) {
+    this.queryID = queryID;
+    this.nameValPairs = nameValPairs;
+  }
+  
+  /**
+   * Return all the names of name-value-pairs in this QualityQuery.
+   */
+  public String[] getNames() {
+    return (String[]) nameValPairs.keySet().toArray(new String[0]);
+  }
+
+  /**
+   * Return the value of a certain name-value pair.
+   * @param name the name whose value should be returned. 
+   */
+  public String getValue(String name) {
+    return (String) nameValPairs.get(name);
+  }
+
+  /**
+   * Return the ID of this query.
+   * The ID allows to map the quality query with its judgements.
+   */
+  public String getQueryID() {
+    return queryID;
+  }
+
+  /**
+   * Order quality queries by ID, for a nicer sort of input queries before running them.
+   * IDs are compared numerically when both parse as ints; otherwise the
+   * comparison falls back to plain string comparison of the IDs.
+   * @param o the other QualityQuery.
+   * @return a negative, zero or positive value as this ID sorts before, equal to,
+   *         or after the other ID.
+   */
+  public int compareTo(Object o) {
+    QualityQuery other = (QualityQuery) o;
+    try {
+      // compare as ints when ids ints
+      int n = Integer.parseInt(queryID);
+      int nOther = Integer.parseInt(other.queryID);
+      // compare explicitly: "n - nOther" overflows for ids that are far apart
+      // (e.g. a large negative vs. a positive id) and would flip the sign.
+      return n < nOther ? -1 : (n == nOther ? 0 : 1);
+    } catch (NumberFormatException e) {
+      // fall back to string comparison
+      return queryID.compareTo(other.queryID);
+    }
+  }
+  
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Query;
+
+/**
+ * Turns a QualityQuery into a Lucene query.
+ */
+public interface QualityQueryParser {
+
+  /**
+   * Build the Lucene query that corresponds to the given QualityQuery.
+   * @param qq the quality query to be parsed.
+   * @return the Lucene query created for <code>qq</code>.
+   * @throws ParseException if parsing failed.
+   */
+  Query parse(QualityQuery qq) throws ParseException;
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,266 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.PrintWriter;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+
+/**
+ * Results of quality benchmark run for a single query or for a set of queries.
+ */
+public class QualityStats {
+
+  /** Number of points for which precision is computed. */
+  public static final int MAX_POINTS = 20;
+  
+  private double maxGoodPoints;
+  private double recall;
+  private double pAt[];
+  private double pReleventSum = 0;
+  private double numPoints = 0;
+  private double numGoodPoints = 0;
+  private long searchTime;
+  private long docNamesExtractTime;
+
+  /**
+   * A certain rank in which a relevant doc was found.
+   */
+  public static class RecallPoint {
+    private int rank;
+    private double recall;
+    private RecallPoint(int rank, double recall) {
+      this.rank = rank;
+      this.recall = recall;
+    }
+    /** Returns the rank: where on the list of returned docs this relevant doc appeared. */
+    public int getRank() {
+      return rank;
+    }
+    /** Returns the recall: how many relevant docs were returned up to this point, inclusive. */
+    public double getRecall() {
+      return recall;
+    }
+  }
+  
+  private ArrayList recallPoints;
+  
+  /**
+   * Construct a QualityStats object with anticipated maximal number of relevant hits. 
+   * @param maxGoodPoints maximal possible relevant hits.
+   * @param searchTime search time to record for the measured query, in milliseconds.
+   */
+  public QualityStats(double maxGoodPoints, long searchTime) {
+    this.maxGoodPoints = maxGoodPoints;
+    this.searchTime = searchTime;
+    this.recallPoints = new ArrayList();
+    pAt = new double[MAX_POINTS+1]; // pAt[0] unused. 
+  }
+
+  /**
+   * Add a (possibly relevant) doc.
+   * @param n rank of the added doc (its ordinal position within the query results).
+   *        Must be exactly one more than the number of docs added so far.
+   * @param isRelevant true if the added doc is relevant, false otherwise.
+   * @param docNameExtractTime time it took to extract the name of the added doc;
+   *        it is accumulated into the total doc names extract time.
+   * @throws IllegalArgumentException if <code>n</code> is not the next rank.
+   */
+  public void addResult(int n, boolean isRelevant, long docNameExtractTime) {
+    if (Math.abs(numPoints+1 - n) > 1E-6) {
+      throw new IllegalArgumentException("point "+n+" illegal after "+numPoints+" points!");
+    }
+    if (isRelevant) {
+      numGoodPoints+=1;
+      recallPoints.add(new RecallPoint(n,numGoodPoints));
+    }
+    numPoints = n;
+    double p = numGoodPoints / numPoints; // precision at rank n
+    if (isRelevant) {
+      pReleventSum += p;
+    }
+    if (n<pAt.length) {
+      pAt[n] = p;
+    }
+    // with no known relevant docs the precision is used in place of the recall
+    recall = maxGoodPoints<=0 ? p : numGoodPoints/maxGoodPoints;
+    docNamesExtractTime += docNameExtractTime;
+  }
+
+  /**
+   * Return the precision at rank n:
+   * |{relevant hits within first <code>n</code> hits}| / <code>n</code>.
+   * @param n requested precision point, must be at least 1 and at most {@link #MAX_POINTS}. 
+   * @throws IllegalArgumentException if <code>n</code> is out of that range.
+   */
+  public double getPrecisionAt(int n) {
+    if (n<1 || n>MAX_POINTS) {
+      throw new IllegalArgumentException("n="+n+" - but it must be in [1,"+MAX_POINTS+"] range!"); 
+    }
+    if (n>numPoints) {
+      // fewer than n hits: the relevant hits found so far are divided by n
+      return (numPoints * pAt[(int)numPoints])/n;
+    }
+    return pAt[n];
+  }
+
+  /**
+   * Return the average precision at recall points: sum of precision at recall points / maxGoodPoints.
+   */
+  public double getAvp() {
+    return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints;
+  }
+  
+  /**
+   * Return the recall: |{relevant hits}| / |{max relevant hits}|.
+   * When the maximal number of relevant hits is not positive, the precision
+   * at the last added point is returned instead.
+   */
+  public double getRecall() {
+    return recall;
+  }
+
+  /**
+   * Log information on this QualityStats object.
+   * Numbers are always printed with '.' as the decimal separator,
+   * regardless of the default locale.
+   * @param title title line printed before the stats; skipped if null or blank.
+   * @param paddLines number of empty lines printed before and after the stats.
+   * @param logger Logger.
+   * @param prefix prefix before each log line.
+   */
+  public void log(String title, int paddLines, PrintWriter logger, String prefix) {
+    for (int i=0; i<paddLines; i++) {  
+      logger.println();
+    }
+    if (title!=null && title.trim().length()>0) {
+      logger.println(title);
+    }
+    prefix = prefix==null ? "" : prefix;
+    // fixed locale: fracFormat() aligns on '.', which locales using ',' as the
+    // decimal separator would not produce.
+    NumberFormat nf = NumberFormat.getInstance(java.util.Locale.US);
+    nf.setMaximumFractionDigits(3);
+    nf.setMinimumFractionDigits(3);
+    nf.setGroupingUsed(true);
+    int M = 19;
+    logger.println(prefix+format("Search Seconds: ",M)+
+        fracFormat(nf.format((double)searchTime/1000)));
+    logger.println(prefix+format("DocName Seconds: ",M)+
+        fracFormat(nf.format((double)docNamesExtractTime/1000)));
+    logger.println(prefix+format("Num Points: ",M)+
+        fracFormat(nf.format(numPoints)));
+    logger.println(prefix+format("Num Good Points: ",M)+
+        fracFormat(nf.format(numGoodPoints)));
+    logger.println(prefix+format("Max Good Points: ",M)+
+        fracFormat(nf.format(maxGoodPoints)));
+    logger.println(prefix+format("Average Precision: ",M)+
+        fracFormat(nf.format(getAvp())));
+    logger.println(prefix+format("Recall: ",M)+
+        fracFormat(nf.format(getRecall())));
+    // NOTE(review): the bound i<numPoints skips the precision at the last rank when
+    // there are at most MAX_POINTS points - confirm whether i<=numPoints was intended.
+    for (int i=1; i<(int)numPoints && i<pAt.length; i++) {
+      logger.println(prefix+format("Precision At "+i+": ",M)+
+          fracFormat(nf.format(getPrecisionAt(i))));
+    }
+    for (int i=0; i<paddLines; i++) {  
+      logger.println();
+    }
+  }
+
+  private static String padd = "                                    ";
+
+  /* Right-pad s with blanks to at least minLen characters. */
+  private String format(String s, int minLen) {
+    s = (s==null ? "" : s);
+    int n = Math.max(minLen,s.length());
+    return (s+padd).substring(0,n);
+  }
+
+  /* Left-pad the integer part of frac to at least 6 characters, so that decimal points align. */
+  private String fracFormat(String frac) {
+    int k = frac.indexOf('.');
+    if (k<0) {
+      // no decimal point: treat the whole string as the integer part
+      // rather than failing on substring(0,-1).
+      k = frac.length();
+    }
+    String s1 = padd+frac.substring(0,k);
+    int n = Math.max(k,6);
+    s1 = s1.substring(s1.length()-n);
+    return s1 + frac.substring(k);
+  }
+  
+  /**
+   * Create a QualityStats object that is the average of the input QualityStats objects. 
+   * Times are averaged over all input stats; all other measures are averaged
+   * only over stats with a positive number of max good points.
+   * @param stats array of input stats to be averaged; every entry is dereferenced,
+   *        so it must not contain null entries.
+   * @return an average over the input stats.
+   */
+  public static QualityStats average(QualityStats[] stats) {
+    QualityStats avg = new QualityStats(0,0);
+    int m = 0; // queries with positive judgements
+    // aggregate
+    for (int i=0; i<stats.length; i++) {
+      avg.searchTime += stats[i].searchTime;
+      avg.docNamesExtractTime += stats[i].docNamesExtractTime;
+      if (stats[i].maxGoodPoints>0) {
+        m++;
+        avg.numGoodPoints += stats[i].numGoodPoints;
+        avg.numPoints += stats[i].numPoints;
+        avg.pReleventSum += stats[i].getAvp();
+        avg.recall += stats[i].recall;
+        avg.maxGoodPoints += stats[i].maxGoodPoints;
+        for (int j=1; j<avg.pAt.length; j++) {
+          avg.pAt[j] += stats[i].getPrecisionAt(j);
+        }
+      }
+    }
+    assert m>0 : "Fishy: no \"good\" queries!";
+    // take average: times go by all queries, other measures go by "good" queries only.
+    avg.searchTime /= stats.length;
+    avg.docNamesExtractTime /= stats.length;
+    avg.numGoodPoints /= m;
+    avg.numPoints /= m;
+    avg.recall /= m;
+    avg.maxGoodPoints /= m;
+    for (int j=1; j<avg.pAt.length; j++) {
+      avg.pAt[j] /= m;
+    }
+    avg.pReleventSum /= m;                 // this is actually avgp now 
+    avg.pReleventSum *= avg.maxGoodPoints; // so that getAvp() would be correct
+    
+    return avg;
+  }
+
+  /**
+   * Returns the time it took to extract doc names for judging the measured query, in milliseconds.
+   */
+  public long getDocNamesExtractTime() {
+    return docNamesExtractTime;
+  }
+
+  /**
+   * Returns the maximal number of good points.
+   * This is the number of relevant docs known by the judge for the measured query.
+   */
+  public double getMaxGoodPoints() {
+    return maxGoodPoints;
+  }
+
+  /**
+   * Returns the number of good points (only relevant points).
+   */
+  public double getNumGoodPoints() {
+    return numGoodPoints;
+  }
+
+  /**
+   * Returns the number of points (both relevant and irrelevant points).
+   */
+  public double getNumPoints() {
+    return numPoints;
+  }
+
+  /**
+   * Returns the recallPoints.
+   */
+  public RecallPoint [] getRecallPoints() {
+    return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]);
+  }
+
+  /**
+   * Returns the search time in milliseconds for the measured query.
+   */
+  public long getSearchTime() {
+    return searchTime;
+  }
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html Fri Jul 27 13:24:52 2007
@@ -0,0 +1,65 @@
+<html>
+<body>
+<h2>Search Quality Benchmarking.</h2>
+<p>
+This package allows benchmarking the search quality of a Lucene application.
+<p>
+In order to use this package you should provide:
+<ul>
+  <li>A <a href="../../search/Searcher.html">searcher</a>.</li>
+  <li><a href="QualityQuery.html">Quality queries</a>.</li>
+  <li><a href="Judge.html">Judging object</a>.</li>
+  <li><a href="utils/SubmissionReport.html">Reporting object</a>.</li>
+</ul>
+<p>
+For benchmarking TREC collections with TREC QRels, take a look at the 
+<a href="trec/package-summary.html">trec package</a>.
+<p>
+Here is sample code that runs the TREC 2006 queries 701-850 on the .Gov2 collection:
+
+<pre>
+    File topicsFile = new File("topics-701-850.txt");
+    File qrelsFile = new File("qrels-701-850.txt");
+    Searcher searcher = new IndexSearcher("index");
+
+    int maxResults = 1000;
+    String docNameField = "docname"; 
+    
+    PrintWriter logger = new PrintWriter(System.out,true); 
+
+    // use trec utilities to read trec topics into quality queries
+    TrecTopicsReader qReader = new TrecTopicsReader();
+    QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+    
+    // prepare judge, with trec utilities that read from a QRels file
+    Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+    
+    // validate topics & judgments match each other
+    judge.validateData(qqs, logger);
+    
+    // set the parsing of quality queries into Lucene queries.
+    QualityQueryParser qqParser = new SimpleQQParser("title", "body");
+    
+    // run the benchmark
+    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+    SubmissionReport submitLog = null;
+    QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+    
+    // print an average summary of the results
+    QualityStats avg = QualityStats.average(stats);
+    avg.log("SUMMARY",2,logger, "  ");
+</pre>
+
+<p>
+Some immediate ways to modify this program to your needs are:
+<ul>
+  <li>To run on different formats of queries and judgements provide your own 
+      <a href="Judge.html">Judge</a> and 
+      <a href="QualityQuery.html">Quality queries</a>.</li>
+  <li>Create sophisticated Lucene queries by supplying a different 
+  <a href="QualityQueryParser.html">Quality query parser</a>.</li>
+</ul>
+
+</body>
+
+</html>

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.trec;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.StringTokenizer;
+
+import org.apache.lucene.benchmark.quality.Judge;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+
+/**
+ * Judge if given document is relevant to given quality query, based on Trec format for judgements.
+ * <p>
+ * Only relevant judgements are retained; a doc name that was not retained
+ * for a query is reported as not relevant for that query.
+ */
+public class TrecJudge implements Judge {
+
+  /** Maps a query ID (String) to the QRelJudgement holding that query's relevant doc names. */
+  HashMap judgements;
+  
+  /**
+   * Constructor from a reader.
+   * <p>
+   * Expected input format:
+   * <pre>
+   *     qnum  0   doc-name     is-relevant
+   * </pre> 
+   * Two sample lines:
+   * <pre> 
+   *     19    0   doc303       1
+   *     19    0   doc7295      0
+   * </pre> 
+   * Empty lines and lines starting with '#' are skipped. Any value other than
+   * "0" in the last column marks the doc as relevant. The reader is always
+   * closed on return, also when reading fails.
+   * @param reader where judgments are read from.
+   * @throws IOException if reading from the reader fails.
+   * @throws java.util.NoSuchElementException if a judgement line has less than four tokens.
+   */
+  public TrecJudge (BufferedReader reader) throws IOException {
+    judgements = new HashMap();
+    QRelJudgement curr = null; // judgement of the most recently seen query, saves map lookups
+    String zero = "0";
+    String line;
+    
+    try {
+      while (null!=(line=reader.readLine())) {
+        line = line.trim();
+        if (line.length()==0 || '#'==line.charAt(0)) {
+          continue;
+        }
+        StringTokenizer st = new StringTokenizer(line);
+        String queryID = st.nextToken();
+        st.nextToken(); // second column is not used
+        String docName = st.nextToken();
+        boolean relevant = !zero.equals(st.nextToken());
+        assert !st.hasMoreTokens() : "wrong format: "+line+"  next: "+st.nextToken();
+        if (relevant) { // only keep relevant docs
+          if (curr==null || !curr.queryID.equals(queryID)) {
+            curr = (QRelJudgement)judgements.get(queryID);
+            if (curr==null) {
+              curr = new QRelJudgement(queryID);
+              judgements.put(queryID,curr);
+            }
+          }
+          curr.addRelevantDoc(docName);
+        }
+      }
+    } finally {
+      reader.close();
+    }
+  }
+  
+  // inherit javadocs
+  public boolean isRelevant(String docName, QualityQuery query) {
+    QRelJudgement qrj = (QRelJudgement) judgements.get(query.getQueryID());
+    return qrj!=null && qrj.isRelevant(docName);
+  }
+
+  /** single Judgement of a trec quality query */
+  private static class QRelJudgement {
+    private String queryID;
+    /** Used as a set of relevant doc names: each name is mapped to itself. */
+    private HashMap relevantDocs;
+    
+    QRelJudgement(String queryID) {
+      this.queryID = queryID;
+      relevantDocs = new HashMap();
+    }
+    
+    /** Mark the named doc as relevant for this query. */
+    public void addRelevantDoc(String docName) {
+      relevantDocs.put(docName,docName);
+    }
+
+    /** True if the named doc was marked as relevant for this query. */
+    boolean isRelevant(String docName) {
+      return relevantDocs.containsKey(docName);
+    }
+
+    /** Number of distinct relevant docs recorded for this query. */
+    public int maxRecall() {
+      return relevantDocs.size();
+    }
+  }
+
+  // inherit javadocs
+  public boolean validateData(QualityQuery[] qq, PrintWriter logger) {
+    // start from all judged query IDs, and remove each ID for which a query exists.
+    HashMap missingQueries = (HashMap) judgements.clone();
+    ArrayList missingJudgements = new ArrayList();
+    for (int i=0; i<qq.length; i++) {
+      String id = qq[i].getQueryID();
+      // consult the full judgements map rather than the shrinking copy, so that
+      // a repeated query ID is not mistaken for a query without judgments.
+      if (judgements.containsKey(id)) {
+        missingQueries.remove(id);
+      } else if (!missingJudgements.contains(id)) {
+        missingJudgements.add(id);
+      }
+    }
+    boolean isValid = true;
+    if (missingJudgements.size()>0) {
+      isValid = false;
+      if (logger!=null) {
+        logger.println("WARNING: "+missingJudgements.size()+" queries have no judgments! - ");
+        for (int i=0; i<missingJudgements.size(); i++) {
+          logger.println("   "+(String)missingJudgements.get(i));
+        }
+      }
+    }
+    if (missingQueries.size()>0) {
+      isValid = false;
+      if (logger!=null) {
+        logger.println("WARNING: "+missingQueries.size()+" judgments match no query! - ");
+        for (Iterator it = missingQueries.keySet().iterator(); it.hasNext();) {
+          String id = (String) it.next();
+          logger.println("   "+id);
+        }
+      }
+    }
+    return isValid;
+  }
+
+  // inherit javadocs
+  public int maxRecall(QualityQuery query) {
+    QRelJudgement qrj = (QRelJudgement) judgements.get(query.getQueryID());
+    if (qrj!=null) {
+      return qrj.maxRecall();
+    }
+    return 0; // no relevant docs are known for this query
+  }
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.trec;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import org.apache.lucene.benchmark.quality.QualityQuery;
+
+/**
+ * Read TREC topics.
+ * <p>
+ * Expects this topic format -
+ * <pre>
+ *   &lt;top&gt;
+ *   &lt;num&gt; Number: nnn
+ *     
+ *   &lt;title&gt; title of the topic
+ *     
+ *   &lt;desc&gt; Description:
+ *   description of the topic
+ *     
+ *   &lt;narr&gt; Narrative:
+ *   "story" composed by assessors.
+ *    
+ *   &lt;/top&gt;
+ * </pre>
+ * Comment lines starting with '#' are ignored.
+ */
+public class TrecTopicsReader {
+
+  private static final String newline = System.getProperty("line.separator");
+  
+  /**
+   *  Constructor for Trec's TopicsReader
+   */
+  public TrecTopicsReader() {
+    super();
+  }
+
+  /**
+   * Read quality queries from trec format topics file.
+   * @param reader where queries are read from.
+   * @return the result quality queries.
+   * @throws IOException if cannot read the queries.
+   */
+  public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
+    ArrayList res = new ArrayList();
+    StringBuffer sb;
+    try {
+      while (null!=(sb=read(reader,"<top>",null,false,false))) {
+        HashMap fields = new HashMap();
+        // id
+        sb = read(reader,"<num>",null,true,false);
+        int k = sb.indexOf(":");
+        String id = sb.substring(k+1).trim();
+        // title
+        sb = read(reader,"<title>",null,true,false);
+        k = sb.indexOf(">");
+        String title = sb.substring(k+1).trim();
+        // description
+        sb = read(reader,"<desc>",null,false,false);
+        sb = read(reader,"<narr>",null,false,true);
+        String descripion = sb.toString().trim();
+        // we got a topic!
+        fields.put("title",title);
+        fields.put("description",descripion);
+        QualityQuery topic = new QualityQuery(id,fields);
+        res.add(topic);
+        // skip narrative, get to end of doc
+        read(reader,"</top>",null,false,false);
+      }
+    } finally {
+      reader.close();
+    }
+    // sort result array (by ID) 
+    QualityQuery qq[] = (QualityQuery[]) res.toArray(new QualityQuery[0]);
+    Arrays.sort(qq);
+    return qq;
+  }
+
+  // read until finding a line that starts with the specified prefix
+  private StringBuffer read (BufferedReader reader, String prefix, StringBuffer sb, boolean collectMatchLine, boolean collectAll) throws IOException {
+    sb = (sb==null ? new StringBuffer() : sb);
+    String sep = "";
+    while (true) {
+      String line = reader.readLine();
+      if (line==null) {
+        return null;
+      }
+      if (line.startsWith(prefix)) {
+        if (collectMatchLine) {
+          sb.append(sep+line);
+          sep = newline;
+        }
+        break;
+      }
+      if (collectAll) {
+        sb.append(sep+line);
+        sep = newline;
+      }
+    }
+    //System.out.println("read: "+sb);
+    return sb;
+  }
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html Fri Jul 27 13:24:52 2007
@@ -0,0 +1,6 @@
+<html>
+<body>
+Utilities for Trec related quality benchmarking, feeding from Trec Topics and QRels inputs.
+</body>
+
+</html>

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import java.io.IOException;
+
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.FieldSelectorResult;
+import org.apache.lucene.search.Searcher;
+
+/**
+ * Utility: fetch the name of a document from a stored field of an index.
+ */
+public class DocNameExtractor {
+
+  private FieldSelector fldSel;
+  private String docNameField;
+  
+  /**
+   * Create an extractor reading doc names from the given field.
+   * @param docNameField name of the stored field containing the doc name. 
+   */
+  public DocNameExtractor (final String docNameField) {
+    this.docNameField = docNameField;
+    // selector that loads the doc name field only, and stops once it was loaded
+    this.fldSel = new FieldSelector() {
+      public FieldSelectorResult accept(String fieldName) {
+        if (fieldName.equals(docNameField)) {
+          return FieldSelectorResult.LOAD_AND_BREAK;
+        }
+        return FieldSelectorResult.NO_LOAD;
+      }
+    };
+  }
+  
+  /**
+   * Look up the name of a doc in the index.
+   * @param searcher access to the index.
+   * @param docid ID of doc whose name is needed.
+   * @return the value of the doc name field of that doc, as extracted from the index.
+   * @throws IOException if cannot extract the doc name from the index.
+   */
+  public String docName(Searcher searcher, int docid) throws IOException {
+    return searcher.doc(docid, this.fldSel).get(this.docNameField);
+  }
+  
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * Suggest Quality queries based on an index contents.
+ * Utility class, used for making quality test benchmarks.
+ */
+public class QualityQueriesFinder {
+
+  private static final String newline = System.getProperty("line.separator");
+  private Directory dir;
+  
+  /**
+   * Constructor over a directory containing the index.
+   * @param dir directory containing the index we search for the quality test. 
+   */
+  private QualityQueriesFinder(Directory dir) {
+    this.dir = dir;
+  }
+
+  /**
+   * Print 20 suggested queries, formatted as TREC topics, 
+   * made of terms of the "body" field of the index.  
+   * @param args {index-dir}
+   * @throws IOException  if cannot access the index.
+   */
+  public static void main(String[] args) throws IOException {
+    if (args.length<1) {
+      System.err.println("Usage: java QualityQueriesFinder <index-dir>");
+      System.exit(1);
+    }
+    QualityQueriesFinder qqf = new QualityQueriesFinder(FSDirectory.getDirectory(new File(args[0])));
+    String q[] = qqf.bestQueries("body",20);
+    for (int i=0; i<q.length; i++) {
+      System.out.println(newline+formatQueryAsTrecTopic(i,q[i],null,null));
+    }
+  }
+
+  // compose queries of four words each, out of the best terms of the given field.
+  // may return less than numQueries queries if the field has too few terms.
+  private String [] bestQueries(String field,int numQueries) throws IOException {
+    // honor the requested field (it used to be hard-coded to "body")
+    String words[] = bestTerms(field,4*numQueries);
+    int n = words.length;
+    int m = n/4;
+    String res[] = new String[m];
+    for (int i=0; i<res.length; i++) {
+      // words are ordered by increasing doc frequency: two words are taken 
+      // from the low end of the array and two from its high end.
+      res[i] = words[i] + " " + words[m+i]+ "  " + words[n-1-m-i]  + " " + words[n-1-i];
+    }
+    return res;
+  }
+  
+  // format a topic in the layout that TrecTopicsReader expects; nulls become empty texts.
+  private static String formatQueryAsTrecTopic (int qnum, String title, String description, String narrative) {
+    return 
+      "<top>" + newline +
+      "<num> Number: " + qnum             + newline + newline + 
+      "<title> " + (title==null?"":title) + newline + newline + 
+      "<desc> Description:"               + newline +
+      (description==null?"":description)  + newline + newline +
+      "<narr> Narrative:"                 + newline +
+      (narrative==null?"":narrative)      + newline + newline +
+      "</top>";
+  }
+  
+  // collect up to numTerms terms of the given field with the highest doc frequency
+  // among those below the "too common" threshold, ordered by increasing doc frequency.
+  private String [] bestTerms(String field,int numTerms) throws IOException {
+    PriorityQueue pq = new TermsDfQueue(numTerms);
+    IndexReader ir = IndexReader.open(dir);
+    try {
+      int threshold = ir.maxDoc() / 10; // ignore words too common.
+      TermEnum terms = ir.terms(new Term(field,""));
+      try {
+        // the enum is already positioned at its first term, so that term must
+        // be examined before next() is called, or else it would be skipped.
+        do {
+          Term term = terms.term();
+          if (term==null || !field.equals(term.field())) {
+            break; // no more terms, or got to terms of another field
+          }
+          int df = terms.docFreq();
+          if (df<threshold) {
+            pq.insert(new TermDf(term.text(),df));
+          }
+        } while (terms.next());
+      } finally {
+        terms.close();
+      }
+    } finally {
+      ir.close();
+    }
+    String res[] = new String[pq.size()];
+    int i = 0;
+    while (pq.size()>0) {
+      TermDf tdf = (TermDf) pq.pop(); 
+      res[i++] = tdf.word;
+      System.out.println(i+".   word:  "+tdf.df+"   "+tdf.word);
+    }
+    return res;
+  }
+
+  /** A term text with its document frequency. */
+  private static class TermDf {
+    String word;
+    int df;
+    TermDf (String word, int freq) {
+      this.word = word;
+      this.df = freq;
+    }
+  }
+  
+  /** Queue of TermDf objects, ordered by document frequency. */
+  private static class TermsDfQueue extends PriorityQueue {
+    TermsDfQueue (int maxSize) {
+      initialize(maxSize);
+    }
+    protected boolean lessThan(Object a, Object b) {
+      TermDf tf1 = (TermDf) a;
+      TermDf tf2 = (TermDf) b;
+      return tf1.df < tf2.df;
+    }
+  }
+  
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+import org.apache.lucene.benchmark.quality.QualityQueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+/**
+ * Simplistic quality query parser. The value of one name-value pair of the 
+ * QualityQuery is handed to a Lucene QueryParser that uses a 
+ * StandardAnalyzer, and the parsed result is the Lucene query. */
+public class SimpleQQParser implements QualityQueryParser {
+
+  private String qqName;
+  private String indexField;
+  // holds one lazily created QueryParser per calling thread
+  ThreadLocal queryParser = new ThreadLocal();
+
+  /**
+   * Constructor of a simple qq parser.
+   * @param qqName name-value pair of quality query to use for creating the query
+   * @param indexField corresponding index field  
+   */
+  public SimpleQQParser(String qqName, String indexField) {
+    this.indexField = indexField;
+    this.qqName = qqName;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.benchmark.quality.QualityQueryParser#parse(org.apache.lucene.benchmark.quality.QualityQuery)
+   */
+  public Query parse(QualityQuery qq) throws ParseException {
+    Object cached = queryParser.get();
+    QueryParser parser;
+    if (cached!=null) {
+      parser = (QueryParser) cached;
+    } else {
+      // first call on this thread: create its parser and remember it
+      parser = new QueryParser(indexField, new StandardAnalyzer());
+      queryParser.set(parser);
+    }
+    String text = qq.getValue(qqName);
+    return parser.parse(text);
+  }
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.text.NumberFormat;
+import java.util.Locale;
+
+import org.apache.lucene.benchmark.quality.QualityQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.TopDocs;
+
+/**
+ * Create a log ready for submission.
+ * Extend this class and override
+ * {@link #report(QualityQuery, TopDocs, String, Searcher)}
+ * to create different reports. 
+ */
+public class SubmissionReport {
+
+  private NumberFormat nf;
+  private PrintWriter logger;
+  
+  /**
+   * Constructor for SubmissionReport.
+   * <p>
+   * Scores are printed with exactly 4 fraction digits, with a '.' as decimal 
+   * separator and with no grouping separators, regardless of the default locale.
+   * @param logger if null, no submission data is created. 
+   */
+  public SubmissionReport (PrintWriter logger) {
+    this.logger = logger;
+    // a fixed locale keeps the report format independent of the machine settings
+    nf = NumberFormat.getInstance(Locale.US);
+    nf.setGroupingUsed(false);
+    nf.setMaximumFractionDigits(4);
+    nf.setMinimumFractionDigits(4);
+  }
+  
+  /**
+   * Report a search result for a certain quality query.
+   * One line is printed for each result doc, with these columns: 
+   * query ID, the constant 0, doc name, position in the results (0 based), score.
+   * @param qq quality query for which the results are reported.
+   * @param td search results for the query.
+   * @param docNameField stored field used for fetching the result doc name.  
+   * @param searcher index access for fetching doc name.
+   * @throws IOException in case of a problem.
+   */
+  public void report(QualityQuery qq, TopDocs td, String docNameField, Searcher searcher) throws IOException {
+    if (logger==null) {
+      return;
+    }
+    ScoreDoc sd[] = td.scoreDocs;
+    String sep = " \t ";
+    DocNameExtractor xt = new DocNameExtractor(docNameField);
+    for (int i=0; i<sd.length; i++) {
+      String docName = xt.docName(searcher,sd[i].doc);
+      logger.println(
+          qq.getQueryID()       + sep +
+          '0'                   + sep +
+          format(docName,20)    + sep +
+          format(""+i,7)        + sep +
+          nf.format(sd[i].score)
+          );
+    }
+  }
+
+  // right-pad s with blanks up to minLen chars; null is treated as an empty text, 
+  // and a text that is already long enough is returned as is.
+  private String format(String s, int minLen) {
+    s = (s==null ? "" : s);
+    if (s.length()>=minLen) {
+      return s;
+    }
+    StringBuffer sb = new StringBuffer(minLen);
+    sb.append(s);
+    while (sb.length()<minLen) {
+      sb.append(' ');
+    }
+    return sb.toString();
+  }
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html Fri Jul 27 13:24:52 2007
@@ -0,0 +1,6 @@
+<html>
+<body>
+Miscellaneous utilities for search quality benchmarking: query parsing, submission reports.
+</body>
+
+</html>

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
------------------------------------------------------------------------------
    svn:executable = *

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?view=diff&rev=560372&r1=560371&r2=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Fri Jul 27 13:24:52 2007
@@ -23,6 +23,9 @@
 import java.io.BufferedReader;
 
 import org.apache.lucene.benchmark.byTask.Benchmark;
+import org.apache.lucene.benchmark.byTask.feeds.DocData;
+import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
+import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@@ -135,8 +138,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "# ----- properties ",
-        "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
-        "doc.add.log.step=2697",
+        "doc.maker="+Reuters20DocMaker.class.getName(),
+        "doc.add.log.step=3",
         "doc.term.vector=false",
         "doc.maker.forever=false",
         "directory=FSDirectory",
@@ -153,7 +156,7 @@
 
     // 3. test number of docs in the index
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
-    int ndocsExpected = 21578; // that's how many docs there are in the Reuters collecton.
+    int ndocsExpected = 20; // Reuters20DocMaker exhausts after 20 docs.
     assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
     ir.close();
   }
@@ -221,7 +224,7 @@
   }
   
   // create the benchmark and execute it. 
-  private Benchmark execBenchmark(String[] algLines) throws Exception {
+  public static Benchmark execBenchmark(String[] algLines) throws Exception {
     String algText = algLinesToText(algLines);
     logTstLogic(algText);
     Benchmark benchmark = new Benchmark(new StringReader(algText));
@@ -230,7 +233,7 @@
   }
   
   // catenate alg lines to make the alg text
-  private String algLinesToText(String[] algLines) {
+  private static String algLinesToText(String[] algLines) {
     String indent = "  ";
     StringBuffer sb = new StringBuffer();
     for (int i = 0; i < propLines.length; i++) {
@@ -242,11 +245,22 @@
     return sb.toString();
   }
 
-  private void logTstLogic (String txt) {
+  private static void logTstLogic (String txt) {
     if (!DEBUG) 
       return;
     System.out.println("Test logic of:");
     System.out.println(txt);
   }
 
+  /** Use Reuters and the exhaust mechanism but, to be faster, add 20 docs only (unless forever is set). */
+  public static class Reuters20DocMaker extends ReutersDocMaker {
+    private int nDocs=0; // number of requests passed on to the super class so far
+    protected DocData getNextDocData() throws Exception {
+      if (nDocs>=20 && !forever) { // 20 docs were already requested and we are not in forever mode
+        throw new NoMoreDataException();
+      }
+      nDocs++; // counted before delegating, so a failing super call still counts
+      return super.getNextDocData();
+    }
+  }
 }

Added: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,174 @@
+package org.apache.lucene.benchmark.quality;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.PrintWriter;
+
+import org.apache.lucene.benchmark.byTask.TestPerfTasksLogic;
+import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
+import org.apache.lucene.benchmark.quality.Judge;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+import org.apache.lucene.benchmark.quality.QualityQueryParser;
+import org.apache.lucene.benchmark.quality.QualityBenchmark;
+import org.apache.lucene.benchmark.quality.trec.TrecJudge;
+import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
+import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
+import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.FSDirectory;
+
+import junit.framework.TestCase;
+
+/**
+ * Test that quality run does its job.
+ */
+public class TestQualityRun extends TestCase {
+
+  private static boolean DEBUG = Boolean.getBoolean("tests.verbose");
+  
+  /**
+   * @param name name of this test case, passed on to the TestCase constructor
+   */
+  public TestQualityRun(String name) {
+    super(name);
+  }
+
+  public void testTrecQuality() throws Exception {
+    // quality run over the Reuters index, then checks of the resulting stats; first create the complete reuters index
+    createReutersIndex();
+    
+    File workDir = new File(System.getProperty("benchmark.work.dir","work"));
+    assertTrue("Bad workDir: "+workDir, workDir.exists()&& workDir.isDirectory());
+
+    int maxResults = 1000;
+    String docNameField = "docid"; 
+    
+    PrintWriter logger = DEBUG ? new PrintWriter(System.out,true) : null; // null unless DEBUG (tests.verbose) is set
+
+    // <tests src dir> for topics/qrels files - src/test/org/apache/lucene/benchmark/quality
+    File srcTestDir = new File(new File(new File(new File(new File(
+      new File(new File(workDir.getAbsoluteFile().getParentFile(),
+        "src"),"test"),"org"),"apache"),"lucene"),"benchmark"),"quality");
+    
+    // prepare topics
+    File topicsFile = new File(srcTestDir, "trecTopics.txt");
+    assertTrue("Bad topicsFile: "+topicsFile, topicsFile.exists()&& topicsFile.isFile());
+    TrecTopicsReader qReader = new TrecTopicsReader();
+    QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile))); // NOTE(review): reader is not closed in this method
+    
+    // prepare judge
+    File qrelsFile = new File(srcTestDir, "trecQRels.txt");
+    assertTrue("Bad qrelsFile: "+qrelsFile, qrelsFile.exists()&& qrelsFile.isFile());
+    Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile))); // NOTE(review): reader is not closed in this method
+    
+    // validate topics & judgments match each other
+    judge.validateData(qqs, logger);
+    
+    IndexSearcher searcher = new IndexSearcher(FSDirectory.getDirectory(new File(workDir,"index"))); // NOTE(review): searcher is not closed in this method
+
+    QualityQueryParser qqParser = new SimpleQQParser("title","body");
+    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+    
+    SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger) : null;
+    QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+    
+    // --------- verify according to the way the judgments were altered for this test:
+    // for some queries, depending on m = qnum % 8 (the loop below uses the index i into stats[] as qnum)
+    // m==0: avg_precision and recall are hurt, by marking fake docs as relevant
+    // m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
+    // m==2: all of avg_precision, precision_at_n and recall are hurt.
+    // m>=3: these queries remain perfect
+    for (int i = 0; i < stats.length; i++) {
+      QualityStats s = stats[i];
+      switch (i%8) {
+
+      case 0:
+        assertTrue("avg-p should be hurt: "+s.getAvp(), 1.0 > s.getAvp());
+        assertTrue("recall should be hurt: "+s.getRecall(), 1.0 > s.getRecall());
+        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-9);
+        }
+        break;
+      
+      case 1:
+        assertTrue("avg-p should be hurt", 1.0 > s.getAvp());
+        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-9);
+        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+          assertTrue("p_at_"+j+" should be hurt: "+s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
+        }
+        break;
+
+      case 2:
+        assertTrue("avg-p should be hurt: "+s.getAvp(), 1.0 > s.getAvp());
+        assertTrue("recall should be hurt: "+s.getRecall(), 1.0 > s.getRecall());
+        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+          assertTrue("p_at_"+j+" should be hurt: "+s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
+        }
+        break;
+
+      default: {
+        assertEquals("avg-p should be perfect: "+s.getAvp(), 1.0, s.getAvp(), 1E-9);
+        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-9);
+        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-9);
+        }
+      }
+      
+      }
+    }
+    
+    QualityStats avg = QualityStats.average(stats);
+    if (logger!=null) {
+      avg.log("Average statistis:",1,logger,"  "); // NOTE(review): "statistis" looks like a typo for "statistics"
+    }
+    
+    assertTrue("mean avg-p should be hurt: "+avg.getAvp(), 1.0 > avg.getAvp());
+    assertTrue("avg recall should be hurt: "+avg.getRecall(), 1.0 > avg.getRecall());
+    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+      assertTrue("avg p_at_"+j+" should be hurt: "+avg.getPrecisionAt(j), 1.0 > avg.getPrecisionAt(j));
+    }
+
+    
+  }
+
+  // Use the benchmark logic (TestPerfTasksLogic.execBenchmark) to create the full Reuters index.
+  private void createReutersIndex() throws Exception {
+    // 1. alg definition: property lines first, then the task sequence
+    String algLines[] = {
+        "# ----- properties ",
+        "doc.maker="+ReutersDocMaker.class.getName(),
+        "doc.add.log.step=2500",
+        "doc.term.vector=false",
+        "doc.maker.forever=false",
+        "directory=FSDirectory",
+        "doc.stored=true",
+        "doc.tokenized=true",
+        "# ----- alg ",
+        "ResetSystemErase",
+        "CreateIndex",
+        "{ AddDoc } : *", // presumably: add docs until the doc maker is exhausted - confirm against the alg syntax
+        "CloseIndex",
+    };
+    
+    // 2. execute the algorithm via the shared static helper of TestPerfTasksLogic
+    TestPerfTasksLogic.execBenchmark(algLines);
+  }
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message