lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dor...@apache.org
Subject svn commit: r522569 - in /lucene/java/trunk/contrib/benchmark: ./ src/java/org/apache/lucene/benchmark/byTask/ src/java/org/apache/lucene/benchmark/byTask/feeds/ src/java/org/apache/lucene/benchmark/byTask/tasks/ src/java/org/apache/lucene/benchmark/by...
Date Mon, 26 Mar 2007 16:46:34 GMT
Author: doronc
Date: Mon Mar 26 09:46:33 2007
New Revision: 522569

URL: http://svn.apache.org/viewvc?view=rev&rev=522569
Log:
LUCENE-849: configurable HTML Parser; external classes; exhaustive doc maker - '*';

Added:
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
  (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
  (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java
  (with props)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/NoMoreDataException.java
  (with props)
Modified:
    lucene/java/trunk/contrib/benchmark/CHANGES.txt
    lucene/java/trunk/contrib/benchmark/build.xml
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java

Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Mon Mar 26 09:46:33 2007
@@ -4,6 +4,13 @@
 
 $Id:$
 
+3/25/07
+
+LUCENE-849: 
+1. which HTML Parser is used is configurable with html.parser property.
+2. External classes added to classpath with -Dbenchmark.ext.classpath=path.
+3. '*' as repeating number now means "exhaust doc maker - no repetitions".
+
 3/22/07
 
 -Moved withRetrieve() call out of the loop in ReadTask

Modified: lucene/java/trunk/contrib/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/build.xml?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/build.xml (original)
+++ lucene/java/trunk/contrib/benchmark/build.xml Mon Mar 26 09:46:33 2007
@@ -97,6 +97,7 @@
     <path id="run.classpath">
         <path refid="classpath"/>
         <pathelement location="${build.dir}/classes/java"/>
+        <pathelement location="${benchmark.ext.classpath}"/>
     </path>
 
     <target name="run-standard" depends="compile,check-files,get-files" description="Run the standard baseline">

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java Mon Mar 26 09:46:33 2007
@@ -52,6 +52,7 @@
     try {
       runData = new PerfRunData(new Config(algReader));
     } catch (Exception e) {
+      e.printStackTrace();
       throw new Exception("Error: cannot init PerfRunData!",e);
     }
     

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java Mon Mar 26 09:46:33 2007
@@ -23,6 +23,7 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
 import org.apache.lucene.benchmark.byTask.stats.Points;
 import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
@@ -58,6 +59,7 @@
   private Directory directory;
   private Analyzer analyzer;
   private DocMaker docMaker;
+  private HTMLParser htmlParser;
   
   // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
   private HashMap readTaskQueryMaker;
@@ -79,7 +81,10 @@
     docMaker.setConfig(config);
     // query makers
     readTaskQueryMaker = new HashMap();
-    qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
+    qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
+    // html parser, used for some doc makers
+    htmlParser = (HTMLParser) Class.forName(config.get("html.parser","org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser")).newInstance();
+    docMaker.setHTMLParser(htmlParser);
 
     // index stuff
     reinit(false);
@@ -227,6 +232,13 @@
       readTaskQueryMaker.put(readTaskClass,qm);
     }
     return qm;
+  }
+
+  /**
+   * @return Returns the htmlParser.
+   */
+  public HTMLParser getHtmlParser() {
+    return htmlParser;
   }
 
 }

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java Mon Mar 26 09:46:33 2007
@@ -26,9 +26,7 @@
 import java.io.File;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
-import java.util.Date;
 import java.util.Iterator;
-import java.util.Properties;
 
 
 /**
@@ -47,15 +45,8 @@
   
   private int numDocsCreated = 0;
   private boolean storeBytes = false;
+  protected boolean forever;
 
-  static class DocData {
-    String name;
-    Date date;
-    String title;
-    String body;
-    Properties props;
-  }
-  
   private static class LeftOver {
     private DocData docdata;
     private int cnt;
@@ -80,10 +71,14 @@
 
   /**
    * Return the data of the next document.
+   * All current implementations can create docs forever. 
+   * When the input data is exhausted, input files are iterated.
+   * This re-iteration can be avoided by setting doc.maker.forever to false (default is true).
    * @return data of the next document.
    * @exception if cannot create the next doc data
+   * @exception NoMoreDataException if data is exhausted (and 'forever' set to false).
    */
-  protected abstract DocData getNextDocData() throws Exception;
+  protected abstract DocData getNextDocData() throws NoMoreDataException, Exception;
 
   /*
    *  (non-Javadoc)
@@ -103,32 +98,32 @@
     int docid = incrNumDocsCreated();
     Document doc = new Document();
     doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
-    if (docData.name!=null) {
-      String name = (cnt<0 ? docData.name : docData.name+"_"+cnt);
+    if (docData.getName()!=null) {
+      String name = (cnt<0 ? docData.getName() : docData.getName()+"_"+cnt);
       doc.add(new Field("docname", name, storeVal, indexVal, termVecVal));
     }
-    if (docData.date!=null) {
-      String dateStr = DateTools.dateToString(docData.date, DateTools.Resolution.SECOND);
+    if (docData.getDate()!=null) {
+      String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND);
       doc.add(new Field("docdate", dateStr, storeVal, indexVal, termVecVal));
     }
-    if (docData.title!=null) {
-      doc.add(new Field("doctitle", docData.title, storeVal, indexVal, termVecVal));
+    if (docData.getTitle()!=null) {
+      doc.add(new Field("doctitle", docData.getTitle(), storeVal, indexVal, termVecVal));
     }
-    if (docData.body!=null && docData.body.length()>0) {
+    if (docData.getBody()!=null && docData.getBody().length()>0) {
       String bdy;
-      if (size<=0 || size>=docData.body.length()) {
-        bdy = docData.body; // use all
-        docData.body = "";  // nothing left
+      if (size<=0 || size>=docData.getBody().length()) {
+        bdy = docData.getBody(); // use all
+        docData.setBody("");  // nothing left
       } else {
         // attempt not to break words - if whitespace found within next 20 chars...
-        for (int n=size-1; n<size+20 && n<docData.body.length(); n++) {
-          if (Character.isWhitespace(docData.body.charAt(n))) {
+        for (int n=size-1; n<size+20 && n<docData.getBody().length(); n++) {
+          if (Character.isWhitespace(docData.getBody().charAt(n))) {
             size = n;
             break;
           }
         }
-        bdy = docData.body.substring(0,size); // use part
-        docData.body = docData.body.substring(size); // some left
+        bdy = docData.getBody().substring(0,size); // use part
+        docData.setBody(docData.getBody().substring(size)); // some left
       }
       doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
       if (storeBytes == true) {
@@ -136,13 +131,13 @@
       }
     }
 
-    if (docData.props!=null) {
-      for (Iterator it = docData.props.keySet().iterator(); it.hasNext(); ) {
+    if (docData.getProps()!=null) {
+      for (Iterator it = docData.getProps().keySet().iterator(); it.hasNext(); ) {
         String key = (String) it.next();
-        String val = (String) docData.props.get(key);
+        String val = (String) docData.getProps().get(key);
         doc.add(new Field(key, val, storeVal, indexVal, termVecVal));
       }
-      docData.props = null;
+      docData.setProps(null);
     }
     //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
     return doc;
@@ -154,19 +149,19 @@
    */
   public Document makeDocument(int size) throws Exception {
     LeftOver lvr = (LeftOver) leftovr.get();
-    if (lvr==null || lvr.docdata==null || lvr.docdata.body==null || lvr.docdata.body.length()==0) {
+    if (lvr==null || lvr.docdata==null || lvr.docdata.getBody()==null || lvr.docdata.getBody().length()==0) {
       resetLeftovers();
     }
     DocData dd = (lvr==null ? getNextDocData() : lvr.docdata);
     int cnt = (lvr==null ? 0 : lvr.cnt);
-    while (dd.body==null || dd.body.length()<size) {
+    while (dd.getBody()==null || dd.getBody().length()<size) {
       DocData dd2 = dd;
       dd = getNextDocData();
       cnt = 0;
-      dd.body = dd2.body + dd.body;
+      dd.setBody(dd2.getBody() + dd.getBody());
     }
     Document doc = createDocument(dd,size,cnt);
-    if (dd.body==null || dd.body.length()==0) {
+    if (dd.getBody()==null || dd.getBody().length()==0) {
       resetLeftovers();
     } else {
       if (lvr == null) {
@@ -195,6 +190,7 @@
     indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
     termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
     storeBytes = config.get("doc.store.body.bytes", false);
+    forever = config.get("doc.maker.forever",true);
   }
 
   /*
@@ -247,6 +243,8 @@
   private int lastPrintedNumUniqueTexts = 0;
   private long lastPrintedNumUniqueBytes = 0;
   private int printNum = 0;
+  private HTMLParser htmlParser;
+  
   public void printDocStatistics() {
     boolean print = false;
     String col = "                  ";
@@ -277,6 +275,7 @@
   }
 
   protected void collectFiles(File f, ArrayList inputFiles) {
+    //System.out.println("Collect: "+f.getAbsolutePath());
     if (!f.canRead()) {
       return;
     }
@@ -289,6 +288,21 @@
     }
     inputFiles.add(f);
     addUniqueBytes(f.length());
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#setHTMLParser(org.apache.lucene.benchmark.byTask.feeds.HTMLParser)
+   */
+  public void setHTMLParser(HTMLParser htmlParser) {
+    this.htmlParser = htmlParser;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#getHtmlParser()
+   */
+  public HTMLParser getHtmlParser() {
+    return htmlParser;
   }
 
 

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java?view=auto&rev=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java Mon Mar 26 09:46:33 2007
@@ -0,0 +1,82 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+import java.util.Properties;
+
+/**
+ * HTML Parser that is based on Lucene's demo HTML parser.
+ */
+public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.HTMLParser {
+
+  DateFormat dateFormat;
+  
+  public DemoHTMLParser () {
+    dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy kk:mm:ss ",Locale.US);  //Tue, 09 Dec 2003 22:39:08 GMT
+    dateFormat.setLenient(true);
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.HTMLParser#parse(java.lang.String, java.util.Date, java.io.Reader, java.text.DateFormat)
+   */
+  public DocData parse(String name, Date date, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException {
+    org.apache.lucene.demo.html.HTMLParser p = new org.apache.lucene.demo.html.HTMLParser(reader);
+    
+    // title
+    String title = p.getTitle();
+    // properties 
+    Properties props = p.getMetaTags(); 
+    // body
+    Reader r = p.getReader();
+    char c[] = new char[1024];
+    StringBuffer bodyBuf = new StringBuffer();
+    int n;
+    while ((n = r.read(c)) >= 0) {
+      if (n>0) {
+        bodyBuf.append(c,0,n);
+      }
+    }
+    r.close();
+    if (date == null && props.getProperty("date")!=null) {
+      try {
+        date = dateFormat.parse(props.getProperty("date").trim());
+      } catch (ParseException e) {
+        // do not fail test just because a date could not be parsed
+        System.out.println("ignoring date parse exception (assigning 'now') for: "+props.getProperty("date"));
+        date = new Date(); // now 
+      }
+    }
+      
+    return new DocData(name, bodyBuf.toString(), title, props, date);
+  }
+
+  public DocData parse(String name, Date date, StringBuffer inputText, DateFormat dateFormat) throws IOException, InterruptedException {
+    // TODO Auto-generated method stub
+    return parse(name, date, new StringReader(inputText.toString()), dateFormat);
+  }
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java?view=auto&rev=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java Mon Mar 26 09:46:33 2007
@@ -0,0 +1,113 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Date;
+import java.util.Properties;
+
+/**
+ * Output of parsing (e.g. HTML parsing) of an input document.
+ */
+
+public class DocData {
+  
+  private String name;
+  private String body;
+  private String title;
+  private Date date;
+  private Properties props;
+  
+  public DocData(String name, String body, String title, Properties props, Date date) {
+    this.name = name;
+    this.body = body;
+    this.title = title;
+    this.date = date;
+    this.props = props;
+  }
+
+  /**
+   * @return Returns the name.
+   */
+  public String getName() {
+    return name;
+  }
+
+  /**
+   * @param name The name to set.
+   */
+  public void setName(String name) {
+    this.name = name;
+  }
+
+  /**
+   * @return Returns the props.
+   */
+  public Properties getProps() {
+    return props;
+  }
+
+  /**
+   * @param props The props to set.
+   */
+  public void setProps(Properties props) {
+    this.props = props;
+  }
+
+  /**
+   * @return Returns the body.
+   */
+  public String getBody() {
+    return body;
+  }
+
+  /**
+   * @param body The body to set.
+   */
+  public void setBody(String body) {
+    this.body = body;
+  }
+
+  /**
+   * @return Returns the title.
+   */
+  public String getTitle() {
+    return title;
+  }
+
+  /**
+   * @param title The title to set.
+   */
+  public void setTitle(String title) {
+    this.title = title;
+  }
+
+  /**
+   * @return Returns the date.
+   */
+  public Date getDate() {
+    return date;
+  }
+
+  /**
+   * @param date The date to set.
+   */
+  public void setDate(Date date) {
+    this.date = date;
+  }
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
------------------------------------------------------------------------------
    svn:executable = *

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Mon Mar 26 09:46:33 2007
@@ -61,4 +61,11 @@
 
   /** Print some statistics on docs available/added/etc. */ 
   public void printDocStatistics();
-}
+
+  /** Set the html parser to use, when appropriate */
+  public void setHTMLParser(HTMLParser htmlParser);
+  
+  /** Returns the htmlParser. */
+  public HTMLParser getHtmlParser();
+
+}
\ No newline at end of file

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java?view=auto&rev=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java Mon Mar 26 09:46:33 2007
@@ -0,0 +1,51 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.text.DateFormat;
+import java.util.Date;
+
+/**
+ * HTML Parsing Interface for test purposes
+ */
+public interface HTMLParser {
+
+  /**
+   * Parse the input Reader and return DocData. 
+   * A provided name or date is used for the result, otherwise an attempt is 
+   * made to set them from the parsed data.
+   * @param dateFormat date formatter to use for extracting the date.   
+   * @param name name of the result doc data. If null, attempt to set by parsed data.
+   * @param date date of the result doc data. If null, attempt to set by parsed data.
+   * @param reader of html text to parse.
+   * @return Parsed doc data.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public DocData parse(String name, Date date, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException;
+  
+  /**
+   * Parse the inputText and return DocData. 
+   * @param inputText the html text to parse.
+   * @see #parse(String, Date, Reader, DateFormat)
+   */
+  public DocData parse(String name, Date date, StringBuffer inputText, DateFormat dateFormat) throws IOException, InterruptedException;
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/NoMoreDataException.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/NoMoreDataException.java?view=auto&rev=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/NoMoreDataException.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/NoMoreDataException.java Mon Mar 26 09:46:33 2007
@@ -0,0 +1,27 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Exception indicating there is no more data.
+ * Thrown by Docs Makers if doc.maker.forever is false and docs sources of that maker were exhausted.
+ * This is useful for iterating all documents of a source, in case we don't know in advance how many docs there are.
+ */
+public class NoMoreDataException extends Exception {
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/NoMoreDataException.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/NoMoreDataException.java
------------------------------------------------------------------------------
    svn:executable = *

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java Mon Mar 26 09:46:33 2007
@@ -25,6 +25,7 @@
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.Locale;
 
 
@@ -66,13 +67,16 @@
     File f = null;
     String name = null;
     synchronized (this) {
-      f = (File) inputFiles.get(nextFile++);
-      name = f.getCanonicalPath()+"_"+iteration;
       if (nextFile >= inputFiles.size()) { 
-        // exhausted files, start a new round
+        // exhausted files, start a new round, unless forever set to false.
+        if (!forever) {
+          throw new NoMoreDataException();
+        }
         nextFile = 0;
         iteration++;
       }
+      f = (File) inputFiles.get(nextFile++);
+      name = f.getCanonicalPath()+"_"+iteration;
     }
     
     BufferedReader reader = new BufferedReader(new FileReader(f));
@@ -90,13 +94,9 @@
     
     addBytes(f.length());
 
-    DocData dd = new DocData();
     
-    dd.date = dateFormat.parse(dateStr.trim());
-    dd.name = name;
-    dd.title = title;
-    dd.body = bodyBuf.toString();
-    return dd;
+    Date date = dateFormat.parse(dateStr.trim()); 
+    return new DocData(name, bodyBuf.toString(), title, null, date);
   }
 
 

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java Mon Mar 26 09:46:33 2007
@@ -18,7 +18,7 @@
  */
 
 /**
- * Create documents for the test
+ * Create documents for the test.
  */
 public class SimpleDocMaker extends BasicDocMaker {
   
@@ -58,12 +58,12 @@
     return 0; // not applicable
   }
 
-  protected DocData getNextDocData() {
-    DocData dd = new DocData();
-    dd.body = DOC_TEXT;
-    dd.name = "doc"+newdocid();
+  protected DocData getNextDocData() throws NoMoreDataException {
+    if (docID>0 && !forever) {
+      throw new NoMoreDataException();
+    }
     addBytes(DOC_TEXT.length());
-    return dd;
+    return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null);
   }
 
 }

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java Mon Mar 26 09:46:33 2007
@@ -23,19 +23,15 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.StringReader;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.Locale;
-import java.util.Properties;
 import java.util.zip.GZIPInputStream;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.demo.html.HTMLParser;
 
 
 /**
@@ -45,7 +41,7 @@
 
   private static final String newline = System.getProperty("line.separator");
   
-  private DateFormat dateFormat;
+  private DateFormat dateFormat [];
   private File dataDir = null;
   private ArrayList inputFiles = new ArrayList();
   private int nextFile = 0;
@@ -53,6 +49,13 @@
   private BufferedReader reader;
   private GZIPInputStream zis;
   
+  private static final String DATE_FORMATS [] = {
+    "EEE, dd MMM yyyy kk:mm:ss z", //Tue, 09 Dec 2003 22:39:08 GMT
+    "EEE MMM dd kk:mm:ss yyyy z",  //Tue Dec 09 16:45:08 2003 EST
+    "EEE, dd-MMM-':'y kk:mm:ss z", //Tue, 09 Dec 2003 22:39:08 GMT
+    "EEE, dd-MMM-yyy kk:mm:ss z", //Tue, 09 Dec 2003 22:39:08 GMT
+  };
+  
   /* (non-Javadoc)
    * @see SimpleDocMaker#setConfig(java.util.Properties)
    */
@@ -65,34 +68,44 @@
       throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
     }
     // date format: 30-MAR-1987 14:22:36.87
-    dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy kk:mm:ss ",Locale.US);  //Tue, 09 Dec 2003 22:39:08 GMT
-    dateFormat.setLenient(true);
-  }
+    dateFormat = new SimpleDateFormat[DATE_FORMATS.length];
+    for (int i = 0; i < dateFormat.length; i++) {
+      dateFormat[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US);
+      dateFormat[i].setLenient(true);
+    }
+ }
 
-  private void openNextFile() throws Exception {
+  private void openNextFile() throws NoMoreDataException, Exception {
     closeInputs();
     int retries = 0;
-    while (retries<20) {
+    while (true) {
       File f = null;
       synchronized (this) {
-        f = (File) inputFiles.get(nextFile++);
         if (nextFile >= inputFiles.size()) { 
-          // exhausted files, start a new round
+          // exhausted files, start a new round, unless forever set to false.
+          if (!forever) {
+            throw new NoMoreDataException();
+          }
           nextFile = 0;
           iteration++;
         }
+        f = (File) inputFiles.get(nextFile++);
       }
       System.out.println("opening: "+f+" length: "+f.length());
       try {
         zis = new GZIPInputStream(new BufferedInputStream(new FileInputStream(f)));
-        break;
+        reader = new BufferedReader(new InputStreamReader(zis));
+        return;
       } catch (Exception e) {
         retries++;
-        System.out.println("Skipping 'bad' file "+f.getAbsolutePath()+"  #retries="+retries);
-        continue;
+        if (retries<20) {
+          System.out.println("Skipping 'bad' file "+f.getAbsolutePath()+"  #retries="+retries);
+          continue;
+        } else {
+          throw new NoMoreDataException();
+        }
       }
     }
-    reader = new BufferedReader(new InputStreamReader(zis));
   }
 
   private void closeInputs() {
@@ -142,7 +155,7 @@
     return sb;
   }
   
-  protected DocData getNextDocData() throws Exception {
+  protected DocData getNextDocData() throws NoMoreDataException, Exception {
     if (reader==null) {
       openNextFile();
     }
@@ -162,39 +175,27 @@
     // 6. collect until end of doc
     sb = read("</DOC>",null,false,true);
     // this is the next document, so parse it 
-    // TODO use a more robust html parser (current one aborts parsing quite easily). 
-    HTMLParser p = new HTMLParser(new StringReader(sb.toString()));
-    // title
-    String title = p.getTitle();
-    // properties 
-    Properties props = p.getMetaTags(); 
-    // body
-    Reader r = p.getReader();
-    char c[] = new char[1024];
-    StringBuffer bodyBuf = new StringBuffer();
-    int n;
-    while ((n = r.read(c)) >= 0) {
-      if (n>0) {
-        bodyBuf.append(c,0,n);
-      }
-    }
-    r.close();
-    addBytes(bodyBuf.length());
+    Date date = parseDate(dateStr);
+    HTMLParser p = getHtmlParser();
+    DocData docData = p.parse(name, date, sb, dateFormat[0]);
+    addBytes(sb.length()); // count char length of parsed html text (larger than the plain doc body text). 
     
-    DocData dd = new DocData();
+    return docData;
+  }
 
-    try {
-      dd.date = dateFormat.parse(dateStr.trim());
-    } catch (ParseException e) {
-      // do not fail test just because a date could not be parsed
-      System.out.println("ignoring date parse exception (assigning 'now') for: "+dateStr);
-      dd.date = new Date(); // now 
-    }
-    dd.name = name;
-    dd.title = title;
-    dd.body = bodyBuf.toString();
-    dd.props = props;
-    return dd;
+  private Date parseDate(String dateStr) {
+    Date date = null;
+    for (int i=0; i<dateFormat.length; i++) {
+      try {
+        date = dateFormat[i].parse(dateStr.trim());
+        return date;
+      } catch (ParseException e) {
+      }
+    }
+    // do not fail test just because a date could not be parsed
+    System.out.println("ignoring date parse exception (assigning 'now') for: "+dateStr);
+    date = new Date(); // now 
+    return date;
   }
 
 

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
(original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
Mon Mar 26 09:46:33 2007
@@ -149,6 +149,19 @@
 command "Wonderful" to be used in the algorithm.
 </p>
 
+<p>
+<u>External classes</u>: It is sometimes useful to invoke the benchmark
+package with your external alg file that configures the use of your own
+doc/query maker and or html parser. You can work this out without
+modifying the benchmark package code, by passing your class path
+with the benchmark.ext.classpath property:
+<ul>
+  <li>ant run-task -Dtask.alg=[full-path-to-your-alg-file]
+      <font color="#FF0000">-Dbenchmark.ext.classpath=/mydir/classes
+      </font> -Dtask.mem=512M</li>
+</ul>
+</p>
+
 <a name="algorithm"></a>
 <h2>Benchmark "algorithm"</h2>
 
@@ -198,6 +211,14 @@
  30 times in a row.
 <br>Example -  <font color="#FF0066">{ AddDoc AddDoc } : 30</font> - would do
  addDoc 60 times in a row.
+ <br><b>Exhaustive repeating</b>: use <font color="#FF0066">*</font> instead of
+ a number to repeat forever.
+ This is sometimes useful, for adding as many files as a doc maker can create,
+ without iterating over the same files again, but in the case that the exact
+ number of files is not known in advance. For instance, TREC files extracted
+ from a zip file.
+ <br>Example -  <font color="#FF0066">{ AddDoc } : *</font>  - would add docs
+ until the doc maker is "exhausted".
  </li>
  <li>
  <b>Command parameter</b>: a command can optionally take a single parameter.
@@ -487,6 +508,8 @@
   <li><b>Docs and queries creation:</b></li>
     <ul><li>analyzer
     </li><li>doc.maker
+    </li><li>doc.maker.forever
+    </li><li>html.parser
     </li><li>doc.stored
     </li><li>doc.tokenized
     </li><li>doc.term.vector

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
(original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
Mon Mar 26 09:46:33 2007
@@ -21,11 +21,13 @@
 import java.util.Iterator;
 
 import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
 
 /**
  * Sequence of parallel or sequential tasks.
  */
 public class TaskSequence extends PerfTask {
+  public static int REPEAT_EXHAUST = -2; 
   private ArrayList tasks;
   private int repetitions = 1;
   private boolean parallel;
@@ -61,9 +63,13 @@
 
   /**
    * @param repetitions The repetitions to set.
+   * @throws Exception 
    */
-  public void setRepetitions(int repetitions) {
+  public void setRepetitions(int repetitions) throws Exception {
     this.repetitions = repetitions;
+    if (repetitions==REPEAT_EXHAUST && isParallel()) {
+      throw new Exception("REPEAT_EXHAUST is not allowed for parallel tasks");
+    }
     setSequenceName();
   }
 
@@ -88,10 +94,15 @@
     }
     
     int count = 0;
-    for (int k=0; k<repetitions; k++) {
+    boolean exhausted = false;
+    for (int k=0; (repetitions==REPEAT_EXHAUST && !exhausted) || k<repetitions; k++) {
       for (Iterator it = tasks.iterator(); it.hasNext();) {
         PerfTask task = (PerfTask) it.next();
-        count += task.runAndMaybeStats(letChildReport);
+        try {
+          count += task.runAndMaybeStats(letChildReport);
+        } catch (NoMoreDataException e) {
+          exhausted = true;
+        }
       }
     }
     return count;
@@ -101,7 +112,8 @@
     long delayStep = (perMin ? 60000 : 1000) /rate;
     long nextStartTime = System.currentTimeMillis();
     int count = 0;
-    for (int k=0; k<repetitions; k++) {
+    boolean exhausted = false;
+    for (int k=0; (repetitions==REPEAT_EXHAUST && !exhausted) || k<repetitions; k++) {
       for (Iterator it = tasks.iterator(); it.hasNext();) {
         PerfTask task = (PerfTask) it.next();
         long waitMore = nextStartTime - System.currentTimeMillis();
@@ -110,7 +122,11 @@
           Thread.sleep(waitMore);
         }
         nextStartTime += delayStep; // this aims at avarage rate. 
-        count += task.runAndMaybeStats(letChildReport);
+        try {
+          count += task.runAndMaybeStats(letChildReport);
+        } catch (NoMoreDataException e) {
+          exhausted = true;
+        }
       }
     }
     return count;
@@ -198,6 +214,9 @@
     if (repetitions>1) {
       sb.append(" * " + repetitions);
     }
+    if (repetitions==REPEAT_EXHAUST) {
+      sb.append(" * EXHAUST");
+    }
     if (rate>0) {
       sb.append(",  rate: " + rate+"/"+(perMin?"min":"sec"));
     }
@@ -237,7 +256,9 @@
 
   private void setSequenceName() {
     seqName = super.getName();
-    if (repetitions>1) {
+    if (repetitions==REPEAT_EXHAUST) {
+      seqName += "_Exhaust";
+    } else if (repetitions>1) {
       seqName += "_"+repetitions;
     }
     if (rate>0) {

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
(original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
Mon Mar 26 09:46:33 2007
@@ -117,8 +117,12 @@
               colonOk = false;
               // get repetitions number
               stok.nextToken();
-              if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expexted repetitions number: - "+stok.toString());
-              ((TaskSequence)prevTask).setRepetitions((int)stok.nval); 
+              if ((char)stok.ttype == '*') {
+                ((TaskSequence)prevTask).setRepetitions(TaskSequence.REPEAT_EXHAUST);
+              } else {
+                if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expexted repetitions number: - "+stok.toString());
+                ((TaskSequence)prevTask).setRepetitions((int)stok.nval);
+              }
               // check for rate specification (ops/min)
               stok.nextToken();
               if (stok.ttype!=':') {

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?view=diff&rev=522569&r1=522568&r2=522569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
(original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
Mon Mar 26 09:46:33 2007
@@ -81,6 +81,49 @@
     assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
   }
 
+  /**
+   * Test Exhausting Doc Maker logic
+   */
+  public void testExhaustDocMaker() throws Exception {
+    // 1. alg definition (required in every "logic" test)
+    String algLines[] = {
+        "# ----- properties ",
+        "doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker",
+        "doc.add.log.step=1",
+        "doc.term.vector=false",
+        "doc.maker.forever=false",
+        "directory=RAMDirectory",
+        "doc.stored=false",
+        "doc.tokenized=false",
+        "# ----- alg ",
+        "CreateIndex",
+        "{ AddDoc } : * ",
+        "Optimize",
+        "CloseIndex",
+        "OpenReader",
+        "{ CountingSearchTest } : 100",
+        "CloseReader",
+        "[ CountingSearchTest > : 30",
+        "[ CountingSearchTest > : 9",
+    };
+    
+    // 2. we test this value later
+    CountingSearchTestTask.numSearches = 0;
+    
+    // 3. execute the algorithm  (required in every "logic" test)
+    Benchmark benchmark = execBenchmark(algLines);
+
+    // 4. test specific checks after the benchmark run completed.
+    assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);
+    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
+    // now we should be able to open the index for write. 
+    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
+    iw.close();
+    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
+    assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
+  }
+
+  
   // create the benchmark and execute it. 
   private Benchmark execBenchmark(String[] algLines) throws Exception {
     String algText = algLinesToText(algLines);



Mime
View raw message