lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r495834 [1/3] - in /lucene/java/trunk/contrib/benchmark: ./ conf/ src/java/org/apache/lucene/benchmark/byTask/ src/java/org/apache/lucene/benchmark/byTask/feeds/ src/java/org/apache/lucene/benchmark/byTask/programmatic/ src/java/org/apache/...
Date Sat, 13 Jan 2007 04:08:25 GMT
Author: gsingers
Date: Fri Jan 12 20:08:23 2007
New Revision: 495834

URL: http://svn.apache.org/viewvc?view=rev&rev=495834
Log:
Lucene 675:  Initial commit of Doron Cohen's byTask benchmarking contribution.  Thanks Doron!

Added:
    lucene/java/trunk/contrib/benchmark/CHANGES.txt
    lucene/java/trunk/contrib/benchmark/conf/compound-penalty.alg
    lucene/java/trunk/contrib/benchmark/conf/deletes.alg
    lucene/java/trunk/contrib/benchmark/conf/micro-standard.alg
    lucene/java/trunk/contrib/benchmark/conf/sample.alg
    lucene/java/trunk/contrib/benchmark/conf/sloppy-phrase.alg
    lucene/java/trunk/contrib/benchmark/conf/standard.alg
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package.html
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Points.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Report.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package.html
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ResetInputsTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SetPropTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package.html
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package.html
Modified:
    lucene/java/trunk/contrib/benchmark/build.xml

Added: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (added)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Fri Jan 12 20:08:23 2007
@@ -0,0 +1,11 @@
+Lucene Benchmark Contrib Change Log
+
+The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
+
+$Id:$
+
+01/09/07
+
+1. Committed Doron Cohen's benchmarking contribution, which provides an easily extensible, task-based approach to benchmarking.  See the javadocs for information. (Doron Cohen via Grant Ingersoll)
+
+2. Added this file.
\ No newline at end of file

Modified: lucene/java/trunk/contrib/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/build.xml?view=diff&rev=495834&r1=495833&r2=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/build.xml (original)
+++ lucene/java/trunk/contrib/benchmark/build.xml Fri Jan 12 20:08:23 2007
@@ -1,119 +1,130 @@
-<?xml version="1.0"?>
-<project name="benchmark" default="default">
-
-    <description>
-        Lucene Benchmarking Contributions
-    </description>
-
-    <import file="../contrib-build.xml"/>
-    <property name="working.dir" value="work"/>
-
-    <target name="check-files">
-
-        <available file="temp/news20.tar.gz" property="news20.exists"/>
-
-        <available file="${working.dir}/20_newsgroup" property="news20.expanded"/>
-
-        <available file="temp/reuters21578.tar.gz" property="reuters.exists"/>
-        <available file="${working.dir}/reuters" property="reuters.expanded"/>
-        <available file="${working.dir}/reuters-out" property="reuters.extracted"/>
-        <available file="temp/20news-18828.tar.gz" property="20news-18828.exists"/>
-        <available file="${working.dir}/20news-18828" property="20news-18828.expanded"/>
-        <available file="${working.dir}/mini_newsgroups" property="mini.expanded"/>
-        
-    </target>
-
-    <target name="get-news-20" unless="20news-18828.exists">
-        <get src="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.tar.gz"
-             dest="temp/news20.tar.gz"/>
-
-    </target>
-    <target name="get-reuters" unless="reuters.exists">
-
-        <get src="http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz"
-            dest="temp/reuters21578.tar.gz"/>
-    </target>
-
-    <target name="expand-news-20"  unless="news20.expanded">
-        <gunzip src="temp/news20.tar.gz" dest="temp"/>
-        <untar src="temp/news20.tar" dest="${working.dir}"/>
-    </target>
-    <target name="expand-reuters" unless="reuters.expanded">
-        <gunzip src="temp/reuters21578.tar.gz" dest="temp"/>
-        <mkdir dir="${working.dir}/reuters"/>
-        <untar src="temp/reuters21578.tar" dest="${working.dir}/reuters"/>
-        <delete >
-            <fileset dir="${working.dir}/reuters">
-                <include name="*.txt"/>
-            </fileset>
-        </delete>
-
-    </target>
-    <target name="extract-reuters" depends="check-files" unless="reuters.extracted">
-        <mkdir dir="${working.dir}/reuters-out"/>
-        <java classname="org.apache.lucene.benchmark.utils.ExtractReuters" maxmemory="1024M" fork="true">
-            <classpath refid="run.classpath"/>
-            <arg line="${working.dir}/reuters ${working.dir}/reuters-out"/>
-        </java>
-    </target>
-    <target name="get-20news-18828" unless="20news-18828.exists">
-        <get src="http://people.csail.mit.edu/u/j/jrennie/public_html/20Newsgroups/20news-18828.tar.gz"
-             dest="temp/20news-18828.tar.gz"/>
-
-    </target>
-    <target name="expand-20news-18828" unless="20news-18828.expanded">
-        <gunzip src="temp/20news-18828.tar.gz" dest="temp"/>
-        <untar src="temp/20news-18828.tar" dest="${working.dir}"/>
-    </target>
-    <target name="get-mini-news" unless="mini.exists">
-        <get src="http://kdd.ics.uci.edu/databases/20newsgroups/mini_newsgroups.tar.gz"
-             dest="temp/mini_newsgroups.tar.gz"/>
-    </target>
-    <target name="expand-mini-news" unless="mini.expanded">
-        <gunzip src="temp/mini_newsgroups.tar.gz" dest="temp"/>
-        <untar src="temp/mini_newsgroups.tar" dest="${working.dir}"/>
-    </target>
-
-    <target name="get-files" depends="check-files">
-        <mkdir dir="temp"/>
-        <antcall target="get-reuters"/>
-        <antcall target="expand-reuters"/>
-        <antcall target="extract-reuters"/>
-    </target>
-    <property name="digester.jar" value="commons-digester-1.7.jar"/>
-    <property name="collections.jar" value="commons-collections-3.1.jar"/>
-    <property name="logging.jar" value="commons-logging-1.0.4.jar"/>
-    <property name="bean-utils.jar" value="commons-beanutils-1.7.0.jar"/>
-    <path id="classpath">
-        <pathelement path="${lucene.jar}"/>
-        <pathelement path="${basedir}/lib/${digester.jar}"/>
-        <pathelement path="${basedir}/lib/${collections.jar}"/>
-        <pathelement path="${basedir}/lib/${logging.jar}"/>
-        <pathelement path="${basedir}/lib/${bean-utils.jar}"/>
-    </path>
-    <path id="run.classpath">
-        <path refid="classpath"/>
-        <pathelement location="${build.dir}/classes/java"/>
-        <pathelement path="${basedir}/lib/${digester.jar}"/>
-        <pathelement path="${basedir}/lib/${collections.jar}"/>
-        <pathelement path="${basedir}/lib/${logging.jar}"/>
-        <pathelement path="${basedir}/lib/${bean-utils.jar}"/>
-    </path>
-
-    <target name="run-standard" depends="compile,check-files,get-files" description="Run the standard baseline">
-        <echo>Working Directory: ${working.dir}</echo>
-        <java classname="org.apache.lucene.benchmark.Driver"  maxmemory="1024M" fork="true">
-            <classpath refid="run.classpath"/>
-            <arg line="${working.dir} ${basedir}/conf/standard-config.xml"/>
-        </java>
-    </target>
-    <target name="run-micro-standard" depends="compile,check-files,get-files" description="Run the standard baseline">
-        <echo>Working Directory: ${working.dir}</echo>
-        <java classname="org.apache.lucene.benchmark.Driver" maxmemory="1024M" fork="true">
-            <classpath refid="run.classpath"/>
-            <arg line="${working.dir} ${basedir}/conf/micro-standard-config.xml"/>
-        </java>
-    </target>
-
-    <target name="init" depends="common.init,check-files"/>
-</project>
+<?xml version="1.0"?>
+<project name="benchmark" default="default">
+
+    <description>
+        Lucene Benchmarking Contributions
+    </description>
+
+    <import file="../contrib-build.xml"/>
+    <property name="working.dir" value="work"/>
+
+    <target name="check-files">
+
+        <available file="temp/news20.tar.gz" property="news20.exists"/>
+
+        <available file="${working.dir}/20_newsgroup" property="news20.expanded"/>
+
+        <available file="temp/reuters21578.tar.gz" property="reuters.exists"/>
+        <available file="${working.dir}/reuters" property="reuters.expanded"/>
+        <available file="${working.dir}/reuters-out" property="reuters.extracted"/>
+        <available file="temp/20news-18828.tar.gz" property="20news-18828.exists"/>
+        <available file="${working.dir}/20news-18828" property="20news-18828.expanded"/>
+        <available file="${working.dir}/mini_newsgroups" property="mini.expanded"/>
+        
+    </target>
+
+    <target name="get-news-20" unless="20news-18828.exists">
+        <get src="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.tar.gz"
+             dest="temp/news20.tar.gz"/>
+
+    </target>
+    <target name="get-reuters" unless="reuters.exists">
+
+        <get src="http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz"
+            dest="temp/reuters21578.tar.gz"/>
+    </target>
+
+    <target name="expand-news-20"  unless="news20.expanded">
+        <gunzip src="temp/news20.tar.gz" dest="temp"/>
+        <untar src="temp/news20.tar" dest="${working.dir}"/>
+    </target>
+    <target name="expand-reuters" unless="reuters.expanded">
+        <gunzip src="temp/reuters21578.tar.gz" dest="temp"/>
+        <mkdir dir="${working.dir}/reuters"/>
+        <untar src="temp/reuters21578.tar" dest="${working.dir}/reuters"/>
+        <delete >
+            <fileset dir="${working.dir}/reuters">
+                <include name="*.txt"/>
+            </fileset>
+        </delete>
+
+    </target>
+    <target name="extract-reuters" depends="check-files" unless="reuters.extracted">
+        <mkdir dir="${working.dir}/reuters-out"/>
+        <java classname="org.apache.lucene.benchmark.utils.ExtractReuters" maxmemory="1024M" fork="true">
+            <classpath refid="run.classpath"/>
+            <arg line="${working.dir}/reuters ${working.dir}/reuters-out"/>
+        </java>
+    </target>
+    <target name="get-20news-18828" unless="20news-18828.exists">
+        <get src="http://people.csail.mit.edu/u/j/jrennie/public_html/20Newsgroups/20news-18828.tar.gz"
+             dest="temp/20news-18828.tar.gz"/>
+
+    </target>
+    <target name="expand-20news-18828" unless="20news-18828.expanded">
+        <gunzip src="temp/20news-18828.tar.gz" dest="temp"/>
+        <untar src="temp/20news-18828.tar" dest="${working.dir}"/>
+    </target>
+    <target name="get-mini-news" unless="mini.exists">
+        <get src="http://kdd.ics.uci.edu/databases/20newsgroups/mini_newsgroups.tar.gz"
+             dest="temp/mini_newsgroups.tar.gz"/>
+    </target>
+    <target name="expand-mini-news" unless="mini.expanded">
+        <gunzip src="temp/mini_newsgroups.tar.gz" dest="temp"/>
+        <untar src="temp/mini_newsgroups.tar" dest="${working.dir}"/>
+    </target>
+
+    <target name="get-files" depends="check-files">
+        <mkdir dir="temp"/>
+        <antcall target="get-reuters"/>
+        <antcall target="expand-reuters"/>
+        <antcall target="extract-reuters"/>
+    </target>
+    <property name="digester.jar" value="commons-digester-1.7.jar"/>
+    <property name="collections.jar" value="commons-collections-3.1.jar"/>
+    <property name="logging.jar" value="commons-logging-1.0.4.jar"/>
+    <property name="bean-utils.jar" value="commons-beanutils-1.7.0.jar"/>
+    <path id="classpath">
+        <pathelement path="${lucene.jar}"/>
+        <pathelement path="${basedir}/lib/${digester.jar}"/>
+        <pathelement path="${basedir}/lib/${collections.jar}"/>
+        <pathelement path="${basedir}/lib/${logging.jar}"/>
+        <pathelement path="${basedir}/lib/${bean-utils.jar}"/>
+    </path>
+    <path id="run.classpath">
+        <path refid="classpath"/>
+        <pathelement location="${build.dir}/classes/java"/>
+        <pathelement path="${basedir}/lib/${digester.jar}"/>
+        <pathelement path="${basedir}/lib/${collections.jar}"/>
+        <pathelement path="${basedir}/lib/${logging.jar}"/>
+        <pathelement path="${basedir}/lib/${bean-utils.jar}"/>
+    </path>
+
+    <target name="run-standard" depends="compile,check-files,get-files" description="Run the standard baseline">
+        <echo>Working Directory: ${working.dir}</echo>
+        <java classname="org.apache.lucene.benchmark.Driver"  maxmemory="1024M" fork="true">
+            <classpath refid="run.classpath"/>
+            <arg line="${working.dir} ${basedir}/conf/standard-config.xml"/>
+        </java>
+    </target>
+    <target name="run-micro-standard" depends="compile,check-files,get-files" description="Run the standard baseline">
+        <echo>Working Directory: ${working.dir}</echo>
+        <java classname="org.apache.lucene.benchmark.Driver" maxmemory="1024M" fork="true">
+            <classpath refid="run.classpath"/>
+            <arg line="${working.dir} ${basedir}/conf/micro-standard-config.xml"/>
+        </java>
+    </target>
+
+    <property name="task.alg" value="${basedir}/conf/micro-standard.alg"/>
+    <property name="task.mem" value="140M"/>
+
+    <target name="run-task" depends="compile,check-files,get-files" description="Run compound penalty perf test">
+        <echo>Working Directory: ${working.dir}</echo>
+        <java classname="org.apache.lucene.benchmark.byTask.Benchmark" maxmemory="${task.mem}" fork="true">
+            <classpath refid="run.classpath"/>
+            <arg line="${task.alg}"/>
+        </java>
+    </target>
+
+    <target name="init" depends="common.init,check-files"/>
+</project>

Added: lucene/java/trunk/contrib/benchmark/conf/compound-penalty.alg
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/conf/compound-penalty.alg?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/conf/compound-penalty.alg (added)
+++ lucene/java/trunk/contrib/benchmark/conf/compound-penalty.alg Fri Jan 12 20:08:23 2007
@@ -0,0 +1,93 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+
+# --------------------------------------------------------
+# Compound: what is the cost of compound format in indexing?
+# It does twice as much IO; is it twice as slow? (no)
+# --------------------------------------------------------
+
+# -------------------------------------------------------------------------------------
+# Multi-valued params are iterated by NewRound tasks and added to reports; each starts with its column name.
+
+merge.factor=mrg:10
+max.buffered=buf:10
+compound=compnd:true:false
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=stored:true:true:false:false
+doc.tokenized=true
+doc.term.vector=vector:true:true:false:false
+doc.add.log.step=500
+doc.delete.log.step=100
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=1
+
+log.queries=false
+# -------------------------------------------------------------------------------------
+
+ResetSystemErase
+
+{ "Round"
+  CreateIndex
+  { "AddDocs" AddDoc > : 10000
+  CloseIndex
+
+  OpenReader  
+  { "SearchSameRdr" Search > : 500
+  CloseReader 
+              
+  { "WarmNewRdr" Warm > : 50
+              
+  { "SrchNewRdr" Search > : 500
+              
+  { "SrchTrvNewRdr" SearchTrav > : 300
+              
+  { "SrchTrvRetNewRdr" SearchTravRet > : 100
+
+  [ "WarmNewRdr" Warm > : 50
+              
+  [ "SrchNewRdr" Search > : 500
+              
+  [ "SrchTrvNewRdr" SearchTrav > : 300
+              
+  [ "SrchTrvRetNewRdr" SearchTravRet > : 100
+
+  ResetInputs
+  RepSumByName
+  NewRound
+} : 4
+            
+RepSumByName
+RepSumByNameRound
+RepSumByPrefRound AddDocs
+RepSumByPrefRound SearchSameRdr
+RepSumByPrefRound WarmNewRdr
+RepSumByPrefRound SrchTrvNewRdr
+RepSumByPrefRound SrchTrvRetNewRdr

Added: lucene/java/trunk/contrib/benchmark/conf/deletes.alg
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/conf/deletes.alg?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/conf/deletes.alg (added)
+++ lucene/java/trunk/contrib/benchmark/conf/deletes.alg Fri Jan 12 20:08:23 2007
@@ -0,0 +1,70 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# --------------------------------------------------------
+# Deletes: what is the cost of deleting documents?
+# --------------------------------------------------------
+
+# -------------------------------------------------------------------------------------
+# Multi-valued params are iterated by NewRound tasks and added to reports; each starts with its column name.
+
+merge.factor=mrg:10
+max.buffered=buf:100
+compound=true
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=10000
+doc.delete.log.step=100
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=1
+
+log.queries=false
+# -------------------------------------------------------------------------------------
+
+ResetSystemErase
+
+CreateIndex
+CloseIndex
+
+{ "Populate"
+    OpenIndex
+    { AddDoc(10) > : 200000
+    Optimize
+    CloseIndex
+> 
+
+{ "Deletions"
+   OpenReader  DeleteDoc   CloseReader
+} : 4000
+
+RepSumByName
+

Added: lucene/java/trunk/contrib/benchmark/conf/micro-standard.alg
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/conf/micro-standard.alg?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/conf/micro-standard.alg (added)
+++ lucene/java/trunk/contrib/benchmark/conf/micro-standard.alg Fri Jan 12 20:08:23 2007
@@ -0,0 +1,76 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# Multi-valued params are iterated by NewRound tasks and added to reports; each starts with its column name.
+
+merge.factor=mrg:10:100:10:100
+max.buffered=buf:10:10:100:100
+compound=true
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=500
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+
+{ "Rounds"
+
+    ResetSystemErase
+
+    { "Populate"
+        CreateIndex
+        { "MAddDocs" AddDoc > : 2000
+        Optimize
+        CloseIndex
+    }
+
+    OpenReader  
+    { "SearchSameRdr" Search > : 5000
+    CloseReader 
+                
+    { "WarmNewRdr" Warm > : 50
+                
+    { "SrchNewRdr" Search > : 500
+                
+    { "SrchTrvNewRdr" SearchTrav > : 300
+                
+    { "SrchTrvRetNewRdr" SearchTravRet > : 100
+                
+    NewRound
+
+} : 4
+
+RepSumByName
+RepSumByPrefRound MAddDocs

Added: lucene/java/trunk/contrib/benchmark/conf/sample.alg
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/conf/sample.alg?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/conf/sample.alg (added)
+++ lucene/java/trunk/contrib/benchmark/conf/sample.alg Fri Jan 12 20:08:23 2007
@@ -0,0 +1,85 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# --------------------------------------------------------
+# 
+# Sample: what is the effect of doc size on indexing time?
+#
+# There are two parts in this test:
+# - PopulateShort adds 2N documents of length  L
+# - PopulateLong  adds  N documents of length 2L
+# Which one would be faster?
+# The comparison is done twice.
+#
+# --------------------------------------------------------
+
+# -------------------------------------------------------------------------------------
+# Multi-valued params are iterated by NewRound tasks and added to reports; each starts with its column name.
+
+merge.factor=mrg:10:20
+max.buffered=buf:100:1000
+compound=true
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=500
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=false
+# -------------------------------------------------------------------------------------
+
+{
+
+    { "PopulateShort"
+        CreateIndex
+        { AddDoc(4000) > : 20000
+        Optimize
+        CloseIndex
+    >
+
+    ResetSystemErase
+    
+    { "PopulateLong"
+        CreateIndex
+        { AddDoc(8000) > : 10000
+        Optimize
+        CloseIndex
+    >
+
+    ResetSystemErase
+
+    NewRound
+
+} : 2
+
+RepSumByName
+RepSelectByPref Populate

Added: lucene/java/trunk/contrib/benchmark/conf/sloppy-phrase.alg
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/conf/sloppy-phrase.alg?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/conf/sloppy-phrase.alg (added)
+++ lucene/java/trunk/contrib/benchmark/conf/sloppy-phrase.alg Fri Jan 12 20:08:23 2007
@@ -0,0 +1,74 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# Multi-valued params are iterated by NewRound tasks, are added to reports, and start with the column name.
+
+max.buffered=100
+merge.factor=10
+compound=true
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=false
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=500
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleSloppyPhraseQueryMaker
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# tasks at this depth or less print a message when they start
+task.max.depth.log=1
+
+log.queries=false
+# -------------------------------------------------------------------------------------
+
+
+ResetSystemErase
+
+{ "Populate"
+    CreateIndex
+    { "MAddDocs" AddDoc(2000) > : 20000     
+    Optimize
+    CloseIndex
+}
+
+
+{ "Round"
+
+  OpenReader  
+  { "SearchSameRdr" Search > : 6000
+  CloseReader 
+
+  ResetInputs
+  RepSumByName
+  NewRound
+} : 4
+            
+RepSumByPrefRound MAddDocs
+
+RepSumByName
+RepSumByPrefRound Search

Added: lucene/java/trunk/contrib/benchmark/conf/standard.alg
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/conf/standard.alg?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/conf/standard.alg (added)
+++ lucene/java/trunk/contrib/benchmark/conf/standard.alg Fri Jan 12 20:08:23 2007
@@ -0,0 +1,91 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# Multi-valued params are iterated by NewRound tasks, are added to reports, and start with the column name.
+
+merge.factor=mrg:10:100:10:100:10:100:10:100
+max.buffered=buf:10:10:100:100:10:10:100:100
+compound=cmpnd:true:true:true:true:false:false:false:false
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=2000
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# tasks at this depth or less print a message when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+
+{ "Rounds"
+
+    ResetSystemErase
+
+    { "Populate"
+        CreateIndex
+        { "MAddDocs" AddDoc } : 20000
+        Optimize
+        CloseIndex
+    }
+
+    OpenReader  
+    { "SearchSameRdr" Search > : 5000
+    CloseReader 
+                
+    { "WarmNewRdr" Warm > : 50
+                
+    { "SrchNewRdr" Search > : 500
+                
+    { "SrchTrvNewRdr" SearchTrav > : 300
+                
+    { "SrchTrvRetNewRdr" SearchTravRet > : 100
+                
+    OpenReader  
+    [ "SearchSameRdr" Search > : 5000 : 2500
+    CloseReader 
+                
+    [ "WarmNewRdr" Warm > : 50 : 25
+                
+    [ "SrchNewRdr" Search > : 50 : 25
+                
+    [ "SrchTrvNewRdr" SearchTrav > : 300 : 150
+                
+    [ "SrchTrvRetNewRdr" SearchTravRet > : 100 : 50
+
+    RepSumByPref MAddDocs
+
+    NewRound
+
+} : 8
+
+RepSumByNameRound
+RepSumByName
+RepSumByPrefRound MAddDocs

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java Fri Jan 12 20:08:23 2007
@@ -0,0 +1,91 @@
+package org.apache.lucene.benchmark.byTask;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+
+import org.apache.lucene.benchmark.byTask.utils.Algorithm;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * Run the benchmark algorithm.
+ * <p>Usage: java Benchmark  algorithm-file
+ * <ol>
+ * <li>Read algorithm.
+ * <li> Run the algorithm.
+ * </ol>
+ */
+public class Benchmark {
+
+  /**
+   * Run the benchmark algorithm.
+   * @param args benchmark config and algorithm files
+   */
+  public static void main(String[] args) {
+    // verify command line args
+    if (args.length < 1) {
+      System.err.println("Usage: java Benchmark <algorithm file>");
+      System.exit(1);
+    }
+    
+    // verify input files 
+    File algFile = new File(args[0]);
+    if (!algFile.exists() || !algFile.isFile() || !algFile.canRead()) {
+      System.err.println("cannot find/read algorithm file: "+algFile.getAbsolutePath()); 
+      System.exit(1);
+    }
+    
+    // last preparations
+    PerfRunData runData = null;
+    try {
+      runData = new PerfRunData(new Config(algFile));
+    } catch (Exception e) {
+      System.err.println("Error: cannot init PerfRunData: "+e.getMessage());
+      e.printStackTrace();
+      System.exit(1);
+    }
+    
+    // parse algorithm
+    Algorithm algorithm = null;
+    try {
+      algorithm = new Algorithm(runData);
+    } catch (Exception e) {
+      System.err.println("Error: cannot understand algorithm from file: "+algFile.getAbsolutePath());
+      e.printStackTrace();
+      System.exit(1);
+    }
+
+    System.out.println("------------> algorithm:");
+    System.out.println(algorithm.toString());
+
+    // execute
+    try {
+      algorithm.execute();
+    } catch (Exception e) {
+      System.err.println("Error: cannot execute the algorithm! "+e.getMessage());
+      e.printStackTrace();
+    }
+
+    System.out.println("####################");
+    System.out.println("###  D O N E !!! ###");
+    System.out.println("####################");
+
+  }
+
+}

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java Fri Jan 12 20:08:23 2007
@@ -0,0 +1,225 @@
+package org.apache.lucene.benchmark.byTask;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
+import org.apache.lucene.benchmark.byTask.stats.Points;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.FileUtils;
+
+
+/**
+ * Data maintained by a performance test run.
+ * <p>
+ * Data includes:
+ * <ul>
+ *  <li>Configuration.
+ *  <li>Directory, Writer, Reader.
+ *  <li>DocMaker and a few instances of QueryMaker.
+ *  <li>Analyzer.
+ *  <li>Statistics data which is updated during the run.
+ * </ul>
+ */
+public class PerfRunData {
+
+  private Points points;
+  
+  // objects used during performance test run
+  // directory, analyzer, docMaker - created at startup.
+  // reader, writer, searcher - maintained by basic tasks. 
+  private Directory directory;
+  private Analyzer analyzer;
+  private DocMaker docMaker;
+  private QueryMaker searchQueryMaker;
+  private QueryMaker searchTravQueryMaker;
+  private QueryMaker searchTravRetQueryMaker;
+
+  private IndexReader indexReader;
+  private IndexWriter indexWriter;
+  private Config config;
+  
+  /**
+   * Create the run data: instantiate the analyzer, the doc maker and the
+   * query makers named in the configuration, initialize the index directory
+   * and the statistics points, and print the queries if "log.queries" is true.
+   * @param config configuration of this run.
+   * @throws Exception if a configured class cannot be loaded or instantiated,
+   *   or if the initialization of the index directory fails.
+   */
+  public PerfRunData (Config config) throws Exception {
+    this.config = config;
+    // analyzer (default is standard analyzer)
+    analyzer = (Analyzer) Class.forName(config.get("analyzer",
+        "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
+    // doc maker (the default lives in the feeds package, same as DocMaker)
+    docMaker = (DocMaker) Class.forName(config.get("doc.maker",
+        "org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker")).newInstance();
+    docMaker.setConfig(config);
+    // query makers
+    // we use separate (identical) instances for each "read" task type, so each can iterate the queries separately.
+    Class qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
+    searchQueryMaker = (QueryMaker) qmkrClass.newInstance();
+    searchQueryMaker.setConfig(config);
+    searchTravQueryMaker = (QueryMaker) qmkrClass.newInstance();
+    searchTravQueryMaker.setConfig(config);
+    searchTravRetQueryMaker = (QueryMaker) qmkrClass.newInstance();
+    searchTravRetQueryMaker.setConfig(config);
+    // index stuff
+    reinit(false);
+    
+    // statistic points
+    points = new Points(config);
+    
+    if (Boolean.valueOf(config.get("log.queries","false")).booleanValue()) {
+      System.out.println("------------> queries:");
+      System.out.println(getSearchQueryMaker().printQueries());
+    }
+
+  }
+
+  /**
+   * Clean old stuff and reopen: close the current writer, reader and
+   * directory (if any), create a fresh directory object and reset the inputs.
+   * <p>
+   * When the "directory" property equals "FSDirectory" the index is kept
+   * under work/index; any other value, and the default, selects a RAMDirectory.
+   * @param eraseIndex if true, an existing file system index is deleted first.
+   * @throws Exception if closing the old index objects or opening the directory fails.
+   */
+  public void reinit(boolean eraseIndex) throws Exception {
+
+    // cleanup index
+    if (indexWriter!=null) {
+      indexWriter.close();
+      indexWriter = null;
+    }
+    if (indexReader!=null) {
+      indexReader.close();
+      indexReader = null;
+    }
+    if (directory!=null) {
+      directory.close();
+    }
+    
+    // directory (default is ram-dir).
+    if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
+      File workDir = new File("work");
+      File indexDir = new File(workDir,"index");
+      if (eraseIndex && indexDir.exists()) {
+        FileUtils.fullyDelete(indexDir);
+      }
+      indexDir.mkdirs();
+      directory = FSDirectory.getDirectory(indexDir, eraseIndex);
+    } else {
+      directory = new RAMDirectory();
+    }
+
+    // inputs
+    resetInputs();
+    
+    // release unused stuff
+    System.runFinalization();
+    System.gc();
+  }
+
+  /**
+   * @return Returns the points.
+   */
+  public Points getPoints() {
+    return points;
+  }
+
+  /**
+   * @return Returns the directory.
+   */
+  public Directory getDirectory() {
+    return directory;
+  }
+
+  /**
+   * @param directory The directory to set.
+   */
+  public void setDirectory(Directory directory) {
+    this.directory = directory;
+  }
+
+  /**
+   * @return Returns the indexReader.
+   */
+  public IndexReader getIndexReader() {
+    return indexReader;
+  }
+
+  /**
+   * @param indexReader The indexReader to set.
+   */
+  public void setIndexReader(IndexReader indexReader) {
+    this.indexReader = indexReader;
+  }
+
+  /**
+   * @return Returns the indexWriter.
+   */
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  /**
+   * @param indexWriter The indexWriter to set.
+   */
+  public void setIndexWriter(IndexWriter indexWriter) {
+    this.indexWriter = indexWriter;
+  }
+
+  /**
+   * @return Returns the analyzer.
+   */
+  public Analyzer getAnalyzer() {
+    return analyzer;
+  }
+
+  /**
+   * @return Returns the docMaker.
+   */
+  public DocMaker getDocMaker() {
+    return docMaker;
+  }
+
+  /**
+   * @return Returns the config.
+   */
+  public Config getConfig() {
+    return config;
+  }
+
+  /**
+   * Reset the inputs of the doc maker and of all three query makers.
+   */
+  public void resetInputs() {
+    docMaker.resetInputs();
+    searchQueryMaker.resetInputs();
+    searchTravQueryMaker.resetInputs();
+    searchTravRetQueryMaker.resetInputs();
+  }
+
+  /**
+   * @return Returns the searchQueryMaker.
+   */
+  public QueryMaker getSearchQueryMaker() {
+    return searchQueryMaker;
+  }
+
+  /**
+   * @return Returns the searchTravQueryMaker.
+   */
+  public QueryMaker getSearchTravQueryMaker() {
+    return searchTravQueryMaker;
+  }
+
+  /**
+   * @return Returns the searchTravRetQueryMaker.
+   */
+  public QueryMaker getSearchTravRetQueryMaker() {
+    return searchTravRetQueryMaker;
+  }
+
+}

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Fri Jan 12 20:08:23 2007
@@ -0,0 +1,64 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * Source of documents for the test.
+ * <br>Every call to makeDocument creates the next document.
+ * When the input is exhausted, the DocMaker iterates over the input again,
+ * thus providing a source for an unlimited number of documents,
+ * though not all of them are unique.
+ */
+public interface DocMaker {
+
+  /**
+   * Create the next document, of the given size by input bytes.
+   * If the implementation does not support control over size, an exception is thrown.
+   * @param size size of document, or 0 if there is no size requirement.
+   * @exception Exception if cannot make the document, or if size>0 was specified but this feature is not supported.
+   */
+  Document makeDocument(int size) throws Exception;
+
+  /** Create the next document. */
+  Document makeDocument() throws Exception;
+
+  /** Set the properties. */
+  void setConfig(Config config);
+
+  /** Reset inputs so that the test run would behave, input wise, as if it just started. */
+  void resetInputs();
+
+  /** Return how many real unique texts are available, 0 if not applicable. */
+  int numUniqueTexts();
+
+  /** Return total bytes of all available unique texts, 0 if not applicable. */
+  long numUniqueBytes();
+
+  /** Return number of docs made since last reset. */
+  int getCount();
+
+  /** Return total byte size of docs made since last reset. */
+  long getByteCount();
+
+  /** Print some statistics on docs available/added/etc. */
+  void printDocStatistics();
+}

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java Fri Jan 12 20:08:23 2007
@@ -0,0 +1,49 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * Source of queries for the test.
+ */
+public interface QueryMaker {
+
+  /**
+   * Create the next query, of the given size.
+   * @param size the size of the query - number of terms, etc.
+   * @exception Exception if cannot make the query, or if size>0 was specified but this feature is not supported.
+   */
+  Query makeQuery(int size) throws Exception;
+
+  /** Create the next query. */
+  Query makeQuery() throws Exception;
+
+  /**
+   * Set the properties.
+   * @throws Exception if the query maker cannot be set up with the given properties.
+   */
+  void setConfig(Config config) throws Exception;
+
+  /** Reset inputs so that the test run would behave, input wise, as if it just started. */
+  void resetInputs();
+
+  /** Return a printable listing of the queries. */
+  String printQueries();
+}

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java Fri Jan 12 20:08:23 2007
@@ -0,0 +1,156 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import org.apache.lucene.document.DateTools;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * A DocMaker using the Reuters collection for its input.
+ */
+public class ReutersDocMaker extends SimpleDocMaker {
+
+  private DateFormat dateFormat;
+  private File dataDir = null;
+  private ArrayList txtFiles = new ArrayList();
+  private int nextFile = 0;
+  private int round=0;
+  private int count = 0;
+  
+  /* (non-Javadoc)
+   * @see SimpleDocMaker#setConfig(java.util.Properties)
+   */
+  public void setConfig(Config config) {
+    super.setConfig(config);
+    String d = config.get("docs.dir","reuters-out");
+    dataDir = new File(new File("work"),d);
+    addFiles(dataDir);
+    if (txtFiles.size()==0) {
+      throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
+    }
+    // date format: 30-MAR-1987 14:22:36.87
+    dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS");
+    dateFormat.setLenient(true);
+  }
+
+  private void addFiles(File f) {
+    if (!f.canRead()) {
+      return;
+    }
+    if (f.isDirectory()) {
+      File files[] = f.listFiles();
+      for (int i = 0; i < files.length; i++) {
+        addFiles(files[i]);
+      }
+      return;
+    }
+    txtFiles.add(f);
+    addUniqueBytes(f.length());
+  }
+
+  /* (non-Javadoc)
+   * @see SimpleDocMaker#makeDocument()
+   */
+  public Document makeDocument() throws Exception {
+    File f = null;
+    String name = null;
+    synchronized (this) {
+      f = (File) txtFiles.get(nextFile++);
+      name = f.getCanonicalPath()+"_"+round;
+      if (nextFile >= txtFiles.size()) { 
+        // exhausted files, start a new round
+        nextFile = 0;
+        round++;
+      }
+    }
+    
+    Document doc = new Document();
+    doc.add(new Field("name",name,storeVal,indexVal,termVecVal));
+    BufferedReader reader = new BufferedReader(new FileReader(f));
+    String line = null;
+    //First line is the date, 3rd is the title, rest is body
+    String dateStr = reader.readLine();
+    reader.readLine();//skip an empty line
+    String title = reader.readLine();
+    reader.readLine();//skip an empty line
+    StringBuffer body = new StringBuffer(1024);
+    while ((line = reader.readLine()) != null) {
+      body.append(line).append(' ');
+    }
+    Date date = dateFormat.parse(dateStr.trim());
+    doc.add(new Field("date", DateTools.dateToString(date, DateTools.Resolution.SECOND), 
+        Field.Store.YES, Field.Index.UN_TOKENIZED));
+
+    if (title != null) {
+      doc.add(new Field("title", title, storeVal,indexVal,termVecVal));
+    }
+    if (body.length() > 0) {
+        doc.add(new Field("body", body.toString(), storeVal,indexVal,termVecVal));
+    }
+
+    count++;
+    addBytes(f.length());
+
+    return doc;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#resetIinputs()
+   */
+  public synchronized void resetInputs() {
+    super.resetInputs();
+    nextFile = 0;
+    round = 0;
+    count = 0;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#numUniqueTexts()
+   */
+  public int numUniqueTexts() {
+    return txtFiles.size();
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#getCount()
+   */
+  public int getCount() {
+    return count;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int)
+   */
+  public Document makeDocument(int size) throws Exception {
+    throw new Exception(this+".makeDocument (int size) is not supported!");
+  }
+}

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java Fri Jan 12 20:08:23 2007
@@ -0,0 +1,160 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.Format;
+
+
+/**
+ * A QueryMaker that makes queries devised manually (by Grant Ingersoll) for
+ * searching in the Reuters collection.
+ * <p>
+ * The queries are prepared once, in {@link #setConfig(Config)}, and are then
+ * served cyclically by {@link #makeQuery()}.
+ */
+public class ReutersQueryMaker implements QueryMaker {
+  
+  private int qnum = 0;
+  private Query queries[];
+  private Config config;
+  
+  private static String [] STANDARD_QUERIES = {
+    //Start with some short queries
+    "Salomon", "Comex", "night trading", "Japan Sony",
+    //Try some Phrase Queries
+    "\"Sony Japan\"", "\"food needs\"~3",
+    "\"World Bank\"^2 AND Nigeria", "\"World Bank\" -Nigeria",
+    "\"Ford Credit\"~5",
+    //Try some longer queries
+    "airline Europe Canada destination",
+    "Long term pressure by trade " +
+    "ministers is necessary if the current Uruguay round of talks on " +
+    "the General Agreement on Trade and Tariffs (GATT) is to " +
+    "succeed"
+  };
+  
+  /**
+   * Create the queries that are built programmatically rather than parsed.
+   *
+   * @param field name of the field to search in
+   * @return array of span and wildcard queries on that field
+   */
+  private static Query[] getPrebuiltQueries(String field) {
+    //  be wary of unanalyzed text
+    return new Query[] {
+        new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5),
+        new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term(field, "night")), new SpanTermQuery(new Term(field, "trading"))}, 4, false),
+        new SpanNearQuery(new SpanQuery[]{new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10), new SpanTermQuery(new Term(field, "credit"))}, 10, false),
+        new WildcardQuery(new Term(field, "fo*")),
+    };
+  }
+  
+  /**
+   * Turn a list of query sources into Lucene queries.
+   * A String element is parsed against the "body" field, a Query element is
+   * used as is. Elements of any other type, and elements that fail to parse,
+   * are reported on stderr and skipped.
+   *
+   * @param qs list of query expressions (String) and prebuilt queries (Query)
+   * @param a  analyzer to use when parsing queries
+   * @return array of Lucene queries
+   */
+  private static Query[] createQueries(List qs, Analyzer a) {
+    QueryParser qp = new QueryParser("body", a);
+    List queries = new ArrayList();
+    for (int i = 0; i < qs.size(); i++)  {
+      try {
+        
+        Object query = qs.get(i);
+        Query q = null;
+        if (query instanceof String) {
+          q = qp.parse((String) query);
+          
+        } else if (query instanceof Query) {
+          q = (Query) query;
+          
+        } else {
+          System.err.println("Unsupported Query Type: " + query);
+        }
+        
+        if (q != null) {
+          queries.add(q);
+        }
+        
+      } catch (Exception e)  {
+        e.printStackTrace();
+      }
+    }
+    
+    return (Query[]) queries.toArray(new Query[0]);
+  }
+  
+  /**
+   * Build the array of queries from the standard query strings and the
+   * prebuilt queries, using the configured analyzer for parsing.
+   * @throws Exception if the analyzer class cannot be loaded or instantiated.
+   */
+  private void prepareQueries() throws Exception {
+    // analyzer (default is standard analyzer, which lives in the "standard" sub-package)
+    Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer",
+    "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance(); 
+    
+    List queryList = new ArrayList(20);
+    queryList.addAll(Arrays.asList(STANDARD_QUERIES));
+    queryList.addAll(Arrays.asList(getPrebuiltQueries("body")));
+    queries = createQueries(queryList, anlzr);
+  }
+  
+  /** Return the next query, wrapping around after the last one. */
+  public Query makeQuery() throws Exception {
+    return queries[nextQnum()];
+  }
+  
+  /**
+   * Keep the configuration and prepare the queries.
+   * @throws Exception if the queries cannot be prepared.
+   */
+  public void setConfig(Config config) throws Exception {
+    this.config = config;
+    prepareQueries();
+  }
+  
+  /** Restart iteration from the first query. */
+  public void resetInputs() {
+    qnum = 0;
+  }
+  
+  // return next qnum, advancing cyclically over the queries
+  private synchronized int nextQnum() {
+    int res = qnum;
+    qnum = (qnum+1) % queries.length;
+    return res;
+  }
+  
+  /**
+   * Return a listing of the prepared queries, one per line, each as
+   * "index. simple class name - query text". Empty if there are no queries yet.
+   */
+  public String printQueries() {
+    String newline = System.getProperty("line.separator");
+    StringBuffer sb = new StringBuffer();
+    if (queries != null) {
+      for (int i = 0; i < queries.length; i++) {
+        sb.append(i+". "+Format.simpleName(queries[i].getClass())+" - "+queries[i].toString());
+        sb.append(newline);
+      }
+    }
+    return sb.toString();
+  }
+  
+  /**
+   * Not supported by this query maker, always throws an exception.
+   * @see org.apache.lucene.benchmark.byTask.feeds.QueryMaker#makeQuery(int)
+   */
+  public Query makeQuery(int size) throws Exception {
+    throw new Exception(this+".makeQuery(int size) is not supported!");
+  }
+
+}

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java Fri Jan 12 20:08:23 2007
@@ -0,0 +1,227 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.Format;
+
+
+/**
+ * Create documents for the test.
+ * <p>
+ * Each document gets a "docid" field and a body field built from a fixed
+ * sample text. How fields are stored, indexed and whether term vectors are
+ * kept is read from the configuration in {@link #setConfig(Config)}.
+ */
+public class SimpleDocMaker implements DocMaker {
+
+  static final String BODY_FIELD = "body";
+  private int docID = 0;
+  private long numBytes = 0;
+  private long numUniqueBytes = 0;
+
+  protected Config config;
+  private int nextDocTextPosition = 0; // for creating docs of fixed size.
+
+  protected Field.Store storeVal = Field.Store.NO;
+  protected Field.Index indexVal = Field.Index.TOKENIZED;
+  protected Field.TermVector termVecVal = Field.TermVector.NO;
+
+  static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org 
+    "Well it may be a little dramatic but sometimes it true. " +
+    "If you call the emergency medical services to an incident, " +
+    "your actions have started the chain of survival. " +
+    "You have acted to help someone you may not even know. " +
+    "First aid is helping, first aid is making that call, " +
+    "putting a Band-Aid on a small wound, controlling bleeding in large " +
+    "wounds or providing CPR for a collapsed person whose not breathing " +
+    "and heart has stopped beating. You can help yourself, your loved " +
+    "ones and the stranger whose life may depend on you being in the " +
+    "right place at the right time with the right knowledge.";
+
+  private static final int DOC_TEXT_LENGTH = DOC_TEXT.length();
+
+  /**
+   * Makes a document with no fixed size requirement; same as makeDocument(0).
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument()
+   */
+  public Document makeDocument () throws Exception {
+    return makeDocument(0);
+  }
+
+  /**
+   * Makes a document with a "docid" field and a body field, and adds the
+   * length of the generated text to the byte count.
+   * @param size number of chars of sample text to use; if not positive the whole sample text is used.
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int)
+   */
+  public Document makeDocument(int size) throws Exception {
+    int docid = newdocid();
+    Document doc = new Document();
+    doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
+    String docText = createDocText(size);
+    doc.add(new Field(BODY_FIELD, "synthetic body text"+docid+" "+docText, storeVal, indexVal, termVecVal));
+    addBytes(docText.length()); // should multiply by 2 here?
+    return doc;
+  }
+
+  /**
+   * Reserves the next fixedDocSize chars of the (cyclically repeated) sample
+   * text and advances the shared read position past them.
+   * @return an array {from, to, wraps}: the start offset in DOC_TEXT, the end
+   * offset (exclusive) in DOC_TEXT, and the number of times the end of
+   * DOC_TEXT is reached between them.
+   */
+  private synchronized int[] nextDocText(int fixedDocSize) {
+    int from = nextDocTextPosition;
+    int to = nextDocTextPosition;
+    int wraps = 0;
+    int size = 0;
+
+    while (size<fixedDocSize) {
+      int added = DOC_TEXT_LENGTH - to; // chars left until the end of the sample text
+      if (size+added <= fixedDocSize) {
+        // take everything up to the end of the text and continue from its start
+        to = 0;
+        size += added;
+        wraps ++;
+      } else {
+        // only part of the remaining text is needed to reach the requested size
+        added = fixedDocSize - size;
+        size += added;
+        to += added;
+      }
+    }
+
+    nextDocTextPosition = to;
+
+    return new int[]{from,to,wraps};
+  }
+
+  /**
+   * Creates the text for one document.
+   * @param fixedDocSize requested text length in chars; if not positive the whole DOC_TEXT is returned.
+   */
+  private String createDocText(int fixedDocSize) {
+    if (fixedDocSize<=0) { 
+      //no fixed doc size requirement
+      return DOC_TEXT;
+    } 
+
+    // create a document with fixed doc size
+    int fromToWraps[] = nextDocText(fixedDocSize);
+    int from = fromToWraps[0];
+    int to = fromToWraps[1];
+    int wraps = fromToWraps[2];
+    StringBuffer sb = new StringBuffer();
+    // for every wrap, take the text up to its end and restart from offset 0
+    while (wraps-- > 0) {
+      sb.append(DOC_TEXT.substring(from));
+      from = 0;
+    }
+    sb.append(DOC_TEXT.substring(from,to));
+    return sb.toString();
+  }
+
+  // return a new docid
+  private synchronized int newdocid() {
+    return docID++;
+  }
+
+  /**
+   * Keeps the config and reads the boolean properties "doc.stored" (default false),
+   * "doc.tokenized" (default true) and "doc.term.vector" (default false) from it.
+   * @see DocMaker#setConfig(Config)
+   */
+  public void setConfig(Config config) {
+    this.config = config;
+    boolean stored = config.get("doc.stored",false); 
+    boolean tokenized = config.get("doc.tokenized",true);
+    boolean termVec = config.get("doc.term.vector",false);
+    storeVal = (stored ? Field.Store.YES : Field.Store.NO);
+    indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
+    termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
+  }
+
+  /**
+   * Prints the document statistics and then resets the doc count and byte count.
+   * NOTE(review): nextDocTextPosition and numUniqueBytes are not reset here - confirm this is intended.
+   * @see DocMaker#resetInputs()
+   */
+  public synchronized void resetInputs() {
+    printDocStatistics();
+    docID = 0;
+    numBytes = 0;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#numUniqueTexts()
+   */
+  public int numUniqueTexts() {
+    return 0; // not applicable
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#numUniqueBytes()
+   */
+  public long numUniqueBytes() {
+    return numUniqueBytes;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#getCount()
+   */
+  public int getCount() {
+    return docID;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#getByteCount()
+   */
+  public long getByteCount() {
+    return numBytes;
+  }
+
+  /**
+   * Adds n to the unique bytes counter.
+   * Synchronized so that concurrent updates of the long counter are not lost.
+   */
+  protected synchronized void addUniqueBytes (long n) {
+    numUniqueBytes += n;
+  }
+
+  /**
+   * Adds n to the bytes counter.
+   * Synchronized so that concurrent updates are not lost and do not race with
+   * the reset done in resetInputs().
+   */
+  protected synchronized void addBytes (long n) {
+    numBytes += n;
+  }
+
+  // values reported by the last call to printDocStatistics() that printed something
+  private int lastPrintedNumUniqueTexts = 0;
+  private long lastPrintedNumUniqueBytes = 0;
+  private int printNum = 0;
+
+  /**
+   * Prints to System.out the number and bytes of unique texts (each only if it
+   * grew since last printed) and the number of docs and bytes added since the
+   * last inputs reset (only if any doc was added). Prints nothing otherwise.
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#printDocStatistics()
+   */
+  public void printDocStatistics() {
+    boolean print = false;
+    String col = "                  ";
+    StringBuffer sb = new StringBuffer();
+    String newline = System.getProperty("line.separator");
+    sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
+    int nut = numUniqueTexts();
+    if (nut > lastPrintedNumUniqueTexts) {
+      print = true;
+      // this value is a count of texts, not a number of bytes
+      sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
+      lastPrintedNumUniqueTexts = nut;
+    }
+    long nub = numUniqueBytes();
+    if (nub > lastPrintedNumUniqueBytes) {
+      print = true;
+      sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
+      lastPrintedNumUniqueBytes = nub;
+    }
+    if (getCount()>0) {
+      print = true;
+      sb.append("num files added since last inputs reset:   ").append(Format.format(0,getCount(),col)).append(newline);
+      sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline);
+    }
+    if (print) {
+      System.out.println(sb.append(newline).toString());
+      printNum++;
+    }
+  }
+
+}

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java Fri Jan 12 20:08:23 2007
@@ -0,0 +1,113 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.Format;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+
+/**
+ * A QueryMaker that makes queries for a collection created 
+ * using {@link org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker}.
+ * <p>
+ * The queries are prepared once in {@link #setConfig(Config)} and then handed
+ * out cyclically by {@link #makeQuery()}.
+ */
+public class SimpleQueryMaker implements QueryMaker {
+
+  private int qnum = 0;
+  private Query queries[];
+  private Config config;
+
+  /**
+   * Prepare the queries for this test.
+   * Extending classes can override this method for preparing different queries. 
+   * @return prepared queries.
+   * @throws Exception if cannot prepare the queries.
+   */
+  protected Query[] prepareQueries() throws Exception {
+    // analyzer (default is standard analyzer)
+    Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer",
+        "org.apache.lucene.analysis.StandardAnalyzer")).newInstance(); 
+
+    QueryParser qp = new QueryParser("body",anlzr);
+    ArrayList qq = new ArrayList();
+    // two term queries, and a boolean query requiring both of them
+    Query q1 = new TermQuery(new Term("docid","doc2"));
+    qq.add(q1);
+    Query q2 = new TermQuery(new Term("body","simple"));
+    qq.add(q2);
+    BooleanQuery bq = new BooleanQuery();
+    bq.add(q1,Occur.MUST);
+    bq.add(q2,Occur.MUST);
+    qq.add(bq);
+    // queries parsed against the "body" field with the configured analyzer
+    qq.add(qp.parse("synthetic body"));
+    qq.add(qp.parse("\"synthetic body\""));
+    qq.add(qp.parse("synthetic text"));
+    qq.add(qp.parse("\"synthetic text\""));
+    qq.add(qp.parse("\"synthetic text\"~3"));
+    qq.add(qp.parse("zoom*"));
+    qq.add(qp.parse("synth*"));
+    return (Query []) qq.toArray(new Query[0]);
+  }
+
+  /**
+   * Returns the next prepared query, starting over after the last one.
+   */
+  public Query makeQuery() throws Exception {
+    return queries[nextQnum()];
+  }
+
+  /**
+   * Keeps the config and prepares the queries by calling {@link #prepareQueries()}.
+   */
+  public void setConfig(Config config) throws Exception {
+    this.config = config;
+    queries = prepareQueries();
+  }
+
+  /**
+   * Restarts query iteration from the first prepared query.
+   * Synchronized because qnum is also read and advanced under this object's
+   * monitor in nextQnum(); an unguarded write here could race with it.
+   */
+  public synchronized void resetInputs() {
+    qnum = 0;
+  }
+
+  // return next qnum
+  private synchronized int nextQnum() {
+    int res = qnum;
+    qnum = (qnum+1) % queries.length;
+    return res;
+  }
+
+  /**
+   * Builds a listing of the prepared queries, one per line, in the form
+   * "index. SimpleClassName - query"; empty if no queries were prepared yet.
+   */
+  public String printQueries() {
+    String newline = System.getProperty("line.separator");
+    StringBuffer sb = new StringBuffer();
+    if (queries != null) {
+      for (int i = 0; i < queries.length; i++) {
+        sb.append(i+". "+Format.simpleName(queries[i].getClass())+" - "+queries[i].toString());
+        sb.append(newline);
+      }
+    }
+    return sb.toString();
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.QueryMaker#makeQuery(int)
+   */
+  public Query makeQuery(int size) throws Exception {
+    throw new Exception(this+".makeQuery(int size) is not supported!");
+  }
+
+}

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java Fri Jan 12 20:08:23 2007
@@ -0,0 +1,65 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+import java.util.ArrayList;
+import java.util.StringTokenizer;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+
+/**
+ * Create sloppy phrase queries for performance test, in an index created using simple doc maker.
+ */
+public class SimpleSloppyPhraseQueryMaker extends SimpleQueryMaker {
+
+  /**
+   * Prepares phrase queries over words of the simple doc maker's text, for
+   * slops 0..7 and phrase lengths 2..5. For each start word two queries are
+   * added: one with the words in text order, then one with the words in
+   * reverse order and a larger slop.
+   * NOTE(review): terms are raw whitespace-separated tokens (case and punctuation
+   * kept), not passed through an analyzer - confirm they match indexed terms.
+   * @see org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker#prepareQueries()
+   */
+  protected Query[] prepareQueries() throws Exception {
+    // extract up to 100 words from doc text to an array
+    ArrayList tokens = new ArrayList();
+    StringTokenizer tokenizer = new StringTokenizer(SimpleDocMaker.DOC_TEXT);
+    while (tokens.size()<100 && tokenizer.hasMoreTokens()) {
+      tokens.add(tokenizer.nextToken());
+    }
+    String vocab[] = (String[]) tokens.toArray(new String[tokens.size()]);
+
+    // create queries (that would find stuff) with varying slops
+    ArrayList result = new ArrayList(); 
+    for (int slop=0; slop<8; slop++) {
+      for (int qlen=2; qlen<6; qlen++) {
+        int startLimit = vocab.length-qlen-slop;
+        for (int start=0; start<startLimit; start++) {
+          // ordered: walk forward from the start word
+          result.add(buildPhrase(vocab, start, +1, qlen, slop, slop));
+          // reversed: walk backward from the last word of the same window
+          result.add(buildPhrase(vocab, start+qlen+slop-1, -1, qlen, slop, slop+2*qlen));
+        }
+      }
+    }
+    return (Query[]) result.toArray(new Query[0]);
+  }
+
+  // Builds one phrase query of qlen words taken from vocab, beginning at index 'first'
+  // and moving by 'step'; after each of the first 'gaps' words one extra word is skipped.
+  private static PhraseQuery buildPhrase(String vocab[], int first, int step, int qlen, int gaps, int querySlop) {
+    PhraseQuery phrase = new PhraseQuery();
+    phrase.setSlop(querySlop);
+    int pos = first;
+    int gapsLeft = gaps;
+    for (int i=0; i<qlen; i++) {
+      phrase.add(new Term(SimpleDocMaker.BODY_FIELD,vocab[pos]));
+      pos += step;
+      if (gapsLeft>0) {
+        gapsLeft--;
+        pos += step;
+      }
+    }
+    return phrase;
+  }
+
+}

Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html?view=auto&rev=495834
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html Fri Jan 12 20:08:23 2007
@@ -0,0 +1,6 @@
+<html>
+<body>
+Sources for benchmark inputs: documents and queries.
+</body>
+
+</html>



Mime
View raw message