lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dsmi...@apache.org
Subject svn commit: r1536176 - in /lucene/dev/trunk: dev-tools/idea/lucene/benchmark/src/ dev-tools/idea/lucene/spatial/ dev-tools/maven/lucene/benchmark/ lucene/benchmark/ lucene/benchmark/conf/ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/ lu...
Date Sun, 27 Oct 2013 18:17:45 GMT
Author: dsmiley
Date: Sun Oct 27 18:17:45 2013
New Revision: 1536176

URL: http://svn.apache.org/r1536176
Log:
LUCENE-2844: spatial benchmark

Added:
    lucene/dev/trunk/lucene/benchmark/conf/spatial.alg   (with props)
    lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java
  (with props)
    lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java
  (with props)
    lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java
  (with props)
Modified:
    lucene/dev/trunk/dev-tools/idea/lucene/benchmark/src/benchmark.iml
    lucene/dev/trunk/dev-tools/idea/lucene/spatial/spatial.iml
    lucene/dev/trunk/dev-tools/maven/lucene/benchmark/pom.xml.template
    lucene/dev/trunk/lucene/benchmark/build.xml
    lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html

Modified: lucene/dev/trunk/dev-tools/idea/lucene/benchmark/src/benchmark.iml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/idea/lucene/benchmark/src/benchmark.iml?rev=1536176&r1=1536175&r2=1536176&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/idea/lucene/benchmark/src/benchmark.iml (original)
+++ lucene/dev/trunk/dev-tools/idea/lucene/benchmark/src/benchmark.iml Sun Oct 27 18:17:45
2013
@@ -24,6 +24,7 @@
     <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
     <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
     <orderEntry type="module" scope="TEST" module-name="benchmark-conf" />
+    <orderEntry type="module" module-name="spatial" />
     <orderEntry type="module" module-name="facet" />
     <orderEntry type="module" module-name="highlighter" />
     <orderEntry type="module" module-name="icu" />

Modified: lucene/dev/trunk/dev-tools/idea/lucene/spatial/spatial.iml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/idea/lucene/spatial/spatial.iml?rev=1536176&r1=1536175&r2=1536176&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/idea/lucene/spatial/spatial.iml (original)
+++ lucene/dev/trunk/dev-tools/idea/lucene/spatial/spatial.iml Sun Oct 27 18:17:45 2013
@@ -11,7 +11,7 @@
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
-    <orderEntry type="module-library">
+    <orderEntry type="module-library" exported="">
       <library>
         <CLASSES>
           <root url="file://$MODULE_DIR$/lib" />

Modified: lucene/dev/trunk/dev-tools/maven/lucene/benchmark/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/maven/lucene/benchmark/pom.xml.template?rev=1536176&r1=1536175&r2=1536176&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/maven/lucene/benchmark/pom.xml.template (original)
+++ lucene/dev/trunk/dev-tools/maven/lucene/benchmark/pom.xml.template Sun Oct 27 18:17:45
2013
@@ -80,6 +80,11 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>lucene-spatial</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
       <groupId>com.ibm.icu</groupId>
       <artifactId>icu4j</artifactId>
     </dependency>

Modified: lucene/dev/trunk/lucene/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/benchmark/build.xml?rev=1536176&r1=1536175&r2=1536176&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/benchmark/build.xml (original)
+++ lucene/dev/trunk/lucene/benchmark/build.xml Sun Oct 27 18:17:45 2013
@@ -37,6 +37,8 @@
         <available file="temp/20news-18828.tar.gz" property="20news-18828.exists"/>
         <available file="${working.dir}/20news-18828" property="20news-18828.expanded"/>
         <available file="${working.dir}/mini_newsgroups" property="mini.expanded"/>
+        <available file="temp/allCountries.txt.bz2" property="geonames.exists"/>
+        <available file="${working.dir}/geonames" property="geonames.expanded"/>
         
         <available file="temp/enwiki-20070527-pages-articles.xml.bz2" property="enwiki.exists"/>
         <available file="temp/enwiki-20070527-pages-articles.xml" property="enwiki.expanded"/>
@@ -62,6 +64,25 @@
         <bunzip2 src="temp/enwiki-20070527-pages-articles.xml.bz2" dest="temp"/>
     </target>
 
+    <target name="geonames-files" depends="check-files">
+        <mkdir dir="temp"/>
+        <antcall target="get-geonames"/>
+        <antcall target="expand-geonames"/>
+    </target>
+
+    <target name="get-geonames" unless="geonames.exists">
+        <!-- note: latest data is at: http://download.geonames.org/export/dump/allCountries.zip
+         and then randomize with: gsort -R -S 1500M file.txt > file_random.txt
+         and then compress with: bzip2 -9 -k file_random.txt -->
+        <get src="http://people.apache.org/~dsmiley/data/geonames_20130921_randomOrder_allCountries.txt.bz2"
+             dest="temp/allCountries.txt.bz2"/>
+    </target>
+
+    <target name="expand-geonames" unless="geonames.expanded">
+        <mkdir dir="${working.dir}/geonames"/>
+        <bunzip2 src="temp/allCountries.txt.bz2" dest="${working.dir}/geonames"/>
+    </target>
+
     <target name="get-news-20" unless="20news-18828.exists">
         <get src="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.tar.gz"
              dest="temp/news20.tar.gz"/>
@@ -147,8 +168,10 @@
       <pathelement path="${analyzers-common.jar}"/>
       <pathelement path="${queryparser.jar}"/>
       <pathelement path="${facet.jar}"/>
+      <pathelement path="${spatial.jar}"/>
       <pathelement path="${queries.jar}"/>
       <fileset dir="${common.dir}/analysis/icu/lib"/>
+      <fileset dir="${common.dir}/spatial/lib"/>
       <path refid="base.classpath"/>
       <fileset dir="lib"/>
     </path>
@@ -158,7 +181,8 @@
         <pathelement path="${benchmark.ext.classpath}"/>
     </path>
 
-    <target name="javadocs" depends="javadocs-memory,javadocs-highlighter,javadocs-analyzers-common,javadocs-queryparser,javadocs-facet,compile-core">
+    <target name="javadocs" depends="javadocs-memory,javadocs-highlighter,javadocs-analyzers-common,
+      javadocs-queryparser,javadocs-facet,javadocs-spatial,compile-core">
     <invoke-module-javadoc>
       <links>
         <link href="../memory"/>
@@ -166,6 +190,7 @@
         <link href="../analyzers-common"/>
         <link href="../queryparser"/>
         <link href="../facet"/>
+        <link href="../spatial"/>
       </links>
     </invoke-module-javadoc>
     </target>
@@ -256,7 +281,7 @@
       </ant>
     </target>
 
-    <target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
+    <target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet,jar-spatial"/>
   
     <target name="compile-test" depends="copy-alg-files-for-testing,module-build.compile-test"/>
     <target name="copy-alg-files-for-testing" description="copy .alg files as resources
for testing">

Added: lucene/dev/trunk/lucene/benchmark/conf/spatial.alg
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/benchmark/conf/spatial.alg?rev=1536176&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/benchmark/conf/spatial.alg (added)
+++ lucene/dev/trunk/lucene/benchmark/conf/spatial.alg Sun Oct 27 18:17:45 2013
@@ -0,0 +1,111 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# Spatial search benchmark
+#  In order to use this, you'll need to first run 'ant geonames-files'.
+#  You may need more memory when running this: -Dtask.mem=1000M
+#  For docs on what options are available, see the javadocs.
+
+### Spatial Context, Grid, Strategy config
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialDocMaker
+#  SpatialContext:    see SpatialContextFactory.makeSpatialContext
+#spatial.spatialContextFactory=com.spatial4j.core.context.jts.JtsSpatialContextFactory
+#spatial.geo=true
+#spatial.distCalculator=haversine
+#spatial.worldBounds=...
+#  Spatial Grid: (PrefixTree)  see SpatialPrefixTreeFactory.makeSPT
+#spatial.prefixTree=geohash  (or quad)
+#spatial.maxLevels=11
+#spatial.maxDistErr (in degrees) to compute maxLevels -- defaults to 1 meter's worth
+#  RecursivePrefixTreeStrategy:
+spatial.docPointsOnly=true
+#spatial.distErrPct=.25
+#query.spatial.prefixGridScanLevel=-4
+
+### Source & Doc
+content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource
+line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser
+docs.file=work/geonames/allCountries.txt
+doc.tokenized=false
+#  Next 3 props convert doc points to circles with a random radius and then optionally bbox'es
+#doc.spatial.radiusDegrees=0.0
+#doc.spatial.radiusDegreesRandPlusMinus=0.0
+#doc.spatial.bbox=false
+
+### Directory
+directory=FSDirectory
+#directory=RamDirectory
+compound=false
+merge.factor=10
+ram.flush.mb=64
+concurrent.merge.scheduler.max.thread.count=2
+
+### Query
+query.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialFileQueryMaker
+query.file=work/geonames/allCountries.txt
+query.file.line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser
+query.file.maxQueries=1000
+#  Next 3 props convert query points to circles with a random radius and then optionally bbox'es
+query.spatial.radiusDegrees=0
+query.spatial.radiusDegreesRandPlusMinus=3
+query.spatial.bbox=false
+
+query.spatial.score=false
+#query.spatial.predicate=Intersects
+# (defaults to spatial.distErrPct)
+query.spatial.distErrPct=qDistErrPct:0.0:0.025:0.1:0.5
+
+### Misc
+
+log.step.AddDoc = 100000
+task.max.depth.log=1
+
+# -------------------------------------------------------------------------------------
+
+{ "Populate"
+  ResetSystemErase
+  CreateIndex
+  #1 million docs
+  [{ "MAddDocs" AddDoc} : 250000] : 4
+  ForceMerge(1)
+  CommitIndex
+  CloseIndex
+  
+  RepSumByPref MAddDocs
+} : 1
+#set above round to 0 on subsequent runs if not changing indexing but experimenting with search
+
+OpenReader
+{"WarmJIT" Search > : 4000
+CloseReader
+
+{ "Rounds"
+  ResetSystemSoft
+  
+  OpenReader
+  Search
+  {"RealQueries" Search > : 2000
+  CloseReader
+  
+  NewRound
+} : 4
+
+
+#RepSumByName
+RepSumByPrefRound RealQueries
+
+

Added: lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java?rev=1536176&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java
(added)
+++ lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java
Sun Oct 27 18:17:45 2013
@@ -0,0 +1,44 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A line parser for Geonames.org data.
+ * See <a href="http://download.geonames.org/export/dump/readme.txt">'geoname' table</a>.
+ * Requires {@link SpatialDocMaker}.
+ */
+public class GeonamesLineParser extends LineDocSource.LineParser {
+
+  /** This header will be ignored; the geonames format is fixed and doesn't have a header line. */
+  public GeonamesLineParser(String[] header) {
+    super(header);
+  }
+
+  @Override
+  public void parseLine(DocData docData, String line) {
+    String[] parts = line.split("\\t", 7);//no more than first 6 fields needed
+
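+    //    Sample data line (tab-separated in the file; commas shown here for readability):
+    // 3578267, Morne du Vitet, Morne du Vitet, (alternate names), 17.88333, -62.8, ...
+    // ID, Name, ASCII name (unused), Alternate names (unused), Lat, Lon, ...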
+
+    docData.setID(Integer.parseInt(parts[0]));//note: overwrites ID assigned by LineDocSource
+    docData.setName(parts[1]);
+    docData.setBody(parts[4]+","+parts[5]); // latitude , longitude
+  }
+}

Added: lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java?rev=1536176&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java
(added)
+++ lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java
Sun Oct 27 18:17:45 2013
@@ -0,0 +1,207 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.context.SpatialContextFactory;
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.spatial.SpatialStrategy;
+import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
+
+import java.util.AbstractMap;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Indexes spatial data according to a configured {@link SpatialStrategy} with optional
+ * shape transformation via a configured {@link ShapeConverter}. The converter can turn points into
+ * circles and bounding boxes, in order to vary the type of indexing performance tests.
+ * Unless it's subclassed to do otherwise, this class configures a {@link SpatialContext},
+ * {@link SpatialPrefixTree}, and {@link RecursivePrefixTreeStrategy}. The Strategy is made
+ * available to a query maker via the static method {@link #getSpatialStrategy(int)}.
+ * See spatial.alg for a listing of spatial parameters, in particular those starting with "spatial."
+ * and "doc.spatial".
+ */
+public class SpatialDocMaker extends DocMaker {
+
+  public static final String SPATIAL_FIELD = "spatial";
+
+  //cache spatialStrategy by round number
+  private static Map<Integer,SpatialStrategy> spatialStrategyCache = new HashMap<Integer,SpatialStrategy>();
+
+  private SpatialStrategy strategy;
+  private ShapeConverter shapeConverter;
+
+  /**
+   * Looks up the SpatialStrategy from the given round --
+   * {@link org.apache.lucene.benchmark.byTask.utils.Config#getRoundNumber()}. It's an error
+   * if it wasn't created already for this round -- when SpatialDocMaker is initialized.
+   */
+  public static SpatialStrategy getSpatialStrategy(int roundNumber) {
+    SpatialStrategy result = spatialStrategyCache.get(roundNumber);
+    if (result == null) {
+      throw new IllegalStateException("Strategy should have been init'ed by SpatialDocMaker
by now");
+    }
+    return result;
+  }
+
+  /**
+   * Builds a SpatialStrategy from configuration options.
+   */
+  protected SpatialStrategy makeSpatialStrategy(final Config config) {
+    //A Map view of Config that prefixes keys with "spatial."
+    Map<String, String> configMap = new AbstractMap<String, String>() {
+      @Override
+      public Set<Entry<String, String>> entrySet() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public String get(Object key) {
+        return config.get("spatial." + key, null);
+      }
+    };
+
+    SpatialContext ctx = SpatialContextFactory.makeSpatialContext(configMap, null);
+
+    //Some day the strategy might be initialized with a factory but such a factory
+    // is non-existent.
+    return makeSpatialStrategy(config, configMap, ctx);
+  }
+
+  protected SpatialStrategy makeSpatialStrategy(final Config config, Map<String, String>
configMap,
+                                                SpatialContext ctx) {
+    //A factory for the prefix tree grid
+    SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx);
+
+    RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD)
{
+      {
+        //protected field
+        this.pointsOnly = config.get("spatial.docPointsOnly", false);
+      }
+    };
+
+    int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4);
+    if (prefixGridScanLevel < 0)
+      prefixGridScanLevel = grid.getMaxLevels() + prefixGridScanLevel;
+    strategy.setPrefixGridScanLevel(prefixGridScanLevel);
+
+    double distErrPct = config.get("spatial.distErrPct", .025);//doc & query; a default
+    strategy.setDistErrPct(distErrPct);
+    return strategy;
+  }
+
+  @Override
+  public void setConfig(Config config, ContentSource source) {
+    super.setConfig(config, source);
+    SpatialStrategy existing = spatialStrategyCache.get(config.getRoundNumber());
+    if (existing == null) {
+      //new round; we need to re-initialize
+      strategy = makeSpatialStrategy(config);
+      spatialStrategyCache.put(config.getRoundNumber(), strategy);
+      //TODO remove previous round config?
+      shapeConverter = makeShapeConverter(strategy, config, "doc.spatial.");
+      System.out.println("Spatial Strategy: " + strategy);
+    }
+  }
+
+  /**
+   * Optionally converts points to circles, and optionally bbox'es result.
+   */
+  public static ShapeConverter makeShapeConverter(final SpatialStrategy spatialStrategy,
+                                                  Config config, String configKeyPrefix)
{
+    //by default does no conversion
+    final double radiusDegrees = config.get(configKeyPrefix+"radiusDegrees", 0.0);
+    final double plusMinus = config.get(configKeyPrefix+"radiusDegreesRandPlusMinus", 0.0);
+    final boolean bbox = config.get(configKeyPrefix + "bbox", false);
+
+    return new ShapeConverter() {
+      @Override
+      public Shape convert(Shape shape) {
+        if (shape instanceof Point && (radiusDegrees != 0.0 || plusMinus != 0.0))
{
+          Point point = (Point)shape;
+          double radius = radiusDegrees;
+          if (plusMinus > 0.0) {
+            Random random = new Random(point.hashCode());//use hashCode so it's reproducibly random
+            radius += random.nextDouble() * 2 * plusMinus - plusMinus;
+            radius = Math.abs(radius);//can happen if configured plusMinus > radiusDegrees
+          }
+          shape = spatialStrategy.getSpatialContext().makeCircle(point, radius);
+        }
+        if (bbox)
+          shape = shape.getBoundingBox();
+        return shape;
+      }
+    };
+  }
+
+  /** Converts one shape to another. Created by
+   * {@link #makeShapeConverter(org.apache.lucene.spatial.SpatialStrategy, org.apache.lucene.benchmark.byTask.utils.Config, String)} */
+  public interface ShapeConverter {
+    Shape convert(Shape shape);
+  }
+
+  @Override
+  public Document makeDocument() throws Exception {
+
+    DocState docState = getDocState();
+
+    Document doc = super.makeDocument();
+
+    // Set SPATIAL_FIELD from body
+    DocData docData = docState.docData;
+    //   makeDocument() resets docState.getBody() so we can't look there; look in Document
+    String shapeStr = doc.getField(DocMaker.BODY_FIELD).stringValue();
+    Shape shape = makeShapeFromString(strategy, docData.getName(), shapeStr);
+    if (shape != null) {
+      shape = shapeConverter.convert(shape);
+      //index
+      for (Field f : strategy.createIndexableFields(shape)) {
+        doc.add(f);
+      }
+    }
+
+    return doc;
+  }
+
+  public static Shape makeShapeFromString(SpatialStrategy strategy, String name, String shapeStr)
{
+    if (shapeStr != null && shapeStr.length() > 0) {
+      try {
+        return strategy.getSpatialContext().readShape(shapeStr);
+      } catch (Exception e) {//InvalidShapeException TODO
+        System.err.println("Shape "+name+" wasn't parseable: "+e+"  (skipping it)");
+        return null;
+      }
+    }
+    return null;
+  }
+
+  @Override
+  public Document makeDocument(int size) throws Exception {
+    //TODO consider abusing the 'size' notion to number of shapes per document
+    throw new UnsupportedOperationException();
+  }
+}

Added: lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java?rev=1536176&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java
(added)
+++ lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java
Sun Oct 27 18:17:45 2013
@@ -0,0 +1,120 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.queries.CustomScoreQuery;
+import org.apache.lucene.queries.function.FunctionQuery;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.spatial.SpatialStrategy;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialOperation;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Reads spatial data from the body field of docs produced by an internally created {@link LineDocSource}.
+ * It's parsed by {@link com.spatial4j.core.context.SpatialContext#readShape(String)} and then
+ * further manipulated via a configurable {@link SpatialDocMaker.ShapeConverter}. When using point
+ * data, it's likely you'll want to configure the shape converter so that the query shapes actually
+ * cover a region. The queries are all created and cached in advance. This query maker works in
+ * conjunction with {@link SpatialDocMaker}.  See spatial.alg for a listing of options, in
+ * particular the options starting with "query.".
+ */
+public class SpatialFileQueryMaker extends AbstractQueryMaker {
+  protected SpatialStrategy strategy;
+  protected double distErrPct;//NaN if not set
+  protected SpatialOperation operation;
+  protected boolean score;
+
+  protected SpatialDocMaker.ShapeConverter shapeConverter;
+
+  @Override
+  public void setConfig(Config config) throws Exception {
+    strategy = SpatialDocMaker.getSpatialStrategy(config.getRoundNumber());
+    shapeConverter = SpatialDocMaker.makeShapeConverter(strategy, config, "query.spatial.");
+
+    distErrPct = config.get("query.spatial.distErrPct", Double.NaN);
+    operation = SpatialOperation.get(config.get("query.spatial.predicate", "Intersects"));
+    score = config.get("query.spatial.score", false);
+
+    super.setConfig(config);//call last, will call prepareQueries()
+  }
+
+  @Override
+  protected Query[] prepareQueries() throws Exception {
+    final int maxQueries = config.get("query.file.maxQueries", 1000);
+    Config srcConfig = new Config(new Properties());
+    srcConfig.set("docs.file", config.get("query.file", null));
+    srcConfig.set("line.parser", config.get("query.file.line.parser", null));
+    srcConfig.set("content.source.forever", "false");
+
+    List<Query> queries = new ArrayList<>();
+    LineDocSource src = new LineDocSource();
+    try {
+      src.setConfig(srcConfig);
+      src.resetInputs();
+      DocData docData = new DocData();
+      for (int i = 0; i < maxQueries; i++) {
+        docData = src.getNextDocData(docData);
+        Shape shape = SpatialDocMaker.makeShapeFromString(strategy, docData.getName(), docData.getBody());
+        if (shape != null) {
+          shape = shapeConverter.convert(shape);
+          queries.add(makeQueryFromShape(shape));
+        } else {
+          i--;//skip
+        }
+      }
+    } catch (NoMoreDataException e) {
+      //all-done
+    } finally {
+      src.close();
+    }
+    return queries.toArray(new Query[queries.size()]);
+  }
+
+
+  protected Query makeQueryFromShape(Shape shape) {
+    SpatialArgs args = new SpatialArgs(operation, shape);
+    if (!Double.isNaN(distErrPct))
+      args.setDistErrPct(distErrPct);
+
+    if (score) {
+      ValueSource valueSource = strategy.makeDistanceValueSource(shape.getCenter());
+      return new CustomScoreQuery(strategy.makeQuery(args), new FunctionQuery(valueSource));
+    } else {
+      //strategy.makeQuery() could potentially score (isn't well defined) so instead we call
+      // makeFilter() and wrap
+
+      Filter filter = strategy.makeFilter(args);
+      if (filter instanceof QueryWrapperFilter) {
+        return ((QueryWrapperFilter)filter).getQuery();
+      } else {
+        return new ConstantScoreQuery(filter);
+      }
+    }
+  }
+
+}

Modified: lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html?rev=1536176&r1=1536175&r2=1536176&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
(original)
+++ lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
Sun Oct 27 18:17:45 2013
@@ -469,9 +469,10 @@ regular index/search work tasks, report 
      its own queryMaker instance.
 	 <li>
 	 <font color="#FF0066">CommitIndex</font> and 
-	 <font color="#FF0066">Optimize</font> can be used to commit
-	 changes to the index and/or optimize the index created thus
-	 far.
+	 <font color="#FF0066">ForceMerge</font> can be used to commit
+	 changes to the index then merge the index segments. The integer
+   parameter specifies how many segments to merge down to (default
+   1).
 	 <li>
 	 <font color="#FF0066">WriteLineDoc</font> prepares a 'line'
 	 file where each line holds a document with <i>title</i>, 
@@ -593,6 +594,9 @@ Here is a list of currently defined prop
     <ul><li>doc.delete.step
     </li></ul>
   </li>
+
+  <li><b>Spatial</b>: Numerous; see spatial.alg
+  </li>
   
   <li><b>Task alternative packages</b>:
     <ul><li>alt.tasks.packages



Mime
View raw message