lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject svn commit: r800193 - in /lucene/java/trunk: ./ contrib/ contrib/miscellaneous/src/java/org/apache/lucene/queryParser/complexPhrase/ contrib/miscellaneous/src/test/org/apache/lucene/queryParser/complexPhrase/ src/java/org/apache/lucene/queryParser/ src...
Date Mon, 03 Aug 2009 04:06:23 GMT
Author: markrmiller
Date: Mon Aug  3 04:06:22 2009
New Revision: 800193

URL: http://svn.apache.org/viewvc?rev=800193&view=rev
Log:
LUCENE-1486: Move ComplexPhraseQueryParser to contrib
    

Added:
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/complexPhrase/
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java
    lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/complexPhrase/
    lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java
Removed:
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestComplexPhraseQuery.java
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/CHANGES.txt

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=800193&r1=800192&r2=800193&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Aug  3 04:06:22 2009
@@ -621,18 +621,12 @@
     All standard parsers now also implement Serializable and enforce
     their singleton status.  (Uwe Schindler, Mike McCandless)
     
-31. LUCENE-1486: Added ComplexPhraseQueryParser, an extension of QueryParser 
-    that allows a subset of the Lucene query language to be embedded in
-    PhraseQuerys. Wildcard, Range, and Fuzzy queries, as well as limited 
-    boolean logic, can be used within quote operators with this parser, ie: 
-    "(jo* -john) smyth~". (Mark Harwood via Mark Miller)
-    
-32. LUCENE-1741: User configureable maximum chunk size in MMapDirectory.
+31. LUCENE-1741: User configureable maximum chunk size in MMapDirectory.
     On 32 bit platforms, the address space can be very fragmented, so
     one big ByteBuffer for the whole file may not fit into address space.
     (Eks Dev via Uwe Schindler)
 
-33. LUCENE-1644: Enable 4 rewrite modes for queries deriving from
+32. LUCENE-1644: Enable 4 rewrite modes for queries deriving from
     MultiTermQuery (WildcardQuery, PrefixQuery, TermRangeQuery,
     NumericRangeQuery): CONSTANT_SCORE_FILTER_REWRITE first creates a
     filter and then assigns constant score (boost) to docs;
@@ -642,7 +636,7 @@
     CONSTANT_SCORE_AUTO_REWRITE tries to pick the most performant
     constant-score rewrite method.  (Mike McCandless)
     
-34. LUCENE-1448: Added TokenStream.end(), to perform end-of-stream
+33. LUCENE-1448: Added TokenStream.end(), to perform end-of-stream
     operations.  This is currently used to fix offset problems when 
     multiple fields with the same name are added to a document.
     (Mike McCandless, Mark Miller, Michael Busch)

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=800193&r1=800192&r2=800193&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Mon Aug  3 04:06:22 2009
@@ -106,6 +106,13 @@
     3.0 and will then replace the current core QueryParser, which
     has been deprecated with this patch.
     (Luis Alves and Adriano Campos via Michael Busch)
+    
+13. LUCENE-1486: Added ComplexPhraseQueryParser, an extension of QueryParser 
+    that allows a subset of the Lucene query language to be embedded in
+    PhraseQuerys. Wildcard, Range, and Fuzzy queries, as well as limited 
+    boolean logic, can be used within quote operators with this parser, ie: 
+    "(jo* -john) smyth~". (Mark Harwood via Mark Miller)
+
 
 Optimizations
 

Added: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java?rev=800193&view=auto
==============================================================================
--- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java
(added)
+++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java
Mon Aug  3 04:06:22 2009
@@ -0,0 +1,389 @@
+package org.apache.lucene.queryParser.complexPhrase;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+
+/**
+ * QueryParser which permits complex phrase query syntax e.g. "(john jon
+ * jonathan~) peters*"
+ * 
+ * Performs potentially multiple passes over Query text to parse any nested
+ * logic in PhraseQueries. - First pass takes any PhraseQuery content between
+ * quotes and stores for subsequent pass. All other query content is parsed as
+ * normal - Second pass parses any stored PhraseQuery content, checking all
+ * embedded clauses are referring to the same field and therefore can be
+ * rewritten as Span queries. All PhraseQuery clauses are expressed as
+ * ComplexPhraseQuery objects
+ * 
+ * This could arguably be done in one pass using a new QueryParser but here I am
+ * working within the constraints of the existing parser as a base class. This
+ * currently simply feeds all phrase content through an analyzer to select
+ * phrase terms - any "special" syntax such as * ~ * etc are not given special
+ * status
+ * 
+ * 
+ */
+public class ComplexPhraseQueryParser extends QueryParser {
+  private ArrayList/*<ComplexPhraseQuery>*/complexPhrases = null;
+
+  private boolean isPass2ResolvingPhrases;
+
+  private ComplexPhraseQuery currentPhraseQuery = null;
+
+  public ComplexPhraseQueryParser(String f, Analyzer a) {
+    super(f, a);
+  }
+
+  protected Query getFieldQuery(String field, String queryText, int slop) {
+    ComplexPhraseQuery cpq = new ComplexPhraseQuery(field, queryText, slop);
+    complexPhrases.add(cpq); // add to list of phrases to be parsed once
+    // we
+    // are through with this pass
+    return cpq;
+  }
+
+  public Query parse(String query) throws ParseException {
+    if (isPass2ResolvingPhrases) {
+      MultiTermQuery.RewriteMethod oldMethod = getMultiTermRewriteMethod();
+      try {
+        // Temporarily force BooleanQuery rewrite so that Parser will
+        // generate visible
+        // collection of terms which we can convert into SpanQueries.
+        // ConstantScoreRewrite mode produces an
+        // opaque ConstantScoreQuery object which cannot be interrogated for
+        // terms in the same way a BooleanQuery can.
+        // QueryParser is not guaranteed threadsafe anyway so this temporary
+        // state change should not
+        // present an issue
+        setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        return super.parse(query);
+      } finally {
+        setMultiTermRewriteMethod(oldMethod);
+      }
+    }
+
+    // First pass - parse the top-level query recording any PhraseQuerys
+    // which will need to be resolved
+    complexPhrases = new ArrayList/*<ComplexPhraseQuery>*/();
+    Query q = super.parse(query);
+
+    // Perform second pass, using this QueryParser to parse any nested
+    // PhraseQueries with different
+    // set of syntax restrictions (i.e. all fields must be same)
+    isPass2ResolvingPhrases = true;
+    try {
+      for (Iterator iterator = complexPhrases.iterator(); iterator.hasNext();) {
+        currentPhraseQuery = (ComplexPhraseQuery) iterator.next();
+        // in each phrase, now parse the contents between quotes as a
+        // separate parse operation
+        currentPhraseQuery.parsePhraseElements(this);
+      }
+    } finally {
+      isPass2ResolvingPhrases = false;
+    }
+    return q;
+  }
+
+  // There is No "getTermQuery throws ParseException" method to override so
+  // unfortunately need
+  // to throw a runtime exception here if a term for another field is embedded
+  // in phrase query
+  protected Query newTermQuery(Term term) {
+    if (isPass2ResolvingPhrases) {
+      try {
+        checkPhraseClauseIsForSameField(term.field());
+      } catch (ParseException pe) {
+        throw new RuntimeException("Error parsing complex phrase", pe);
+      }
+    }
+    return super.newTermQuery(term);
+  }
+
+  // Helper method used to report on any clauses that appear in query syntax
+  private void checkPhraseClauseIsForSameField(String field)
+      throws ParseException {
+    if (!field.equals(currentPhraseQuery.field)) {
+      throw new ParseException("Cannot have clause for field \"" + field
+          + "\" nested in phrase " + " for field \"" + currentPhraseQuery.field
+          + "\"");
+    }
+  }
+
+  protected Query getWildcardQuery(String field, String termStr)
+      throws ParseException {
+    if (isPass2ResolvingPhrases) {
+      checkPhraseClauseIsForSameField(field);
+    }
+    return super.getWildcardQuery(field, termStr);
+  }
+
+  protected Query getRangeQuery(String field, String part1, String part2,
+      boolean inclusive) throws ParseException {
+    if (isPass2ResolvingPhrases) {
+      checkPhraseClauseIsForSameField(field);
+    }
+    return super.getRangeQuery(field, part1, part2, inclusive);
+  }
+
+  protected Query newRangeQuery(String field, String part1, String part2,
+      boolean inclusive) {
+    if (isPass2ResolvingPhrases) {
+      // Must use old-style RangeQuery in order to produce a BooleanQuery
+      // that can be turned into SpanOr clause
+      TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive,
+          getRangeCollator());
+      rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+      return rangeQuery;
+    }
+    return super.newRangeQuery(field, part1, part2, inclusive);
+  }
+
+  protected Query getFuzzyQuery(String field, String termStr,
+      float minSimilarity) throws ParseException {
+    if (isPass2ResolvingPhrases) {
+      checkPhraseClauseIsForSameField(field);
+    }
+    return super.getFuzzyQuery(field, termStr, minSimilarity);
+  }
+
+  /*
+   * Used to handle the query content in between quotes and produced Span-based
+   * interpretations of the clauses.
+   */
+  static class ComplexPhraseQuery extends Query {
+
+    String field;
+
+    String phrasedQueryStringContents;
+
+    int slopFactor;
+
+    private Query contents;
+
+    public ComplexPhraseQuery(String field, String phrasedQueryStringContents,
+        int slopFactor) {
+      super();
+      this.field = field;
+      this.phrasedQueryStringContents = phrasedQueryStringContents;
+      this.slopFactor = slopFactor;
+    }
+
+    // Called by ComplexPhraseQueryParser for each phrase after the main
+    // parse
+    // thread is through
+    protected void parsePhraseElements(QueryParser qp) throws ParseException {
+      // TODO ensure that field-sensitivity is preserved ie the query
+      // string below is parsed as
+      // field+":("+phrasedQueryStringContents+")"
+      // but this will need code in rewrite to unwrap the first layer of
+      // boolean query
+      contents = qp.parse(phrasedQueryStringContents);
+    }
+
+    public Query rewrite(IndexReader reader) throws IOException {
+      // ArrayList spanClauses = new ArrayList();
+      if (contents instanceof TermQuery) {
+        return contents;
+      }
+      // Build a sequence of Span clauses arranged in a SpanNear - child
+      // clauses can be complex
+      // Booleans e.g. nots and ors etc
+      int numNegatives = 0;
+      if (!(contents instanceof BooleanQuery)) {
+        throw new IllegalArgumentException("Unknown query type \""
+            + contents.getClass().getName()
+            + "\" found in phrase query string \"" + phrasedQueryStringContents
+            + "\"");
+      }
+      BooleanQuery bq = (BooleanQuery) contents;
+      BooleanClause[] bclauses = bq.getClauses();
+      SpanQuery[] allSpanClauses = new SpanQuery[bclauses.length];
+      // For all clauses e.g. one* two~
+      for (int i = 0; i < bclauses.length; i++) {
+        // HashSet bclauseterms=new HashSet();
+        Query qc = bclauses[i].getQuery();
+        // Rewrite this clause e.g one* becomes (one OR onerous)
+        qc = qc.rewrite(reader);
+        if (bclauses[i].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
+          numNegatives++;
+        }
+
+        if (qc instanceof BooleanQuery) {
+          ArrayList sc = new ArrayList();
+          addComplexPhraseClause(sc, (BooleanQuery) qc);
+          if (sc.size() > 0) {
+            allSpanClauses[i] = (SpanQuery) sc.get(0);
+          } else {
+            // Insert fake term e.g. phrase query was for "Fred Smithe*" and
+            // there were no "Smithe*" terms - need to
+            // prevent match on just "Fred".
+            allSpanClauses[i] = new SpanTermQuery(new Term(field,
+                "Dummy clause because no terms found - must match nothing"));
+          }
+        } else {
+          if (qc instanceof TermQuery) {
+            TermQuery tq = (TermQuery) qc;
+            allSpanClauses[i] = new SpanTermQuery(tq.getTerm());
+          } else {
+            throw new IllegalArgumentException("Unknown query type \""
+                + qc.getClass().getName()
+                + "\" found in phrase query string \""
+                + phrasedQueryStringContents + "\"");
+          }
+
+        }
+      }
+      if (numNegatives == 0) {
+        // The simple case - no negative elements in phrase
+        return new SpanNearQuery(allSpanClauses, slopFactor, true);
+      }
+      // Complex case - we have mixed positives and negatives in the
+      // sequence.
+      // Need to return a SpanNotQuery
+      ArrayList positiveClauses = new ArrayList();
+      for (int j = 0; j < allSpanClauses.length; j++) {
+        if (!bclauses[j].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
+          positiveClauses.add(allSpanClauses[j]);
+        }
+      }
+
+      SpanQuery[] includeClauses = (SpanQuery[]) positiveClauses
+          .toArray(new SpanQuery[positiveClauses.size()]);
+
+      SpanQuery include = null;
+      if (includeClauses.length == 1) {
+        include = includeClauses[0]; // only one positive clause
+      } else {
+        // need to increase slop factor based on gaps introduced by
+        // negatives
+        include = new SpanNearQuery(includeClauses, slopFactor + numNegatives,
+            true);
+      }
+      // Use sequence of positive and negative values as the exclude.
+      SpanNearQuery exclude = new SpanNearQuery(allSpanClauses, slopFactor,
+          true);
+      SpanNotQuery snot = new SpanNotQuery(include, exclude);
+      return snot;
+    }
+
+    private void addComplexPhraseClause(List spanClauses, BooleanQuery qc) {
+      ArrayList ors = new ArrayList();
+      ArrayList nots = new ArrayList();
+      BooleanClause[] bclauses = qc.getClauses();
+
+      // For all clauses e.g. one* two~
+      for (int i = 0; i < bclauses.length; i++) {
+        Query childQuery = bclauses[i].getQuery();
+
+        // select the list to which we will add these options
+        ArrayList chosenList = ors;
+        if (bclauses[i].getOccur() == BooleanClause.Occur.MUST_NOT) {
+          chosenList = nots;
+        }
+
+        if (childQuery instanceof TermQuery) {
+          TermQuery tq = (TermQuery) childQuery;
+          SpanTermQuery stq = new SpanTermQuery(tq.getTerm());
+          stq.setBoost(tq.getBoost());
+          chosenList.add(stq);
+        } else if (childQuery instanceof BooleanQuery) {
+          BooleanQuery cbq = (BooleanQuery) childQuery;
+          addComplexPhraseClause(chosenList, cbq);
+        } else {
+          // TODO alternatively could call extract terms here?
+          throw new IllegalArgumentException("Unknown query type:"
+              + childQuery.getClass().getName());
+        }
+      }
+      if (ors.size() == 0) {
+        return;
+      }
+      SpanOrQuery soq = new SpanOrQuery((SpanQuery[]) ors
+          .toArray(new SpanQuery[ors.size()]));
+      if (nots.size() == 0) {
+        spanClauses.add(soq);
+      } else {
+        SpanOrQuery snqs = new SpanOrQuery((SpanQuery[]) nots
+            .toArray(new SpanQuery[nots.size()]));
+        SpanNotQuery snq = new SpanNotQuery(soq, snqs);
+        spanClauses.add(snq);
+      }
+    }
+
+    public String toString(String field) {
+      return "\"" + phrasedQueryStringContents + "\"";
+    }
+
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + ((field == null) ? 0 : field.hashCode());
+      result = prime
+          * result
+          + ((phrasedQueryStringContents == null) ? 0
+              : phrasedQueryStringContents.hashCode());
+      result = prime * result + slopFactor;
+      return result;
+    }
+
+    public boolean equals(Object obj) {
+      if (this == obj)
+        return true;
+      if (obj == null)
+        return false;
+      if (getClass() != obj.getClass())
+        return false;
+      ComplexPhraseQuery other = (ComplexPhraseQuery) obj;
+      if (field == null) {
+        if (other.field != null)
+          return false;
+      } else if (!field.equals(other.field))
+        return false;
+      if (phrasedQueryStringContents == null) {
+        if (other.phrasedQueryStringContents != null)
+          return false;
+      } else if (!phrasedQueryStringContents
+          .equals(other.phrasedQueryStringContents))
+        return false;
+      if (slopFactor != other.slopFactor)
+        return false;
+      return true;
+    }
+  }
+}

Added: lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java?rev=800193&view=auto
==============================================================================
--- lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java
(added)
+++ lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java
Mon Aug  3 04:06:22 2009
@@ -0,0 +1,144 @@
+package org.apache.lucene.queryParser.complexPhrase;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashSet;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.RAMDirectory;
+
+public class TestComplexPhraseQuery extends TestCase {
+
+  Analyzer analyzer = new StandardAnalyzer();
+
+  DocData docsContent[] = { new DocData("john smith", "1"),
+      new DocData("johathon smith", "2"),
+      new DocData("john percival smith", "3"),
+      new DocData("jackson waits tom", "4") };
+
+  private IndexSearcher searcher;
+
+  String defaultFieldName = "name";
+
+  public void testComplexPhrases() throws Exception {
+    checkMatches("\"john smith\"", "1"); // Simple multi-term still works
+    checkMatches("\"j*   smyth~\"", "1,2"); // wildcards and fuzzies are OK in
+    // phrases
+    checkMatches("\"(jo* -john)  smith\"", "2"); // boolean logic works
+    checkMatches("\"jo*  smith\"~2", "1,2,3"); // position logic works.
+    checkMatches("\"jo* [sma TO smZ]\" ", "1,2"); // range queries supported
+    checkMatches("\"john\"", "1,3"); // Simple single-term still works
+    checkMatches("\"(john OR johathon)  smith\"", "1,2"); // boolean logic with
+    // brackets works.
+    checkMatches("\"(jo* -john) smyth~\"", "2"); // boolean logic with
+    // brackets works.
+
+    // checkMatches("\"john -percival\"", "1"); // not logic doesn't work
+    // currently :(.
+
+    checkMatches("\"john  nosuchword*\"", ""); // phrases with clauses producing
+    // empty sets
+
+    checkBadQuery("\"jo*  id:1 smith\""); // mixing fields in a phrase is bad
+    checkBadQuery("\"jo* \"smith\" \""); // phrases inside phrases is bad
+  }
+
+  private void checkBadQuery(String qString) {
+    QueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+    Throwable expected = null;
+    try {
+      qp.parse(qString);
+    } catch (Throwable e) {
+      expected = e;
+    }
+    assertNotNull("Expected parse error in " + qString, expected);
+
+  }
+
+  private void checkMatches(String qString, String expectedVals)
+      throws Exception {
+    QueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+    qp.setFuzzyPrefixLength(1); // usually a good idea
+
+    Query q = qp.parse(qString);
+
+    HashSet expecteds = new HashSet();
+    String[] vals = expectedVals.split(",");
+    for (int i = 0; i < vals.length; i++) {
+      if (vals[i].length() > 0)
+        expecteds.add(vals[i]);
+    }
+
+    TopDocs td = searcher.search(q, 10);
+    ScoreDoc[] sd = td.scoreDocs;
+    for (int i = 0; i < sd.length; i++) {
+      Document doc = searcher.doc(sd[i].doc);
+      String id = doc.get("id");
+      assertTrue(qString + "matched doc#" + id + " not expected", expecteds
+          .contains(id));
+      expecteds.remove(id);
+    }
+
+    assertEquals(qString + " missing some matches ", 0, expecteds.size());
+
+  }
+
+  protected void setUp() throws Exception {
+    RAMDirectory rd = new RAMDirectory();
+    IndexWriter w = new IndexWriter(rd, analyzer, MaxFieldLength.UNLIMITED);
+    for (int i = 0; i < docsContent.length; i++) {
+      Document doc = new Document();
+      doc.add(new Field("name", docsContent[i].name, Field.Store.YES,
+          Field.Index.ANALYZED));
+      doc.add(new Field("id", docsContent[i].id, Field.Store.YES,
+          Field.Index.ANALYZED));
+      w.addDocument(doc);
+    }
+    w.close();
+    searcher = new IndexSearcher(rd);
+  }
+
+  protected void tearDown() throws Exception {
+    searcher.close();
+  }
+
+  static class DocData {
+    String name;
+
+    String id;
+
+    public DocData(String name, String id) {
+      super();
+      this.name = name;
+      this.id = id;
+    }
+  }
+
+}



Mime
View raw message