lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject svn commit: r831024 - in /lucene/java/trunk: ./ contrib/highlighter/ contrib/highlighter/src/java/org/apache/lucene/search/highlight/ contrib/highlighter/src/test/org/apache/lucene/search/highlight/
Date Thu, 29 Oct 2009 16:41:04 GMT
Author: markrmiller
Date: Thu Oct 29 16:41:04 2009
New Revision: 831024

URL: http://svn.apache.org/viewvc?rev=831024&view=rev
Log:
LUCENE-2013: SpanRegexQuery does not work with QueryScorer.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/highlighter/build.xml
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=831024&r1=831023&r2=831024&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Oct 29 16:41:04 2009
@@ -160,6 +160,9 @@
 
 * LUCENE-2004: Fix Constants.LUCENE_MAIN_VERSION to not be inlined
   by client code.  (Uwe Schindler)
+  
+* LUCENE-2013: SpanRegexQuery does not work with QueryScorer.
+  (Benjamin Keil via Mark Miller)
 
 New features
 

Modified: lucene/java/trunk/contrib/highlighter/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/build.xml?rev=831024&r1=831023&r2=831024&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/build.xml (original)
+++ lucene/java/trunk/contrib/highlighter/build.xml Thu Oct 29 16:41:04 2009
@@ -27,18 +27,26 @@
 
   <property name="memory.jar" location="${common.dir}/build/contrib/memory/lucene-memory-${version}.jar"/>
   <available property="memory.jar.present" type="file" file="${memory.jar}"/>
+  
+  <property name="regex.jar" location="${common.dir}/build/contrib/regex/lucene-regex-${version}.jar"/>
+  <available property="regex.jar.present" type="file" file="${regex.jar}"/>
 
   <path id="classpath">
     <pathelement path="${lucene.jar}"/>
     <pathelement path="${memory.jar}"/>
+    <pathelement path="${regex.jar}"/>
     <pathelement path="${project.classpath}"/>
   </path>
 
-  <target name="compile-core" depends="build-memory, common.compile-core" />
+  <target name="compile-core" depends="build-memory, build-regex, common.compile-core"
/>
 
   <target name="build-memory" unless="memory.jar.present">
     <echo>Highlighter building dependency ${memory.jar}</echo>
     <ant antfile="../memory/build.xml" target="default" inheritall="false" dir="../memory"
/>
   </target>
-
+  
+  <target name="build-regex" unless="regex.jar.present">
+    <echo>Highlighter building dependency ${regex.jar}</echo>
+    <ant antfile="../regex/build.xml" target="default" inheritall="false" dir="../regex"
/>
+  </target>
 </project>

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=831024&r1=831023&r2=831024&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
(original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
Thu Oct 29 16:41:04 2009
@@ -32,7 +32,10 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.search.*;
+import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
+import org.apache.lucene.search.spans.SpanFirstQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
@@ -220,16 +223,11 @@
    * @throws IOException
    */
   private void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery
spanQuery) throws IOException {
-    Set<Term> nonWeightedTerms = new HashSet<Term>();
-    spanQuery.extractTerms(nonWeightedTerms);
-
     Set<String> fieldNames;
 
     if (fieldName == null) {
       fieldNames = new HashSet<String>();
-      for (final Term queryTerm : nonWeightedTerms) {
-        fieldNames.add(queryTerm.field());
-      }
+      collectSpanQueryFields(spanQuery, fieldNames);
     } else {
       fieldNames = new HashSet<String>(1);
       fieldNames.add(fieldName);
@@ -238,13 +236,33 @@
     if (defaultField != null) {
       fieldNames.add(defaultField);
     }
+    
+    Map<String, SpanQuery> queries = new HashMap<String, SpanQuery>();
+ 
+    Set<Term> nonWeightedTerms = new HashSet<Term>();
+    final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
+    if (mustRewriteQuery) {
+      for (final String field : fieldNames) {
+        final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getReaderForField(field));
+        queries.put(field, rewrittenQuery);
+        rewrittenQuery.extractTerms(nonWeightedTerms);
+      }
+    } else {
+      spanQuery.extractTerms(nonWeightedTerms);
+    }
 
     List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
 
     for (final String field : fieldNames) {
 
       IndexReader reader = getReaderForField(field);
-      Spans spans = spanQuery.getSpans(reader);
+      final Spans spans;
+      if (mustRewriteQuery) {
+        spans = queries.get(field).getSpans(reader);
+      } else {
+        spans = spanQuery.getSpans(reader);
+      }
+
 
       // collect span positions
       while (spans.next()) {
@@ -429,6 +447,57 @@
     return terms;
   }
   
+  private void collectSpanQueryFields(SpanQuery spanQuery, Set<String> fieldNames)
{
+    if (spanQuery instanceof FieldMaskingSpanQuery) {
+      collectSpanQueryFields(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery(), fieldNames);
+    } else if (spanQuery instanceof SpanFirstQuery) {
+      collectSpanQueryFields(((SpanFirstQuery)spanQuery).getMatch(), fieldNames);
+    } else if (spanQuery instanceof SpanNearQuery) {
+      for (final SpanQuery clause : ((SpanNearQuery)spanQuery).getClauses()) {
+        collectSpanQueryFields(clause, fieldNames);
+      }
+    } else if (spanQuery instanceof SpanNotQuery) {
+      collectSpanQueryFields(((SpanNotQuery)spanQuery).getInclude(), fieldNames);
+    } else if (spanQuery instanceof SpanOrQuery) {
+      for (final SpanQuery clause : ((SpanOrQuery)spanQuery).getClauses()) {
+        collectSpanQueryFields(clause, fieldNames);
+      }
+    } else {
+      fieldNames.add(spanQuery.getField());
+    }
+  }
+  
+  private boolean mustRewriteQuery(SpanQuery spanQuery) {
+    if (!expandMultiTermQuery) {
+      return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery.
+    } else if (spanQuery instanceof FieldMaskingSpanQuery) {
+      return mustRewriteQuery(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery());
+    } else if (spanQuery instanceof SpanFirstQuery) {
+      return mustRewriteQuery(((SpanFirstQuery)spanQuery).getMatch());
+    } else if (spanQuery instanceof SpanNearQuery) {
+      for (final SpanQuery clause : ((SpanNearQuery)spanQuery).getClauses()) {
+        if (mustRewriteQuery(clause)) {
+          return true;
+        }
+      }
+      return false; 
+    } else if (spanQuery instanceof SpanNotQuery) {
+      SpanNotQuery spanNotQuery = (SpanNotQuery)spanQuery;
+      return mustRewriteQuery(spanNotQuery.getInclude()) || mustRewriteQuery(spanNotQuery.getExclude());
+    } else if (spanQuery instanceof SpanOrQuery) {
+      for (final SpanQuery clause : ((SpanOrQuery)spanQuery).getClauses()) {
+        if (mustRewriteQuery(clause)) {
+          return true;
+        }
+      }
+      return false; 
+    } else if (spanQuery instanceof SpanTermQuery) {
+      return false;
+    } else {
+      return true;
+    }
+  }
+  
   /**
    * This class makes sure that if both position sensitive and insensitive
    * versions of the same term are added, the position insensitive one wins.

Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=831024&r1=831023&r2=831024&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
(original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
Thu Oct 29 16:41:04 2009
@@ -70,8 +70,10 @@
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
+import org.apache.lucene.search.regex.SpanRegexQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanNotQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
@@ -306,6 +308,31 @@
     
   }
   
+  public void testSpanRegexQuery() throws Exception {
+    query = new SpanOrQuery(new SpanQuery [] {
+        new SpanRegexQuery(new Term(FIELD_NAME, "ken.*")) });
+    searcher = new IndexSearcher(ramDir, true);
+    hits = searcher.search(query, 100);
+    int maxNumFragmentsRequired = 2;
+
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this, scorer);
+    
+    for (int i = 0; i < hits.totalHits; i++) {
+      String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      System.out.println("\t" + result);
+    }
+    
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 5);
+  }
+  
   public void testNumericRangeQuery() throws Exception {
     // doesn't currently highlight, but make sure it doesn't cause exception either
     query = NumericRangeQuery.newIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);



Mime
View raw message