lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sim...@apache.org
Subject svn commit: r1343957 - in /lucene/dev/branches/LUCENE-2878/lucene: core/src/java/org/apache/lucene/search/ core/src/java/org/apache/lucene/search/positions/ highlighter/src/test/org/apache/lucene/search/poshighlight/
Date Tue, 29 May 2012 20:18:21 GMT
Author: simonw
Date: Tue May 29 20:18:21 2012
New Revision: 1343957

URL: http://svn.apache.org/viewvc?rev=1343957&view=rev
Log:
LUCENE-2878: add position iterator support to PhraseQuery (except for sloppy phrases)

Modified:
    lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
    lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
    lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
    lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
    lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/BlockPositionIterator.java
    lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java

Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java?rev=1343957&r1=1343956&r2=1343957&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
(original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
Tue May 29 20:18:21 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.search.positions.BlockPositionIterator;
 import org.apache.lucene.search.positions.PositionIntervalIterator;
 import org.apache.lucene.search.similarities.Similarity;
 
@@ -36,6 +37,7 @@ final class ExactPhraseScorer extends Sc
   boolean noDocs;
   
   private final static class ChunkState {
+    final TermQuery.TermDocsEnumFactory factory;
     final DocsAndPositionsEnum posEnum;
     final int offset;
     final boolean useAdvance;
@@ -44,8 +46,9 @@ final class ExactPhraseScorer extends Sc
     int pos;
     int lastPos;
     
-    public ChunkState(DocsAndPositionsEnum posEnum, int offset,
-        boolean useAdvance) {
+    public ChunkState(TermQuery.TermDocsEnumFactory factory, DocsAndPositionsEnum posEnum, int offset,
+        boolean useAdvance) throws IOException {
+      this.factory = factory;
       this.posEnum = posEnum;
       this.offset = offset;
       this.useAdvance = useAdvance;
@@ -77,7 +80,7 @@ final class ExactPhraseScorer extends Sc
       // ANDing. This buys ~15% gain for phrases where
       // freq of rarest 2 terms is close:
       final boolean useAdvance = postings[i].docFreq > 5 * postings[0].docFreq;
-      chunkStates[i] = new ChunkState(postings[i].postings,
+      chunkStates[i] = new ChunkState(postings[i].factory, postings[i].postings,
           -postings[i].position, useAdvance);
       if (i > 0
           && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
@@ -323,7 +326,10 @@ final class ExactPhraseScorer extends Sc
   
   @Override
   public PositionIntervalIterator positions(boolean needsPayloads, boolean needsOffsets) throws IOException {
-    // nocommit implement this
-    throw new UnsupportedOperationException();
+    TermScorer.TermPositions[] posIters = new TermScorer.TermPositions[chunkStates.length];
+    for (int i = 0; i < chunkStates.length; i++) {
+      posIters[i] = new TermScorer.TermPositions(this, chunkStates[i].factory.docsAndPositionsEnum(needsOffsets), needsPayloads);
+    }
+    return new BlockPositionIterator(this, posIters);
   }
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1343957&r1=1343956&r2=1343957&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
(original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
Tue May 29 20:18:21 2012
@@ -17,26 +17,14 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.*;
-
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.index.*;
 import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.util.*;
 import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.TermContext;
-import org.apache.lucene.util.ToStringUtils;
+
+import java.io.IOException;
+import java.util.*;
 
 /**
  * MultiPhraseQuery is a generalized version of PhraseQuery, with an added
@@ -196,7 +184,7 @@ public class MultiPhraseQuery extends Qu
 
         final DocsAndPositionsEnum postingsEnum;
         int docFreq;
-
+        TermQuery.TermDocsEnumFactory factory;
         if (terms.length > 1) {
           postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts,
termsEnum);
 
@@ -218,6 +206,7 @@ public class MultiPhraseQuery extends Qu
             // None of the terms are in this reader
             return null;
           }
+          factory = null; // nocommit - what to do here
         } else {
           final Term term = terms[0];
           TermState termState = termContexts.get(term).get(context.ord);
@@ -235,9 +224,10 @@ public class MultiPhraseQuery extends Qu
           }
 
           docFreq = termsEnum.docFreq();
+          factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(term.bytes()), termState, termsEnum, postingsEnum, postingsEnum, acceptDocs);
         }
-
-        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(),
terms);
+        
+        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, factory, termsEnum.docFreq()
, positions.get(pos).intValue(), terms);
       }
 
       // sort by increasing docFreq order

Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1343957&r1=1343956&r2=1343957&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
(original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
Tue May 29 20:18:21 2012
@@ -17,28 +17,16 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.util.*;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Set;
 
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.TermContext;
-import org.apache.lucene.util.ToStringUtils;
-
 /** A Query that matches documents containing a particular sequence of terms.
  * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
  * 
@@ -135,13 +123,15 @@ public class PhraseQuery extends Query {
   }
 
   static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
+    final TermQuery.TermDocsEnumFactory factory;
     final DocsAndPositionsEnum postings;
     final int docFreq;
     final int position;
     final Term[] terms;
     final int nTerms; // for faster comparisons
 
-    public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term...
terms) {
+    public PostingsAndFreq(DocsAndPositionsEnum postings, TermQuery.TermDocsEnumFactory factory,
int docFreq, int position, Term... terms) throws IOException {
+      this.factory = factory;
       this.postings = postings;
       this.docFreq = docFreq;
       this.position = position;
@@ -264,7 +254,7 @@ public class PhraseQuery extends Query {
           return null;
         }
         te.seekExact(t.bytes(), state);
-        DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, false);
+        final DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, false);
 
         // PhraseQuery on a field that did not index
         // positions.
@@ -273,7 +263,8 @@ public class PhraseQuery extends Query {
           // term does exist, but has no positions
           throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
         }
-        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(),
t);
+        TermQuery.TermDocsEnumFactory factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(t.bytes()),
state, te, null, null, acceptDocs);
+        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, factory, te.docFreq(), positions.get(i).intValue(),
t);
       }
 
       // sort by increasing docFreq order

Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java?rev=1343957&r1=1343956&r2=1343957&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
(original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
Tue May 29 20:18:21 2012
@@ -252,22 +252,37 @@ public class TermQuery extends Query {
     private final Bits liveDocs;
     private final DocsEnum docs;
     private final DocsEnum docsAndFreqs;
+    private final TermState state;
+    private BytesRef term;
     
     TermDocsEnumFactory(TermsEnum termsEnum, DocsEnum docs, DocsEnum docsAndFreqs, Bits liveDocs) {
+      this(null, null, termsEnum, docs, docsAndFreqs, liveDocs);
+
+    }
+    
+    TermDocsEnumFactory(BytesRef term, TermState state, TermsEnum termsEnum,
+        DocsEnum docs, DocsEnum docsAndFreqs, Bits liveDocs) {
       this.termsEnum = termsEnum;
       this.liveDocs = liveDocs;
       this.docs = docs;
       this.docsAndFreqs = docsAndFreqs;
+      this.state = state;
+      this.term = term;
     }
     
     public DocsEnum docsEnum() throws IOException {
       return docs;
     }
     
-    public DocsAndPositionsEnum docsAndPositionsEnum(boolean offsets) throws IOException {
+    public DocsAndPositionsEnum docsAndPositionsEnum(boolean offsets)
+        throws IOException {
+      if (state != null) {
+        assert term != null;
+        termsEnum.seekExact(term, state);
+      }
       return termsEnum.docsAndPositions(liveDocs, null, offsets);
     }
-
+    
     public DocsEnum docsAndFreqsEnum() throws IOException{
       return docsAndFreqs;
     }

Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/BlockPositionIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/BlockPositionIterator.java?rev=1343957&r1=1343956&r2=1343957&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/BlockPositionIterator.java
(original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/BlockPositionIterator.java
Tue May 29 20:18:21 2012
@@ -78,6 +78,19 @@ public final class BlockPositionIterator
     this.gaps = gaps;
   }
 
+  public BlockPositionIterator(Scorer scorer, int[] gaps, PositionIntervalIterator[] iterators) {
+    super(scorer);
+    assert iterators.length > 1;
+    this.iterators = iterators;
+    intervals = new PositionInterval[iterators.length];
+    lastIter = iterators.length - 1;
+    this.gaps = gaps;
+  }
+
+  public BlockPositionIterator(Scorer scorer, PositionIntervalIterator[] iterators) {
+    this(scorer, defaultGaps(iterators.length), iterators);
+  }
+
   @Override
   public PositionInterval next() throws IOException {
     if ((intervals[0] = iterators[0].next()) == null) {

Modified: lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java?rev=1343957&r1=1343956&r2=1343957&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java
(original)
+++ lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java
Tue May 29 20:18:21 2012
@@ -1,5 +1,20 @@
 package org.apache.lucene.search.poshighlight;
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -31,7 +46,6 @@ import org.apache.lucene.search.position
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
-import org.junit.Ignore;
 
 import java.io.IOException;
 
@@ -262,7 +276,7 @@ public class PosHighlighterTest extends 
   /*
    * Failing ... PhraseQuery scorer needs positions()?
    */
-  @Ignore
+  //@Ignore
   public void testPhraseOriginal() throws Exception {
     insertDocs(analyzer, "This is a test");
     PhraseQuery pq = new PhraseQuery();



Mime
View raw message