lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sha...@apache.org
Subject [11/50] [abbrv] lucene-solr:jira/solr-11990: LUCENE-8306: Allow iteration over submatches
Date Sat, 28 Jul 2018 04:49:34 GMT
LUCENE-8306: Allow iteration over submatches


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a8839b7e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a8839b7e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a8839b7e

Branch: refs/heads/jira/solr-11990
Commit: a8839b7eab122037ed1c1674b2f909666718e331
Parents: 2826a95
Author: Alan Woodward <romseygeek@apache.org>
Authored: Sun Jul 22 20:59:50 2018 +0100
Committer: Alan Woodward <romseygeek@apache.org>
Committed: Sun Jul 22 21:42:46 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 .../search/DisjunctionMatchesIterator.java      |   9 +
 .../lucene/search/ExactPhraseMatcher.java       |  44 ++++
 .../apache/lucene/search/MatchesIterator.java   |  58 +++++
 .../apache/lucene/search/MultiPhraseQuery.java  |   5 +-
 .../org/apache/lucene/search/PhraseMatcher.java |   2 +
 .../org/apache/lucene/search/PhraseQuery.java   |   4 +-
 .../org/apache/lucene/search/PhraseWeight.java  |  10 +
 .../lucene/search/SloppyPhraseMatcher.java      | 121 ++++++---
 .../lucene/search/TermMatchesIterator.java      |  50 ++++
 .../apache/lucene/search/spans/SpanWeight.java  | 131 ++++++++++
 .../lucene/search/TestMatchesIterator.java      | 252 ++++++++++++++++++-
 .../lucene/search/AssertingMatchesIterator.java |  11 +
 13 files changed, 656 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 8257dbc..47769cd 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -198,6 +198,9 @@ Improvements
 * LUCENE-8345, GitHub PR #392: Remove instantiation of redundant wrapper classes for primitives;
   add wrapper class constructors to forbiddenapis.  (Michael Braun via Uwe Schindler)
 
+* LUCENE-8306: Matches API now allows iteration over sub-matches in Spans (Alan Woodward,
+  Jim Ferenczi, David Smiley)
+
 Other:
 
 * LUCENE-8366: Upgrade to ICU 62.1. Emoji handling now uses Unicode 11's

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
index 975199b..cff723b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
@@ -158,4 +158,13 @@ final class DisjunctionMatchesIterator implements MatchesIterator {
     return queue.top().endOffset();
   }
 
+  @Override
+  public MatchesIterator getSubMatches() throws IOException {
+    return queue.top().getSubMatches();
+  }
+
+  @Override
+  public Object label() {
+    return queue.top().label();
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java
index b95077d..6fb49b1 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java
@@ -149,4 +149,48 @@ final class ExactPhraseMatcher extends PhraseMatcher {
     return postings[postings.length - 1].postings.endOffset();
   }
 
+  @Override
+  MatchesIterator getSubMatches() {
+    return new MatchesIterator() {
+
+      int upTo = -1;
+
+      @Override
+      public boolean next() throws IOException {
+        upTo++;
+        return upTo < postings.length;
+      }
+
+      @Override
+      public int startPosition() {
+        return postings[upTo].pos;
+      }
+
+      @Override
+      public int endPosition() {
+        return postings[upTo].pos;
+      }
+
+      @Override
+      public int startOffset() throws IOException {
+        return postings[upTo].postings.startOffset();
+      }
+
+      @Override
+      public int endOffset() throws IOException {
+        return postings[upTo].postings.endOffset();
+      }
+
+      @Override
+      public MatchesIterator getSubMatches() throws IOException {
+        return MatchesIterator.EMPTY_ITERATOR;
+      }
+
+      @Override
+      public Object label() {
+        return this;
+      }
+    };
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
index 450a352..5814d7c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
@@ -28,6 +28,9 @@ import org.apache.lucene.index.LeafReaderContext;
  * positions and/or offsets after each call.  You should not call the position or offset methods
  * before {@link #next()} has been called, or after {@link #next()} has returned {@code false}.
  *
+ * Matches from some queries may span multiple positions.  You can retrieve the positions of
+ * individual matching terms on the current match by calling {@link #getSubMatches()}.
+ *
  * Matches are ordered by start position, and then by end position.  Match intervals may overlap.
  *
  * @see Weight#matches(LeafReaderContext, int)
@@ -70,4 +73,59 @@ public interface MatchesIterator {
    */
   int endOffset() throws IOException;
 
+  /**
+   * Returns a MatchesIterator that iterates over the positions and offsets of individual
+   * terms within the current match
+   *
+   * Should only be called after {@link #next()} has returned {@code true}
+   */
+  MatchesIterator getSubMatches() throws IOException;
+
+  /**
+   * Returns a label identifying the leaf query causing the current match
+   *
+   * Should only be called after {@link #next()} has returned {@code true}
+   */
+  Object label();
+
+  /**
+   * A MatchesIterator that is immediately exhausted
+   */
+  MatchesIterator EMPTY_ITERATOR = new MatchesIterator() {
+    @Override
+    public boolean next() throws IOException {
+      return false;
+    }
+
+    @Override
+    public int startPosition() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int endPosition() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int startOffset() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public MatchesIterator getSubMatches() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Object label() {
+      return this;
+    }
+  };
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
index 22b7127..c8d22ba 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@@ -269,7 +269,7 @@ public class MultiPhraseQuery extends Query {
             TermState termState = termStates.get(term).get(context);
             if (termState != null) {
               termsEnum.seekExact(term.bytes(), termState);
-              postings.add(termsEnum.postings(null, exposeOffsets ? PostingsEnum.OFFSETS : PostingsEnum.POSITIONS));
+              postings.add(termsEnum.postings(null, exposeOffsets ? PostingsEnum.ALL : PostingsEnum.POSITIONS));
               totalMatchCost += PhraseQuery.termPositionsCost(termsEnum);
             }
           }
@@ -294,7 +294,7 @@ public class MultiPhraseQuery extends Query {
           return new ExactPhraseMatcher(postingsFreqs, totalMatchCost);
         }
         else {
-          return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost);
+          return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost, exposeOffsets);
         }
 
       }
@@ -647,5 +647,6 @@ public class MultiPhraseQuery extends Query {
     public BytesRef getPayload() throws IOException {
       return posQueue.top().pe.getPayload();
     }
+
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/PhraseMatcher.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseMatcher.java b/lucene/core/src/java/org/apache/lucene/search/PhraseMatcher.java
index 81040d5..c4b1e36 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseMatcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseMatcher.java
@@ -88,4 +88,6 @@ abstract class PhraseMatcher {
   public float getMatchCost() {
     return matchCost;
   }
+
+  abstract MatchesIterator getSubMatches() throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index 70d2e09..8f04271 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -446,7 +446,7 @@ public class PhraseQuery extends Query {
             return null;
           }
           te.seekExact(t.bytes(), state);
-          PostingsEnum postingsEnum = te.postings(null, exposeOffsets ? PostingsEnum.OFFSETS : PostingsEnum.POSITIONS);
+          PostingsEnum postingsEnum = te.postings(null, exposeOffsets ? PostingsEnum.ALL : PostingsEnum.POSITIONS);
           postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
           totalMatchCost += termPositionsCost(te);
         }
@@ -457,7 +457,7 @@ public class PhraseQuery extends Query {
           return new ExactPhraseMatcher(postingsFreqs, totalMatchCost);
         }
         else {
-          return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost);
+          return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost, exposeOffsets);
         }
       }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/PhraseWeight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseWeight.java b/lucene/core/src/java/org/apache/lucene/search/PhraseWeight.java
index 90fa537..2547b8c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseWeight.java
@@ -123,6 +123,16 @@ abstract class PhraseWeight extends Weight {
         public int endOffset() throws IOException {
           return matcher.endOffset();
         }
+
+        @Override
+        public MatchesIterator getSubMatches() throws IOException {
+          return matcher.getSubMatches();
+        }
+
+        @Override
+        public Object label() {
+          return matcher;
+        }
       };
     });
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseMatcher.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseMatcher.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseMatcher.java
index 326816d..e9c4519 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseMatcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseMatcher.java
@@ -54,13 +54,14 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
   private final int slop;
   private final int numPostings;
   private final PhraseQueue pq; // for advancing min position
+  private final boolean captureLeadMatch;
 
   private int end; // current largest phrase position
 
   private int leadPosition;
   private int leadOffset;
-  private int currentEndPostings;
-  private int advanceEndPostings;
+  private int leadEndOffset;
+  private int leadOrd;
 
   private boolean hasRpts; // flag indicating that there are repetitions (as checked in first candidate doc)
   private boolean checkedRpts; // flag to only check for repetitions in first candidate doc
@@ -71,10 +72,11 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
   private boolean positioned;
   private int matchLength;
 
-  SloppyPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, int slop, float matchCost) {
+  SloppyPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, int slop, float matchCost, boolean captureLeadMatch) {
     super(approximation(postings), matchCost);
     this.slop = slop;
     this.numPostings = postings.length;
+    this.captureLeadMatch = captureLeadMatch;
     pq = new PhraseQueue(postings.length);
     phrasePositions = new PhrasePositions[postings.length];
     for (int i = 0; i < postings.length; ++i) {
@@ -120,10 +122,8 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
       return false;
     }
     PhrasePositions pp = pq.pop();
-    assert pp != null;  // if the pq is empty, then positioned == false
-    leadPosition = pp.position + pp.offset;
-    leadOffset = pp.postings.startOffset();
-    currentEndPostings = advanceEndPostings;
+    assert pp != null;  // if the pq is not full, then positioned == false
+    captureLead(pp);
     matchLength = end - pp.position;
     int next = pq.top().position; 
     while (advancePP(pp)) {
@@ -137,6 +137,7 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
         }
         pp = pq.pop();
         next = pq.top().position;
+        assert pp != null;  // if the pq is not full, then positioned == false
         matchLength = end - pp.position;
       } else {
         int matchLength2 = end - pp.position;
@@ -144,14 +145,22 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
           matchLength = matchLength2;
         }
       }
-      leadPosition = pp.position + pp.offset;
-      leadOffset = pp.postings.startOffset();
-      currentEndPostings = advanceEndPostings;
+      captureLead(pp);
     }
     positioned = false;
     return matchLength <= slop;
   }
 
+  private void captureLead(PhrasePositions pp) throws IOException {
+    if (captureLeadMatch == false) {
+      return;
+    }
+    leadOrd = pp.ord;
+    leadPosition = pp.position + pp.offset;
+    leadOffset = pp.postings.startOffset();
+    leadEndOffset = pp.postings.endOffset();
+  }
+
   @Override
   public int startPosition() {
     // when a match is detected, the top postings is advanced until it has moved
@@ -160,6 +169,7 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
     // However, the priority queue doesn't guarantee that the top postings is in fact the
     // earliest in the list, so we need to cycle through all terms to check.
     // this is slow, but Matches is slow anyway...
+    int leadPosition = this.leadPosition;
     for (PhrasePositions pp : phrasePositions) {
       leadPosition = Math.min(leadPosition, pp.position + pp.offset);
     }
@@ -168,7 +178,13 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
 
   @Override
   public int endPosition() {
-    return phrasePositions[currentEndPostings].position + phrasePositions[currentEndPostings].offset;
+    int endPosition = leadPosition;
+    for (PhrasePositions pp : phrasePositions) {
+      if (pp.ord != leadOrd) {
+        endPosition = Math.max(endPosition, pp.position + pp.offset);
+      }
+    }
+    return endPosition;
   }
 
   @Override
@@ -179,6 +195,7 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
     // However, the priority queue doesn't guarantee that the top postings is in fact the
     // earliest in the list, so we need to cycle through all terms to check
     // this is slow, but Matches is slow anyway...
+    int leadOffset = this.leadOffset;
     for (PhrasePositions pp : phrasePositions) {
       leadOffset = Math.min(leadOffset, pp.postings.startOffset());
     }
@@ -187,7 +204,69 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
 
   @Override
   public int endOffset() throws IOException {
-    return phrasePositions[currentEndPostings].postings.endOffset();
+    int endOffset = leadEndOffset;
+    for (PhrasePositions pp : phrasePositions) {
+      if (pp.ord != leadOrd) {
+        endOffset = Math.max(endOffset, pp.postings.endOffset());
+      }
+    }
+    return endOffset;
+  }
+
+  @Override
+  MatchesIterator getSubMatches() throws IOException {
+    int[][] submatches = new int[phrasePositions.length][3];
+    for (PhrasePositions pp : phrasePositions) {
+      if (pp.ord == leadOrd) {
+        submatches[pp.ord][0] = leadPosition;
+        submatches[pp.ord][1] = leadOffset;
+        submatches[pp.ord][2] = leadEndOffset;
+      }
+      else {
+        submatches[pp.ord][0] = pp.position + pp.offset;
+        submatches[pp.ord][1] = pp.postings.startOffset();
+        submatches[pp.ord][2] = pp.postings.endOffset();
+      }
+    }
+    Arrays.sort(submatches, Comparator.comparingInt(a -> a[0]));
+    return new MatchesIterator() {
+      int upTo = -1;
+      @Override
+      public boolean next() throws IOException {
+        upTo++;
+        return upTo < submatches.length;
+      }
+
+      @Override
+      public int startPosition() {
+        return submatches[upTo][0];
+      }
+
+      @Override
+      public int endPosition() {
+        return submatches[upTo][0];
+      }
+
+      @Override
+      public int startOffset() {
+        return submatches[upTo][1];
+      }
+
+      @Override
+      public int endOffset() {
+        return submatches[upTo][2];
+      }
+
+      @Override
+      public MatchesIterator getSubMatches() {
+        return MatchesIterator.EMPTY_ITERATOR;
+      }
+
+      @Override
+      public Object label() {
+        return this;
+      }
+    };
   }
 
   /** advance a PhrasePosition and update 'end', return false if exhausted */
@@ -197,12 +276,6 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
     }
     if (pp.position > end) {
       end = pp.position;
-      advanceEndPostings = pp.ord;
-    }
-    if (pp.position == end) {
-      if (pp.ord > advanceEndPostings) {
-        advanceEndPostings = pp.ord;
-      }
     }
     return true;
   }
@@ -307,12 +380,6 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
       pp.firstPosition();
       if (pp.position > end) {
         end = pp.position;
-        advanceEndPostings = pp.ord;
-      }
-      if (pp.position == end) {
-        if (pp.ord > advanceEndPostings) {
-          advanceEndPostings = pp.ord;
-        }
       }
       pq.add(pp);
     }
@@ -342,12 +409,6 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
     for (PhrasePositions pp : phrasePositions) {  // iterate cyclic list: done once handled max
       if (pp.position > end) {
         end = pp.position;
-        advanceEndPostings = pp.ord;
-      }
-      if (pp.position == end) {
-        if (pp.ord > advanceEndPostings) {
-          advanceEndPostings = pp.ord;
-        }
       }
       pq.add(pp);
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
index defc3af..23858c6 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
@@ -67,4 +67,54 @@ class TermMatchesIterator implements MatchesIterator {
     return pe.endOffset();
   }
 
+  @Override
+  public Object label() {
+    return pe;
+  }
+
+  @Override
+  public MatchesIterator getSubMatches() throws IOException {
+    return new MatchesIterator() {
+
+      boolean exhausted = false;
+
+      @Override
+      public boolean next() {
+        if (exhausted) {
+          return false;
+        }
+        return exhausted = true;
+      }
+
+      @Override
+      public int startPosition() {
+        return pos;
+      }
+
+      @Override
+      public int endPosition() {
+        return pos;
+      }
+
+      @Override
+      public int startOffset() throws IOException {
+        return pe.startOffset();
+      }
+
+      @Override
+      public int endOffset() throws IOException {
+        return pe.endOffset();
+      }
+
+      @Override
+      public MatchesIterator getSubMatches() {
+        return MatchesIterator.EMPTY_ITERATOR;
+      }
+
+      @Override
+      public Object label() {
+        return this;
+      }
+    };
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
index 0313d56..ca789ce 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
@@ -18,6 +18,8 @@ package org.apache.lucene.search.spans;
 
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.Comparator;
 import java.util.Map;
 
 import org.apache.lucene.index.LeafReaderContext;
@@ -28,6 +30,8 @@ import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LeafSimScorer;
+import org.apache.lucene.search.Matches;
+import org.apache.lucene.search.MatchesIterator;
 import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.similarities.Similarity;
@@ -161,4 +165,131 @@ public abstract class SpanWeight extends Weight {
 
     return Explanation.noMatch("no matching term");
   }
+
+  @Override
+  public Matches matches(LeafReaderContext context, int doc) throws IOException {
+    return Matches.forField(field, () -> {
+      Spans spans = getSpans(context, Postings.OFFSETS);
+      if (spans == null) {
+        return null;
+      }
+      if (spans.advance(doc) != doc) {
+        return null;
+      }
+      return new MatchesIterator() {
+
+        int innerTermCount = 0;
+        int[][] innerTerms = new int[2][3];
+        SpanCollector termCollector = new SpanCollector() {
+          @Override
+          public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
+            innerTermCount++;
+            if (innerTermCount > innerTerms.length) {
+              int[][] temp = new int[innerTermCount][3];
+              System.arraycopy(innerTerms, 0, temp, 0, innerTermCount - 1);
+              innerTerms = temp;
+            }
+            innerTerms[innerTermCount - 1][0] = position;
+            innerTerms[innerTermCount - 1][1] = postings.startOffset();
+            innerTerms[innerTermCount - 1][2] = postings.endOffset();
+          }
+
+          @Override
+          public void reset() {
+            innerTermCount = 0;
+          }
+        };
+
+        @Override
+        public boolean next() throws IOException {
+          innerTermCount = 0;
+          return spans.nextStartPosition() != Spans.NO_MORE_POSITIONS;
+        }
+
+        @Override
+        public int startPosition() {
+          return spans.startPosition();
+        }
+
+        @Override
+        public int endPosition() {
+          return spans.endPosition() - 1;
+        }
+
+        @Override
+        public int startOffset() throws IOException {
+          if (innerTermCount == 0) {
+            collectInnerTerms();
+          }
+          return innerTerms[0][1];
+        }
+
+        @Override
+        public int endOffset() throws IOException {
+          if (innerTermCount == 0) {
+            collectInnerTerms();
+          }
+          return innerTerms[innerTermCount - 1][2];
+        }
+
+        @Override
+        public MatchesIterator getSubMatches() throws IOException {
+          if (innerTermCount == 0) {
+            collectInnerTerms();
+          }
+          return new MatchesIterator() {
+
+            int upto = -1;
+
+            @Override
+            public boolean next() throws IOException {
+              upto++;
+              return upto < innerTermCount;
+            }
+
+            @Override
+            public int startPosition() {
+              return innerTerms[upto][0];
+            }
+
+            @Override
+            public int endPosition() {
+              return innerTerms[upto][0];
+            }
+
+            @Override
+            public int startOffset() throws IOException {
+              return innerTerms[upto][1];
+            }
+
+            @Override
+            public int endOffset() throws IOException {
+              return innerTerms[upto][2];
+            }
+
+            @Override
+            public MatchesIterator getSubMatches() throws IOException {
+              return MatchesIterator.EMPTY_ITERATOR;
+            }
+
+            @Override
+            public Object label() {
+              return this;
+            }
+          };
+        }
+
+        @Override
+        public Object label() {
+          return SpanWeight.this;
+        }
+
+        void collectInnerTerms() throws IOException {
+          termCollector.reset();
+          spans.collect(termCollector);
+          Arrays.sort(innerTerms, 0, innerTermCount, Comparator.comparing(a -> a[0]));
+        }
+      };
+    });
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java b/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
index 3855b04..86e4399 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
@@ -18,8 +18,12 @@
 package org.apache.lucene.search;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.Objects;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
@@ -29,9 +33,14 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -99,7 +108,7 @@ public class TestMatchesIterator extends LuceneTestCase {
       "nothing matches this document"
   };
 
-  void checkMatches(Query q, String field, int[][] expected) throws IOException {
+  private void checkMatches(Query q, String field, int[][] expected) throws IOException {
     Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
     for (int i = 0; i < expected.length; i++) {
       LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(expected[i][0], searcher.leafContexts));
@@ -112,14 +121,40 @@ public class TestMatchesIterator extends LuceneTestCase {
       MatchesIterator it = matches.getMatches(field);
       if (expected[i].length == 1) {
         assertNull(it);
-        return;
+        continue;
       }
       checkFieldMatches(it, expected[i]);
       checkFieldMatches(matches.getMatches(field), expected[i]);  // test multiple calls
     }
   }
 
-  void checkFieldMatches(MatchesIterator it, int[] expected) throws IOException {
+  private void checkLabelCount(Query q, String field, int[] expected) throws IOException {
+    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
+    for (int i = 0; i < expected.length; i++) {
+      LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
+      int doc = i - ctx.docBase;
+      Matches matches = w.matches(ctx, doc);
+      if (matches == null) {
+        assertEquals("Expected to get matches on document " + i, 0, expected[i]);
+        continue;
+      }
+      MatchesIterator it = matches.getMatches(field);
+      if (expected[i] == 0) {
+        assertNull(it);
+        continue;
+      }
+      else {
+        assertNotNull(it);
+      }
+      IdentityHashMap<Object, Integer> labels = new IdentityHashMap<>();
+      while (it.next()) {
+        labels.put(it.label(), 1);
+      }
+      assertEquals(expected[i], labels.size());
+    }
+  }
+
+  private void checkFieldMatches(MatchesIterator it, int[] expected) throws IOException {
     int pos = 1;
     while (it.next()) {
       //System.out.println(expected[i][pos] + "->" + expected[i][pos + 1] + "[" + expected[i][pos + 2] + "->" + expected[i][pos + 3] + "]");
@@ -132,7 +167,7 @@ public class TestMatchesIterator extends LuceneTestCase {
     assertEquals(expected.length, pos);
   }
 
-  void checkNoPositionsMatches(Query q, String field, boolean[] expected) throws IOException {
+  private void checkNoPositionsMatches(Query q, String field, boolean[] expected) throws IOException {
     Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
     for (int i = 0; i < expected.length; i++) {
       LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
@@ -148,8 +183,90 @@ public class TestMatchesIterator extends LuceneTestCase {
     }
   }
 
+  private void checkTermMatches(Query q, String field, TermMatch[][][] expected) throws IOException {
+    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
+    for (int i = 0; i < expected.length; i++) {
+      LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
+      int doc = i - ctx.docBase;
+      Matches matches = w.matches(ctx, doc);
+      if (matches == null) {
+        assertEquals(expected[i].length, 0);
+        continue;
+      }
+      MatchesIterator it = matches.getMatches(field);
+      if (expected[i].length == 0) {
+        assertNull(it);
+        continue;
+      }
+      checkTerms(expected[i], it);
+    }
+  }
+
+  private void checkTerms(TermMatch[][] expected, MatchesIterator it) throws IOException {
+    int upTo = 0;
+    while (it.next()) {
+      Set<TermMatch> expectedMatches = new HashSet<>(Arrays.asList(expected[upTo]));
+      MatchesIterator submatches = it.getSubMatches();
+      while (submatches.next()) {
+        TermMatch tm = new TermMatch(submatches.startPosition(), submatches.startOffset(), submatches.endOffset());
+        if (expectedMatches.remove(tm) == false) {
+          fail("Unexpected term match: " + tm);
+        }
+      }
+      if (expectedMatches.size() != 0) {
+        fail("Missing term matches: " + expectedMatches.stream().map(Object::toString).collect(Collectors.joining(", ")));
+      }
+      upTo++;
+    }
+    if (upTo < expected.length - 1) {
+      fail("Missing expected match");
+    }
+  }
+
+  static class TermMatch {
+
+    public final int position;
+
+    public final int startOffset;
+
+    public final int endOffset;
+
+    public TermMatch(PostingsEnum pe, int position) throws IOException {
+      this.position = position;
+      this.startOffset = pe.startOffset();
+      this.endOffset = pe.endOffset();
+    }
+
+    public TermMatch(int position, int startOffset, int endOffset) {
+      this.position = position;
+      this.startOffset = startOffset;
+      this.endOffset = endOffset;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) return true;
+      if (o == null || getClass() != o.getClass()) return false;
+      TermMatch termMatch = (TermMatch) o;
+      return position == termMatch.position &&
+          startOffset == termMatch.startOffset &&
+          endOffset == termMatch.endOffset;
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hash(position, startOffset, endOffset);
+    }
+
+    @Override
+    public String toString() {
+      return position + "[" + startOffset + "->" + endOffset + "]";
+    }
+  }
+
   public void testTermQuery() throws IOException {
-    Query q = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1"));
+    Term t = new Term(FIELD_WITH_OFFSETS, "w1");
+    Query q = new TermQuery(t);
     checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
         { 0, 0, 0, 0, 2 },
         { 1, 0, 0, 0, 2 },
@@ -157,6 +274,14 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 2, 2, 6, 8 },
         { 4 }
     });
+    checkTermMatches(q, FIELD_WITH_OFFSETS, new TermMatch[][][]{
+        { { new TermMatch(0, 0, 2) } },
+        { { new TermMatch(0, 0, 2) } },
+        { { new TermMatch(0, 0, 2) } },
+        { { new TermMatch(0, 0, 2) }, { new TermMatch(2, 6, 8) } },
+        {}
+    });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 1, 1, 1, 1, 0, 0 });
   }
 
   public void testTermQueryNoStoredOffsets() throws IOException {
@@ -191,6 +316,7 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
         { 4 }
     });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 2, 2, 1, 2, 0, 0 });
   }
 
   public void testDisjunctionNoPositions() throws IOException {
@@ -215,6 +341,7 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
         { 4 }
     });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 2, 2, 0, 2, 0, 0 });
   }
 
   public void testReqOptNoPositions() throws IOException {
@@ -248,6 +375,7 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11, 5, 5, 15, 17 },
         { 4 }
     });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 3, 1, 3, 3, 0, 0 });
   }
 
   public void testMinShouldMatchNoPositions() throws IOException {
@@ -331,6 +459,7 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
         { 4 }
     });
+    checkLabelCount(rq, FIELD_WITH_OFFSETS, new int[]{ 2, 2, 2, 2, 0 });
 
   }
 
@@ -392,12 +521,55 @@ public class TestMatchesIterator extends LuceneTestCase {
   //  0         1         2         3         4         5         6         7
   // "a phrase sentence with many phrase sentence iterations of a phrase sentence",
 
+  public void testSloppyPhraseQueryWithRepeats() throws IOException {
+    Term p = new Term(FIELD_WITH_OFFSETS, "phrase");
+    Term s = new Term(FIELD_WITH_OFFSETS, "sentence");
+    PhraseQuery pq = new PhraseQuery(10, FIELD_WITH_OFFSETS, "phrase", "sentence", "sentence");
+    checkMatches(pq, FIELD_WITH_OFFSETS, new int[][]{
+        { 0 }, { 1 }, { 2 }, { 3 },
+        { 4, 1, 6, 2, 43, 2, 11, 9, 75, 5, 11, 28, 75, 6, 11, 35, 75 }
+    });
+    checkLabelCount(pq, FIELD_WITH_OFFSETS, new int[]{ 0, 0, 0, 0, 1 });
+    checkTermMatches(pq, FIELD_WITH_OFFSETS, new TermMatch[][][]{
+        {}, {}, {}, {},
+        { {
+            new TermMatch(1, 2, 8),
+            new TermMatch(2, 9, 17),
+            new TermMatch(6, 35, 43)
+          }, {
+            new TermMatch(5, 28, 34),
+            new TermMatch(2, 9, 17),
+            new TermMatch(11, 67, 75)
+        }, {
+            new TermMatch(5, 28, 34),
+            new TermMatch(6, 35, 43),
+            new TermMatch(11, 67, 75)
+        }, {
+            new TermMatch(10, 60, 66),
+            new TermMatch(6, 35, 43),
+            new TermMatch(11, 67, 75)
+        } }
+    });
+  }
+
   public void testSloppyPhraseQuery() throws IOException {
+    Term a = new Term(FIELD_WITH_OFFSETS, "a");
+    Term s = new Term(FIELD_WITH_OFFSETS, "sentence");
     PhraseQuery pq = new PhraseQuery(4, FIELD_WITH_OFFSETS, "a", "sentence");
     checkMatches(pq, FIELD_WITH_OFFSETS, new int[][]{
         { 0 }, { 1 }, { 2 }, { 3 },
         { 4, 0, 2, 0, 17, 6, 9, 35, 59, 9, 11, 58, 75 }
     });
+    checkTermMatches(pq, FIELD_WITH_OFFSETS, new TermMatch[][][]{
+        {}, {}, {}, {},
+        { {
+          new TermMatch(0, 0, 1), new TermMatch(2, 9, 17)
+        }, {
+          new TermMatch(9, 58, 59), new TermMatch(6, 35, 43)
+        }, {
+          new TermMatch(9, 58, 59), new TermMatch(11, 67, 75)
+        } }
+    });
   }
 
   public void testExactPhraseQuery() throws IOException {
@@ -407,28 +579,57 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 4, 1, 2, 2, 17, 5, 6, 28, 43, 10, 11, 60, 75 }
     });
 
+    Term a = new Term(FIELD_WITH_OFFSETS, "a");
+    Term s = new Term(FIELD_WITH_OFFSETS, "sentence");
     PhraseQuery pq2 = new PhraseQuery.Builder()
-        .add(new Term(FIELD_WITH_OFFSETS, "a"))
-        .add(new Term(FIELD_WITH_OFFSETS, "sentence"), 2)
+        .add(a)
+        .add(s, 2)
         .build();
     checkMatches(pq2, FIELD_WITH_OFFSETS, new int[][]{
         { 0 }, { 1 }, { 2 }, { 3 },
         { 4, 0, 2, 0, 17, 9, 11, 58, 75 }
     });
+    checkTermMatches(pq2, FIELD_WITH_OFFSETS, new TermMatch[][][]{
+        {}, {}, {}, {},
+        { {
+          new TermMatch(0, 0, 1), new TermMatch(2, 9, 17)
+        }, {
+          new TermMatch(9, 58, 59), new TermMatch(11, 67, 75)
+        } }
+    });
   }
 
   //  0         1         2         3         4         5         6         7
   // "a phrase sentence with many phrase sentence iterations of a phrase sentence",
 
   public void testSloppyMultiPhraseQuery() throws IOException {
+    Term p = new Term(FIELD_WITH_OFFSETS, "phrase");
+    Term s = new Term(FIELD_WITH_OFFSETS, "sentence");
+    Term i = new Term(FIELD_WITH_OFFSETS, "iterations");
     MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
-        .add(new Term(FIELD_WITH_OFFSETS, "phrase"))
-        .add(new Term[]{ new Term(FIELD_WITH_OFFSETS, "sentence"), new Term(FIELD_WITH_OFFSETS, "iterations") })
+        .add(p)
+        .add(new Term[]{ s, i })
         .setSlop(4)
         .build();
     checkMatches(mpq, FIELD_WITH_OFFSETS, new int[][]{
         { 0 }, { 1 }, { 2 }, { 3 },
-        { 4, 1, 2, 2, 17, 5, 7, 28, 54, 5, 7, 28, 54, 10, 11, 60, 75 }
+        { 4, 1, 2, 2, 17, 5, 6, 28, 43, 5, 7, 28, 54, 10, 11, 60, 75 }
+    });
+    checkTermMatches(mpq, FIELD_WITH_OFFSETS, new TermMatch[][][]{
+        {}, {}, {}, {},
+        { {
+            new TermMatch(1, 2, 8),
+            new TermMatch(2, 9, 17)
+          }, {
+            new TermMatch(5, 28, 34),
+            new TermMatch(6, 35, 43)
+          }, {
+            new TermMatch(5, 28, 34),
+            new TermMatch(7, 44, 54)
+          }, {
+            new TermMatch(10, 60, 66),
+            new TermMatch(11, 67, 75)
+        } }
     });
   }
 
@@ -452,4 +653,35 @@ public class TestMatchesIterator extends LuceneTestCase {
     });
   }
 
+  //  0         1         2         3         4         5         6         7
+  // "a phrase sentence with many phrase sentence iterations of a phrase sentence",
+
+  public void testSpanQuery() throws IOException {
+    SpanQuery subq = SpanNearQuery.newOrderedNearQuery(FIELD_WITH_OFFSETS)
+        .addClause(new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "with")))
+        .addClause(new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "many")))
+        .build();
+    Query q = SpanNearQuery.newOrderedNearQuery(FIELD_WITH_OFFSETS)
+        .addClause(new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "sentence")))
+        .addClause(new SpanOrQuery(subq, new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "iterations"))))
+        .build();
+    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+        { 0 }, { 1 }, { 2 }, { 3 },
+        { 4, 2, 4, 9, 27, 6, 7, 35, 54 }
+    });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 0, 0, 0, 0, 1 });
+    checkTermMatches(q, FIELD_WITH_OFFSETS, new TermMatch[][][]{
+        {}, {}, {}, {},
+        {
+            {
+                new TermMatch(2, 9, 17),
+                new TermMatch(3, 18, 22),
+                new TermMatch(4, 23, 27)
+            }, {
+              new TermMatch(6, 35, 43), new TermMatch(7, 44, 54)
+        }
+        }
+    });
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8839b7e/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
index 4f06512..5f7e307 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
@@ -67,4 +67,15 @@ class AssertingMatchesIterator implements MatchesIterator {
     return in.endOffset();
   }
 
+  @Override
+  public MatchesIterator getSubMatches() throws IOException {
+    assert state == State.ITERATING : state;
+    return in.getSubMatches();
+  }
+
+  @Override
+  public Object label() {
+    assert state == State.ITERATING : state;
+    return in.label();
+  }
 }


Mime
View raw message