lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dsmi...@apache.org
Subject lucene-solr:master: LUCENE-7431: SpanNotQuery should support negative pre/post distance for overlap
Date Tue, 08 Nov 2016 17:45:51 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/master cfcf4081f -> 750da7c5f


LUCENE-7431: SpanNotQuery should support negative pre/post distance for overlap


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/750da7c5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/750da7c5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/750da7c5

Branch: refs/heads/master
Commit: 750da7c5f7af74f2d9db36265639d7ae9dc9e4eb
Parents: cfcf408
Author: David Smiley <dsmiley@apache.org>
Authored: Tue Nov 8 12:45:23 2016 -0500
Committer: David Smiley <dsmiley@apache.org>
Committed: Tue Nov 8 12:45:23 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 +
 .../lucene/search/spans/SpanNotQuery.java       | 14 ++--
 .../apache/lucene/search/spans/TestBasics.java  | 30 +++++++-
 .../apache/lucene/search/spans/TestSpans.java   | 79 +++++++++++---------
 4 files changed, 84 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/750da7c5/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 805fc7e..0ccb5ee 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,10 @@ Improvements
   PhraseQuery or MultiPhraseQuery when the word automaton is simple
   (Mike McCandless)
 
+* LUCENE-7431: Allow a certain amount of overlap to be specified between the include
+  and exclude arguments of SpanNotQuery via negative pre and/or post arguments.
+  (Marc Morissette via David Smiley)
+
 ======================= Lucene 6.3.0 =======================
 
 API Changes

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/750da7c5/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
index 05d3f8e..00bcc4c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
@@ -49,19 +49,23 @@ public final class SpanNotQuery extends SpanQuery {
 
   /** Construct a SpanNotQuery matching spans from <code>include</code> which
    * have no overlap with spans from <code>exclude</code> within
-   * <code>dist</code> tokens of <code>include</code>. */
+   * <code>dist</code> tokens of <code>include</code>. Inversely, a negative
+   * <code>dist</code> value may be used to specify a certain amount of allowable
+   * overlap. */
   public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
      this(include, exclude, dist, dist);
   }
 
   /** Construct a SpanNotQuery matching spans from <code>include</code> which
    * have no overlap with spans from <code>exclude</code> within
-   * <code>pre</code> tokens before or <code>post</code> tokens of <code>include</code>. */
+   * <code>pre</code> tokens before or <code>post</code> tokens of
+   * <code>include</code>. Inversely, negative values for <code>pre</code> and/or
+   * <code>post</code> allow a certain amount of overlap to occur. */
   public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
     this.include = Objects.requireNonNull(include);
     this.exclude = Objects.requireNonNull(exclude);
-    this.pre = (pre >=0) ? pre : 0;
-    this.post = (post >= 0) ? post : 0;
+    this.pre = pre;
+    this.post = post;
 
     if (include.getField() != null && exclude.getField() != null && !include.getField().equals(exclude.getField()))
       throw new IllegalArgumentException("Clauses must have same field.");
@@ -226,4 +230,4 @@ public final class SpanNotQuery extends SpanQuery {
     return h;
   }
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/750da7c5/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
index b18a38d..d699719 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
@@ -274,20 +274,42 @@ public class TestBasics extends LuceneTestCase {
     assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
   }
 
-  public void testSpanNotWindowNeg() throws Exception {
+  public void testSpanNotWindowNegPost() throws Exception {
     //test handling of invalid window < 0
     SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
     SpanQuery or = spanOrQuery("field", "forty");
-    SpanQuery query = spanNotQuery(near, or);
-
+    SpanQuery query = spanNotQuery(near, or, 0, -1);
     checkHits(query, new int[]
        {801, 821, 831, 851, 861, 871, 881, 891,
                1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
 
+    query = spanNotQuery(near, or, 0, -2);
+    checkHits(query, new int[]
+       {801, 821, 831, 841, 851, 861, 871, 881, 891,
+               1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
+
     assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
     assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
   }
-  
+
+  public void testSpanNotWindowNegPre() throws Exception {
+    //test handling of invalid window < 0
+    SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
+    SpanQuery or = spanOrQuery("field", "forty");
+    SpanQuery query = spanNotQuery(near, or, -2, 0);
+    checkHits(query, new int[]
+        {801, 821, 831, 851, 861, 871, 881, 891,
+            1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
+
+    query = spanNotQuery(near, or, -3, 0);
+    checkHits(query, new int[]
+        {801, 821, 831, 841, 851, 861, 871, 881, 891,
+            1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
+
+    assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
+    assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
+  }
+
   public void testSpanNotWindowDoubleExcludesBefore() throws Exception {
     //test hitting two excludes before an include
     SpanQuery near = spanNearOrderedQuery("field", 2, "forty", "two");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/750da7c5/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
index 2d5e05c..2b5b919 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
@@ -99,7 +99,6 @@ public class TestSpans extends LuceneTestCase {
     "s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx",
     "r1 s11",
     "r1 s21"
-
   };
   
   private void checkHits(Query query, int[] results) throws IOException {
@@ -406,42 +405,54 @@ public class TestSpans extends LuceneTestCase {
 
 
   }
-  
-  public void testSpanNots() throws Throwable{
-     assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0), 0);
-     assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10), 0);
-     
-     //focus on behind
-     assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
-     assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
-     assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
-     assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
-     assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));
-     
-     //focus on both
-     assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
-     assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
-     assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
-     assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));
-     
-     //focus on ahead
-     assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));  
-     assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));  
-     assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));  
-     assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));  
-     assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
-     assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));
-     
-     //exclude doesn't exist
-     assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));
 
-     //include doesn't exist
-     assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
+  public void testSpanNots() throws Throwable {
+
+    assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", 0, "s2", 0, 0), 0);
+    assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", 0, "s2", 10, 10), 0);
+
+    //focus on behind
+    assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", 0, "s1", 6, 0));
+    assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", 0, "s1", 5, 0));
+    assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", 0, "s1", 3, 0));
+    assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", 0, "s1", 2, 0));
+    assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", 0, "s1", 0, 0));
 
+    //focus on both
+    assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", 0, "s1", 3, 1));
+    assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", 0, "s1", 2, 1));
+    assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", 0, "s1", 1, 1));
+    assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", 0, "s1", 10, 10));
+
+    //focus on ahead
+    assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", 0, "s2", 10, 10));
+    assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", 0, "s2", 0, 1));
+    assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", 0, "s2", 0, 2));
+    assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", 0, "s2", 0, 3));
+    assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", 0, "s2", 0, 4));
+    assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", 0, "s2", 0, 8));
+
+    //exclude doesn't exist
+    assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", 0, "s3", 8, 8));
+
+    //include doesn't exist
+    assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", 0, "s1", 8, 8));
+
+    // Negative values
+    assertEquals("SpanNotS2S1NotXXNeg_0_0", 1, spanCount("s2 s1", 10, "xx", 0, 0));
+    assertEquals("SpanNotS2S1NotXXNeg_1_1", 1, spanCount("s2 s1", 10, "xx", -1, -1));
+    assertEquals("SpanNotS2S1NotXXNeg_0_2", 2, spanCount("s2 s1", 10, "xx",  0, -2));
+    assertEquals("SpanNotS2S1NotXXNeg_1_2", 2, spanCount("s2 s1", 10, "xx", -1, -2));
+    assertEquals("SpanNotS2S1NotXXNeg_2_1", 2, spanCount("s2 s1", 10, "xx", -2, -1));
+    assertEquals("SpanNotS2S1NotXXNeg_3_1", 2, spanCount("s2 s1", 10, "xx", -3, -1));
+    assertEquals("SpanNotS2S1NotXXNeg_1_3", 2, spanCount("s2 s1", 10, "xx", -1, -3));
+    assertEquals("SpanNotS2S1NotXXNeg_2_2", 3, spanCount("s2 s1", 10, "xx", -2, -2));
   }
-  
-  private int spanCount(String include, String exclude, int pre, int post) throws IOException{
-     SpanQuery iq = spanTermQuery(field, include);
+
+
+  private int spanCount(String include, int slop, String exclude, int pre, int post) throws IOException{
+     String[] includeTerms = include.split(" +");
+     SpanQuery iq = includeTerms.length == 1 ? spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms);
      SpanQuery eq = spanTermQuery(field, exclude);
      SpanQuery snq = spanNotQuery(iq, eq, pre, post);
      Spans spans = snq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);


Mime
View raw message