lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From romseyg...@apache.org
Subject [1/2] lucene-solr:master: LUCENE-8376, LUCENE-8371: ConditionalTokenFilter fixes
Date Mon, 02 Jul 2018 14:38:35 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 9a395f83c -> 3a7ca355f
  refs/heads/master ea4043b95 -> f835d2499


LUCENE-8376, LUCENE-8371: ConditionalTokenFilter fixes


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/f835d249
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/f835d249
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/f835d249

Branch: refs/heads/master
Commit: f835d2499778972ad901a6be11ecf6ef308c0bb0
Parents: ea4043b
Author: Alan Woodward <romseygeek@apache.org>
Authored: Mon Jul 2 15:36:12 2018 +0100
Committer: Alan Woodward <romseygeek@apache.org>
Committed: Mon Jul 2 15:36:12 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 ++++
 .../miscellaneous/ConditionalTokenFilter.java   |  4 ++--
 .../analysis/shingle/FixedShingleFilter.java    |  1 +
 .../lucene/analysis/core/TestRandomChains.java  |  4 ++++
 .../TestConditionalTokenFilter.java             | 22 +++++++++++---------
 5 files changed, 23 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f835d249/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 920948d..4d2769f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -137,6 +137,10 @@ Bug Fixes:
   TestLucene{54,70}DocValuesFormat.testSortedSetVariableLengthBigVsStoredFields()
   failures (Erick Erickson)
 
+* LUCENE-8376, LUCENE-8371: ConditionalTokenFilter.end() would not propagate correctly
+  if the last token in the stream was subsequently dropped; FixedShingleFilter did
+  not set position increment in end() (Alan Woodward)
+
 Changes in Runtime Behavior:
 
 * LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f835d249/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java
index b3ef2ab..dca4a3e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.java
@@ -159,6 +159,7 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
 
   @Override
   public final boolean incrementToken() throws IOException {
+    lastTokenFiltered = false;
     while (true) {
       if (state == TokenState.READING) {
         if (bufferedState != null) {
@@ -192,16 +193,15 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
             adjustPosition = false;
           }
           else {
-            lastTokenFiltered = false;
             state = TokenState.READING;
             return endDelegating();
           }
           return true;
         }
-        lastTokenFiltered = false;
         return true;
       }
       if (state == TokenState.DELEGATING) {
+        lastTokenFiltered = true;
         if (delegate.incrementToken()) {
           return true;
         }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f835d249/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/FixedShingleFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/FixedShingleFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/FixedShingleFilter.java
index fd89e6b..cf82363 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/FixedShingleFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/FixedShingleFilter.java
@@ -139,6 +139,7 @@ public final class FixedShingleFilter extends TokenFilter {
     }
     clearAttributes();
     this.offsetAtt.setOffset(0, endToken.endOffset());
+    this.incAtt.setPositionIncrement(endToken.posInc());
   }
 
   private void finishInnerStream() throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f835d249/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
index 8ab57d7..557a69e 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
@@ -132,6 +132,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     // FlattenGraphFilter changes the output graph entirely, so wrapping it in a condition
     // can break position lengths
     avoidConditionals.add(FlattenGraphFilter.class);
+    // LimitToken*Filters don't set end offsets correctly
+    avoidConditionals.add(LimitTokenOffsetFilter.class);
+    avoidConditionals.add(LimitTokenCountFilter.class);
+    avoidConditionals.add(LimitTokenPositionFilter.class);
   }
 
   private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f835d249/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java
index 00a2df1..7b07bf9 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConditionalTokenFilter.java
@@ -173,29 +173,31 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testEndPropagation() throws IOException {
-    CannedTokenStream cts1 = new CannedTokenStream(0, 20,
+
+    CannedTokenStream cts2 = new CannedTokenStream(0, 20,
         new Token("alice", 0, 5), new Token("bob", 6, 8)
     );
-    TokenStream ts1 = new ConditionalTokenFilter(cts1, EndTrimmingFilter::new) {
+    TokenStream ts2 = new ConditionalTokenFilter(cts2, EndTrimmingFilter::new) {
       @Override
       protected boolean shouldFilter() throws IOException {
-        return false;
+        return true;
       }
     };
-    assertTokenStreamContents(ts1, new String[]{ "alice", "bob" },
-        null, null, null, null, null, 20);
+    assertTokenStreamContents(ts2, new String[]{ "alice", "bob" },
+        null, null, null, null, null, 18);
 
-    CannedTokenStream cts2 = new CannedTokenStream(0, 20,
+    CannedTokenStream cts1 = new CannedTokenStream(0, 20,
         new Token("alice", 0, 5), new Token("bob", 6, 8)
     );
-    TokenStream ts2 = new ConditionalTokenFilter(cts2, EndTrimmingFilter::new) {
+    TokenStream ts1 = new ConditionalTokenFilter(cts1, EndTrimmingFilter::new) {
       @Override
       protected boolean shouldFilter() throws IOException {
-        return true;
+        return false;
       }
     };
-    assertTokenStreamContents(ts2, new String[]{ "alice", "bob" },
-        null, null, null, null, null, 18);
+    assertTokenStreamContents(ts1, new String[]{ "alice", "bob" },
+        null, null, null, null, null, 20);
+
   }
 
   public void testWrapGraphs() throws Exception {


Mime
View raw message