lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject lucene-solr:branch_6x: TokenStreamToAutomaton failed to handle certain holes correctly
Date Sun, 08 Jan 2017 11:27:08 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 373826a69 -> 2336152fb


TokenStreamToAutomaton failed to handle certain holes correctly


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2336152f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2336152f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2336152f

Branch: refs/heads/branch_6x
Commit: 2336152fb4acf20bfc4936ad5e2cddde8efebaf1
Parents: 373826a
Author: Mike McCandless <mikemccand@apache.org>
Authored: Sun Jan 8 06:26:08 2017 -0500
Committer: Mike McCandless <mikemccand@apache.org>
Committed: Sun Jan 8 06:26:27 2017 -0500

----------------------------------------------------------------------
 .../apache/lucene/analysis/TokenStreamToAutomaton.java  | 11 ++++++++++-
 .../org/apache/lucene/analysis/TestGraphTokenizers.java | 12 ++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2336152f/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
index 071fa4a..64bac66 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
@@ -113,6 +113,7 @@ public class TokenStreamToAutomaton {
     final RollingBuffer<Position> positions = new Positions();
 
     int pos = -1;
+    int freedPos = 0;
     Position posData = null;
     int maxOffset = 0;
     while (in.incrementToken()) {
@@ -150,7 +151,15 @@ public class TokenStreamToAutomaton {
             addHoles(builder, positions, pos);
           }
         }
-        positions.freeBefore(pos);
+        while (freedPos <= pos) {
+          Position freePosData = positions.get(freedPos);
+          // don't free this position yet if we may still need to fill holes over it:
+          if (freePosData.arriving == -1 || freePosData.leaving == -1) {
+            break;
+          }
+          positions.freeBefore(freedPos);
+          freedPos++;
+        }
       }
 
       final int endPos = pos + posLengthAtt.getPositionLength();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2336152f/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
index 78fb127..8899dd1 100644
--- a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
+++ b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
@@ -585,4 +585,16 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
       Operations.determinize(Operations.removeDeadStates(expected), DEFAULT_MAX_DETERMINIZED_STATES),
       Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_MAX_DETERMINIZED_STATES)));
   }
+
+  public void testTokenStreamGraphWithHoles() throws Exception {
+    final TokenStream ts = new CannedTokenStream(
+      new Token[] {
+        token("abc", 1, 1),
+        token("xyz", 1, 8),
+        token("def", 1, 1),
+        token("ghi", 1, 1),
+      });
+    assertSameLanguage(Operations.union(join(s2a("abc"), SEP_A, s2a("xyz")),
+                                        join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"), SEP_A, s2a("ghi"))), ts);
+  }
 }


Mime
View raw message