lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From is...@apache.org
Subject [02/14] lucene-solr:jira/solr-5944: copy all attributes including payload to new tokens
Date Mon, 16 Jan 2017 21:27:27 GMT
copy all attributes including payload to new tokens


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6570e6ec
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6570e6ec
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6570e6ec

Branch: refs/heads/jira/solr-5944
Commit: 6570e6ecc2b14a28da9873948083791ba47145d0
Parents: 61e4528
Author: Nathan Gass <gass@search.ch>
Authored: Mon Jan 9 15:00:21 2017 +0100
Committer: Nathan Gass <gass@search.ch>
Committed: Fri Jan 13 12:14:27 2017 +0100

----------------------------------------------------------------------
 .../lucene/analysis/ngram/EdgeNGramTokenFilter.java | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6570e6ec/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index 827e26f..303b7e320 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -22,9 +22,8 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource;
 
 /**
  * Tokenizes the given token into n-grams of given size(s).
@@ -43,15 +42,11 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
   private int curTermLength;
   private int curCodePointCount;
   private int curGramSize;
-  private int tokStart;
-  private int tokEnd; // only used if the length changed before this filter
   private int savePosIncr;
-  private int savePosLen;
+  private AttributeSource attributes;
   
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-  private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
 
   /**
    * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
@@ -86,17 +81,15 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
           curTermLength = termAtt.length();
           curCodePointCount = Character.codePointCount(termAtt, 0, termAtt.length());
           curGramSize = minGram;
-          tokStart = offsetAtt.startOffset();
-          tokEnd = offsetAtt.endOffset();
+          attributes = input.cloneAttributes();
           savePosIncr += posIncrAtt.getPositionIncrement();
-          savePosLen = posLenAtt.getPositionLength();
         }
       }
       if (curGramSize <= maxGram) {         // if we have hit the end of our n-gram size
range, quit
         if (curGramSize <= curCodePointCount) { // if the remaining input is too short,
we can't generate any n-grams
           // grab gramSize chars from front or back
           clearAttributes();
-          offsetAtt.setOffset(tokStart, tokEnd);
+          attributes.copyTo(this);
           // first ngram gets increment, others don't
           if (curGramSize == minGram) {
             posIncrAtt.setPositionIncrement(savePosIncr);
@@ -104,7 +97,6 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
           } else {
             posIncrAtt.setPositionIncrement(0);
           }
-          posLenAtt.setPositionLength(savePosLen);
           final int charLength = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength,
0, curGramSize);
           termAtt.copyBuffer(curTermBuffer, 0, charLength);
           curGramSize++;


Mime
View raw message