lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Paul Taylor <paul_t...@fastmail.fm>
Subject Converting from TermAttribute to CharTermAttribute
Date Thu, 15 Sep 2011 09:39:08 GMT
I have updated from Lucene 3.0 to Lucene 3.1 and am now getting various 
deprecation warnings that I'm trying to move away from.

I changed this filter class and now my tests are failing. Is anybody able to 
see what I'm missing, please?

Paul

package org.musicbrainz.search.analysis;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

import java.io.IOException;

/**
 * Token filter that substitutes each accented character of a term with its
 * unaccented equivalent, as defined by the {@code UnaccentIndexes},
 * {@code UnaccentPositions} and {@code UnaccentData} lookup tables.
 */
public class AccentFilter extends TokenFilter {

    /** Scratch buffer receiving the converted term; replaced by a larger one on demand. */
    private char[] output = new char[256];

    /** Number of valid chars in {@link #output} after the last {@link #removeAccents} call. */
    private int outputPos;

    /** Term attribute of the stream; read and, when needed, rewritten for every token. */
    private TermAttribute termAttr;

    /**
     * Wraps the given stream.
     *
     * @param input the token stream whose terms are to be unaccented
     */
    public AccentFilter(TokenStream input) {
        super(input);
        termAttr = (TermAttribute) addAttribute(TermAttribute.class);
    }

    /**
     * Advances to the next token and, if it contains replaceable characters,
     * overwrites its term text with the converted version.
     *
     * @return {@code false} when the wrapped stream is exhausted, {@code true} otherwise
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public boolean incrementToken() throws IOException {
        if (input.incrementToken()) {
            if (removeAccents(termAttr.termBuffer(), termAttr.termLength())) {
                termAttr.setTermBuffer(output, 0, outputPos);
            }
            return true;
        }
        return false;
    }

    /**
     * Converts the first {@code length} chars of {@code input} into
     * {@link #output}, replacing every char for which the lookup tables hold a
     * non-empty replacement. On a {@code true} return, {@link #outputPos} is
     * the length of the converted text.
     *
     * @param input  chars of the current term
     * @param length number of valid chars in {@code input}
     * @return {@code true} if at least one char was replaced (the result is in
     *         {@link #output}); {@code false} if the term needs no change
     */
    protected final boolean removeAccents(char[] input, int length) {
        // Capacity the scratch buffer must have before anything is written:
        // the current capacity doubled until it reaches twice the term length.
        final int worstCase = 2 * length;
        int wantedCapacity = output.length;
        while (wantedCapacity < worstCase) {
            wantedCapacity *= 2;
        }

        outputPos = 0;
        int copiedUpTo = 0; // index of the first input char not yet transferred

        for (int i = 0; i < length; i++) {
            final int codeUnit = (int) input[i];

            // Two-level lookup: high bits select the block, low bits the slot.
            final int block = UnaccentIndexes.indexes[codeUnit >> UnaccentData.BLOCK_SHIFT];
            final int slot = codeUnit & UnaccentData.BLOCK_MASK;

            final short[] offsets = UnaccentPositions.positions[block];
            final int start = offsets[slot];
            final int replacementLength = offsets[slot + 1] - start;

            if (replacementLength <= 0) {
                continue; // nothing to substitute for this char
            }

            // Swap in a bigger scratch buffer the first time it is needed.
            if (output.length != wantedCapacity) {
                output = new char[wantedCapacity];
            }

            // Transfer the untouched chars that precede this one.
            final int pending = i - copiedUpTo;
            if (pending > 0) {
                System.arraycopy(input, copiedUpTo, output, outputPos, pending);
                outputPos += pending;
            }

            // Append the replacement sequence.
            System.arraycopy(UnaccentData.data[block], start, output, outputPos, replacementLength);
            outputPos += replacementLength;
            copiedUpTo = i + 1;
        }

        // copiedUpTo only moves when a replacement happened.
        if (copiedUpTo == 0) {
            return false;
        }

        // Transfer whatever follows the last replaced char.
        final int tail = length - copiedUpTo;
        if (tail > 0) {
            System.arraycopy(input, copiedUpTo, output, outputPos, tail);
            outputPos += tail;
        }
        return true;
    }

}

to

package org.musicbrainz.search.analysis;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import java.io.IOException;

/**
 * A filter that replaces accented characters by their unaccented equivalents.
 *
 * <p>Replacements come from the {@code UnaccentIndexes},
 * {@code UnaccentPositions} and {@code UnaccentData} lookup tables. The
 * converted term is assembled in a reusable scratch buffer and then copied
 * into the stream's {@link CharTermAttribute}.
 */
public class AccentFilter extends TokenFilter {

    /** Scratch buffer holding the converted term; reallocated when too small. */
    private char[] output = new char[256];

    /** Number of valid chars in {@link #output} after {@link #removeAccents}. */
    private int outputPos;

    /** Term text of the current token. */
    private final CharTermAttribute termAttr;

    /**
     * Creates a filter over the given stream.
     *
     * @param input the token stream whose terms should be unaccented
     */
    public AccentFilter(TokenStream input) {
        super(input);
        termAttr = addAttribute(CharTermAttribute.class);
    }

    /**
     * Advances the wrapped stream by one token and rewrites the term text if
     * it contains characters that have an unaccented replacement.
     *
     * @return {@code false} if the wrapped stream has no more tokens,
     *         {@code true} otherwise
     * @throws IOException if the wrapped stream throws it
     */
    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }

        final char[] buffer = termAttr.buffer();
        final int length = termAttr.length();
        if (removeAccents(buffer, length)) {
            // copyBuffer is the CharTermAttribute counterpart of the old
            // TermAttribute.setTermBuffer(char[], int, int): it copies the
            // chars into the term and sets its length. resizeBuffer must NOT
            // be used here: it only grows the internal array and leaves both
            // the term contents and the term length untouched.
            termAttr.copyBuffer(output, 0, outputPos);
        }
        return true;
    }

    /**
     * Builds the unaccented form of the first {@code length} chars of
     * {@code input} in {@link #output}. When this returns {@code true},
     * {@link #outputPos} holds the length of the converted text.
     *
     * @param input  the term chars to convert
     * @param length number of valid chars in {@code input}
     * @return {@code true} if at least one char was replaced and the result is
     *         available in {@link #output}; {@code false} if nothing changed
     */
    protected final boolean removeAccents(char[] input, int length) {
        // Target capacity for the scratch buffer: current capacity doubled
        // until it is at least twice the term length.
        // NOTE(review): this presumes no table entry expands one char into
        // more than two chars on average - confirm against UnaccentData.
        final int maxSizeNeeded = 2 * length;
        int size = output.length;
        while (size < maxSizeNeeded) {
            size *= 2;
        }

        int inputPos = 0; // first input char not yet copied to output
        outputPos = 0;

        for (int i = 0; i < length; i++) {
            int c = (int) input[i];

            // Two-level table lookup: high bits pick the block, low bits the
            // position inside that block.
            int block = UnaccentIndexes.indexes[c >> UnaccentData.BLOCK_SHIFT];
            int position = c & UnaccentData.BLOCK_MASK;

            // Consecutive entries delimit the replacement inside the block's
            // data array; equal entries mean "no replacement".
            short[] positions = UnaccentPositions.positions[block];
            int unacPosition = positions[position];
            int unacLength = positions[position + 1] - unacPosition;

            if (unacLength > 0) {
                // allocate a new char array, if necessary (only ever happens
                // on the first replacement, while output is still empty)
                if (size != output.length) {
                    output = new char[size];
                }
                // copy front of the input
                if (inputPos < i) {
                    System.arraycopy(input, inputPos, output, outputPos, i - inputPos);
                    outputPos += i - inputPos;
                }
                // copy unaccented data
                System.arraycopy(UnaccentData.data[block], unacPosition,
                        output, outputPos, unacLength);
                outputPos += unacLength;
                inputPos = i + 1;
            }
        }

        // no conversion needed...
        if (inputPos == 0) {
            return false;
        }

        // copy rest of the input
        int copyLength = length - inputPos;
        if (copyLength > 0) {
            System.arraycopy(input, inputPos, output, outputPos, copyLength);
            outputPos += copyLength;
        }

        return true;
    }

}




---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message