lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From AHMET ARSLAN <iori...@yahoo.com>
Subject Re: Lucene Analyzer that can handle C++ vs C#
Date Thu, 24 Dec 2009 20:13:16 GMT

> public class CustomFilter extends TokenFilter
> {
>     protected CustomFilter(TokenStream
> tokenStream)
>     {
>         super(tokenStream);
>     }
>     @Override
>     public Token next(final Token reusableToken)
> throws IOException
>     {
>         Token nextToken =
> input.next(reusableToken);
>         
>         if(nextToken != null)
>         {
>            
> nextToken.setTermText(nextToken.termText().replaceAll(":|,|\\(|\\)|“|~|;|&|\\.",""));
>         }
>         return nextToken;
>     }
> }

Here is the one that uses the new TokenStream API:

public final class CustomFilter extends TokenFilter {

   private final TermAttribute termAtt;

    public CustomFilter(TokenStream in, FastZemberek zemberek) {
        super(in);
        termAtt = (TermAttribute) addAttribute(TermAttribute.class);
    }
    
    public final boolean incrementToken() throws IOException {
        if (input.incrementToken()) {
            String term = termAtt.term();
            String s = term.replaceAll(":|,|\\(|\\)|“|~|;|&|\\.","");
            if (s != null && !s.equals(term))
                termAtt.setTermBuffer(s);
            return true;
        } else {
            return false;
        }
    }
}


      

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message