lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Paul Taylor <paul_t...@fastmail.fm>
Subject Re: Not getting matches for analyzers using CharMappingFilter with Lucene 4.1
Date Mon, 25 Feb 2013 10:24:48 GMT
On 20/02/2013 11:28, Paul Taylor wrote:
> I'm just updating my codebase from Lucene 3.6 to Lucene 4.1, and it 
> seems my tests that use NormalizeCharMap for replacing characters in 
> the analyzers are not working.
>
Bump, anybody? I thought a self-contained test case would be enough to 
pique somebody's interest. Am I doing something silly? Maybe, but I can't 
see it.

Paul
> Below I've created a self-contained test case; this is the output when 
> I run it:
>
>
>     --term=and--
>     --term=gold--
>     --term=platinum--
>     name:"platinum and gold"
>     Size1
>     name:"platinum & gold"
>     Size0
>
>     java.lang.AssertionError:
>     Expected :1
>     Actual   :0
>      <Click to see difference>
>         at org.junit.Assert.fail(Assert.java:93)
>         at org.junit.Assert.failNotEquals(Assert.java:647)
>         at org.junit.Assert.assertEquals(Assert.java:128)
>         at org.junit.Assert.assertEquals(Assert.java:472)
>         at org.junit.Assert.assertEquals(Assert.java:456)
>         at 
> org.musicbrainz.search.analysis.Lucene41CharFilterTest.testAmpersandSearching(Lucene41CharFilterTest.java:89)
>
> As you can see, the char filter does seem to work, because the text 
> 'platinum & gold' is converted to three terms: 'platinum', 'and', 'gold'. 
> In fact, search is working for "platinum and gold" but not working for 
> the original "platinum & gold", even though both indexing and searching 
> use the same analyzer. Maybe the problem is with the query parser, but 
> it's certainly related to 4.1, because this worked previously.
>
> thanks Paul
>
>
>     package org.musicbrainz.search.analysis;
>
>     import org.apache.lucene.analysis.Analyzer;
>     import org.apache.lucene.analysis.TokenStream;
>     import org.apache.lucene.analysis.Tokenizer;
>     import org.apache.lucene.analysis.charfilter.MappingCharFilter;
>     import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
>     import org.apache.lucene.analysis.core.LowerCaseFilter;
>     import org.apache.lucene.document.Document;
>     import org.apache.lucene.document.Field;
>     import org.apache.lucene.index.*;
>     import org.apache.lucene.queryparser.classic.QueryParser;
>     import org.apache.lucene.search.IndexSearcher;
>     import org.apache.lucene.search.Query;
>     import org.apache.lucene.search.TopDocs;
>     import org.apache.lucene.store.RAMDirectory;
>     import org.apache.lucene.util.BytesRef;
>     import org.apache.lucene.util.Version;
>     import org.junit.Test;
>     import java.io.Reader;
>
>     import static org.junit.Assert.assertEquals;
>
>     public class Lucene41CharFilterTest
>     {
>         class SimpleAnalyzer extends Analyzer {
>
>             protected NormalizeCharMap charConvertMap;
>
>             protected void setCharConvertMap() {
>
>                 NormalizeCharMap.Builder builder = new 
> NormalizeCharMap.Builder();
>                 builder.add("&","and");
>                 charConvertMap = builder.build();
>             }
>
>             public SimpleAnalyzer() {
>                 setCharConvertMap();
>             }
>
>             @Override
>             protected TokenStreamComponents createComponents(String 
> fieldName, Reader reader) {
>                 Tokenizer source = new 
> MusicbrainzTokenizer(Version.LUCENE_41,
>                         new MappingCharFilter(charConvertMap, reader));
>                 TokenStream filter = new 
> LowerCaseFilter(Version.LUCENE_41,source);
>                 return new TokenStreamComponents(source, filter);
>             }
>         }
>
>         @Test
>         public void testAmpersandSearching() throws Exception {
>
>             Analyzer analyzer = new SimpleAnalyzer();
>             RAMDirectory dir = new RAMDirectory();
>             IndexWriterConfig writerConfig = new 
> IndexWriterConfig(Version.LUCENE_41,analyzer);
>             IndexWriter writer = new IndexWriter(dir, writerConfig);
>             {
>                 Document doc = new Document();
>                 doc.add(new Field("name", "platinum & gold", 
> Field.Store.YES, Field.Index.ANALYZED));
>                 writer.addDocument(doc);
>             }
>             writer.close();
>
>             IndexReader ir = DirectoryReader.open(dir);
>             Fields fields = MultiFields.getFields(ir);
>             Terms terms = fields.terms("name");
>             TermsEnum termsEnum = terms.iterator(null);
>             BytesRef text;
>             while((text = termsEnum.next()) != null) {
>                 System.out.println("--term=" + text.utf8ToString()+"--");
>             }
>             ir.close();
>
>             IndexSearcher searcher = new 
> IndexSearcher(IndexReader.open(dir));
>             {
>                 Query q = new QueryParser(Version.LUCENE_41, "name", 
> analyzer).parse("\"platinum and gold\"");
>                 System.out.println(q);
>                 TopDocs td = searcher.search(q, 10);
>                 System.out.println("Size"+td.scoreDocs.length);
>                 assertEquals(1, searcher.search(q, 10).totalHits);
>             }
>
>             searcher = new IndexSearcher(IndexReader.open(dir));
>             {
>                 Query q = new QueryParser(Version.LUCENE_41, "name", 
> analyzer).parse("\"platinum & gold\"");
>                 System.out.println(q);
>                 TopDocs td = searcher.search(q, 10);
>                 System.out.println("Size"+td.scoreDocs.length);
>                 assertEquals(1, searcher.search(q, 10).totalHits);
>             }
>         }
>     }
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>
>


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message