lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Paul Taylor <paul_t...@fastmail.fm>
Subject Is there a problem with my Analyzer subclass ?
Date Tue, 22 Jan 2013 13:56:53 GMT
I've been investigating potential memory leaks in my Lucene-based 
application that runs on Jetty. I did a memory dump with jmap, and one 
thing I've noticed is that for any subclass of Analyzer that I have 
created there are a lot of instances of the $SavedStreams inner class.

So, for example, I can have just five instances of the TitleAnalyzer 
analyzer, but 417 instances of TitleAnalyzer$SavedStreams. These 
$SavedStreams are taking a lot of memory, and my profiler (YourKit) 
labels all but two of them (415 instances) as strongly reachable, i.e. 
they have strong references, so I don't think they can be garbage 
collected, which suggests a memory leak.

But I can't see where the leak is.



     package org.musicbrainz.search.analysis;

     import com.ibm.icu.text.Transliterator;
     import org.apache.lucene.analysis.*;
     import org.musicbrainz.search.LuceneVersion;

     import java.io.IOException;
     import java.io.Reader;
     import java.util.regex.Pattern;

     /**
      * Should be used for for analysing titles such as track 
title,release title or recording title
      * because contains special processing for titles that isn't 
required for other text fields such as artist name.
      *
      * Filters MusicbrainzTokenizer with MusicbrainzTokenizerFilter, 
ICUTransformFilter, AccentFilter, LowerCaseFilter
      * and no stop words.
      */
     public class TitleAnalyzer extends Analyzer {

         private NormalizeCharMap charConvertMap;

         //We convert to the wrong form No.1 rather than the correct 
form No. 1 because this keeps it as single token
         //when tokenized so doesn't incorrectly match additional single 
numbers in the text.
         private Pattern no1Pattern = Pattern.compile("(no\\.) (\\d+)", 
Pattern.CASE_INSENSITIVE);
         private String no1PatternReplacement = "$1$2";

         private void setCharConvertMap() {
             charConvertMap = new NormalizeCharMap();
             AmpersandToAndMappingHelper.addToMap(charConvertMap);
             CharEquivToCharHelper.addToMap(charConvertMap);
             HebrewCharMappingHelper.addToMap(charConvertMap);
         }

         public TitleAnalyzer() {
             setCharConvertMap();
         }

         public final TokenStream tokenStream(String fieldName, Reader 
reader) {
             CharFilter mappingCharFilter = new 
MappingCharFilter(charConvertMap, reader);
             CharFilter no1CharFilter = new 
PatternReplaceCharFilter(no1Pattern, no1PatternReplacement, 
mappingCharFilter);
             MusicbrainzTokenizer tokenStream = new 
MusicbrainzTokenizer(LuceneVersion.LUCENE_VERSION, no1CharFilter);
             TokenStream result = new ICUTransformFilter(tokenStream, 
Transliterator.getInstance("[?[:Script=Katakana:]]Katakana-Hiragana"));
             result = new ICUTransformFilter(result, 
Transliterator.getInstance("Traditional-Simplified"));
             result = new MusicbrainzTokenizerFilter(result);
             result = new AccentFilter(result);
             result = new LowercaseFilter(result);
             result = new MusicbrainzWordDelimiterFilter(result,
WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
                                               1,
                                               0,
                                               0,
                                               6,
                                               0,
                                               0,
                                               0,
                                               0,
                                               0,
                                              null);
             return result;
         }

         private static final class SavedStreams {
             MusicbrainzTokenizer tokenStream;
             TokenStream filteredTokenStream;
         }

         public final TokenStream reusableTokenStream(String fieldName, 
Reader reader) throws IOException {
             SavedStreams streams = (SavedStreams) getPreviousTokenStream();
             if (streams == null) {
                 streams = new SavedStreams();
                 setPreviousTokenStream(streams);
                 streams.tokenStream = new 
MusicbrainzTokenizer(LuceneVersion.LUCENE_VERSION, new 
PatternReplaceCharFilter(no1Pattern, no1PatternReplacement, new 
MappingCharFilter(charConvertMap, reader)));
                 streams.filteredTokenStream = new 
ICUTransformFilter(streams.tokenStream, 
Transliterator.getInstance("[?[:Script=Katakana:]]Katakana-Hiragana"));
                 streams.filteredTokenStream = new 
ICUTransformFilter(streams.filteredTokenStream, 
Transliterator.getInstance("Traditional-Simplified"));
                 streams.filteredTokenStream = new 
MusicbrainzTokenizerFilter(streams.filteredTokenStream);
                 streams.filteredTokenStream = new 
AccentFilter(streams.filteredTokenStream);
                 streams.filteredTokenStream = new 
LowercaseFilter(streams.filteredTokenStream);
                 streams.filteredTokenStream = new 
MusicbrainzWordDelimiterFilter(streams.filteredTokenStream,
WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
                                               1,
                                               0,
                                               0,
                                               6,
                                               0,
                                               0,
                                               0,
                                               0,
                                               0,
                                              null);
             } else {
                 streams.tokenStream.reset(new 
PatternReplaceCharFilter(no1Pattern, no1PatternReplacement, new 
MappingCharFilter(charConvertMap, reader)));
             }
             return streams.filteredTokenStream;
         }
     }

Some of the references back:

+---org.musicbrainz.search.analysis.TitleAnalyzer$SavedStreams |         
58,672  |            32  |
     |  | | |                 |                |
     |  | +---value of java.util.WeakHashMap$Entry |            144  
|            72  |
     |  | | |                 |                |
     |  |   +---[251] of java.util.WeakHashMap$Entry[256] |        
247,888  |         2,072  |
     |  | | |                 |                |
     |  |     +---table of java.util.WeakHashMap |        248,048  
|            80  |
     |  | | |                 |                |
     |  |       +---hardRefs of 
org.apache.lucene.util.CloseableThreadLocal |        248,112  
|            40  |
     |  | | |                 |                |
     |  |         +---tokenStreams of 
org.musicbrainz.search.analysis.TitleWithPosGapAnalyzer |        
248,160  |            48  |
     |  | | |                 |                |
     |  |           +---analyzer of 
org.musicbrainz.search.index.RecordingIndexField |             64  
|            64  |
     |  | | |                 |                |
     |  |             +---RELEASE of 
org.musicbrainz.search.index.RecordingIndexField |          4,720  
|            64  |
     |  | | |                 |                |
     |  |               +---[1756] of java.lang.Object[2560] |      
8,089,136  |        20,504  |
     |  | | |                 |                |
     |  |                 +---elementData of java.util.Vector |      
8,089,176  |            40  |
     |  | | |                 |                |
     |  |                   +---classes of 
org.eclipse.jetty.webapp.WebAppClassLoader |      8,256,880  |           
184  |
     |  | | |                 |                |
     |  |                     +---contextClassLoader of 
java.util.TimerThread [Thread]  "HashSessionScavenger-1" |            
568  |           176  |
     | | |                 |                |
     | +---org.musicbrainz.search.analysis.TitleAnalyzer$SavedStreams 
|         79,192  |            32  |
     |  | | |                 |                |
     |  | +---value of java.util.WeakHashMap$Entry |             72  
|            72  |
     |  | | |                 |                |
     |  |   +---[243] of java.util.WeakHashMap$Entry[256] |        
247,888  |         2,072  |
     |  | | |                 |                |
     |  |     +---table of java.util.WeakHashMap |        248,048  
|            80  |
     |  | | |                 |                |
     |  |       +---hardRefs of 
org.apache.lucene.util.CloseableThreadLocal |        248,112  
|            40  |
     |  | | |                 |                |
     |  |         +---tokenStreams of 
org.musicbrainz.search.analysis.TitleWithPosGapAnalyzer |        
248,160  |            48  |
     |  | | |                 |                |
     |  |           +---analyzer of 
org.musicbrainz.search.index.RecordingIndexField |             64  
|            64  |
     |  | | |                 |                |
     |  |             +---RELEASE of 
org.musicbrainz.search.index.RecordingIndexField |          4,720  
|            64  |
     |  | | |                 |                |
     |  |               +---[1756] of java.lang.Object[2560] |      
8,089,136  |        20,504  |
     |  | | |                 |                |
     |  |                 +---elementData of java.util.Vector |      
8,089,176  |            40  |
     |  | | |                 |                |
     |  |                   +---classes of 
org.eclipse.jetty.webapp.WebAppClassLoader |      8,256,880  |           
184  |
     |  | | |                 |                |
     |  |                     +---contextClassLoader of 
java.util.TimerThread [Thread]  "HashSessionScavenger-1" |            
568  |           176  |
     | | |                 |                |
     | +---org.musicbrainz.search.analysis.TitleAnalyzer$SavedStreams 
|         79,176  |            32  |
     |  | | |                 |                |
     |  | +---value of java.util.WeakHashMap$Entry |             72  
|            72  |
     |  | | |                 |                |
     |  |   +---[240] of java.util.WeakHashMap$Entry[256] |        
247,888  |         2,072  |
     |  | | |                 |                |
     |  |     +---table of java.util.WeakHashMap |        248,048  
|            80  |
     |  | | |                 |                |
     |  |       +---hardRefs of 
org.apache.lucene.util.CloseableThreadLocal |        248,112  
|            40  |
     |  | | |                 |                |
     |  |         +---tokenStreams of 
org.musicbrainz.search.analysis.TitleWithPosGapAnalyzer |        
248,160  |            48  |
     |  | | |                 |                |
     |  |           +---analyzer of 
org.musicbrainz.search.index.RecordingIndexField |             64  
|            64  |
     |  | | |                 |                |
     |  |             +---RELEASE of 
org.musicbrainz.search.index.RecordingIndexField |          4,720  
|            64  |
     |  | | |                 |                |
     |  |               +---[1756] of java.lang.Object[2560] |      
8,089,136  |        20,504  |
     |  | | |                 |                |
     |  |                 +---elementData of java.util.Vector |      
8,089,176  |            40  |
     |  | | |                 |                |
     |  |                   +---classes of 
org.eclipse.jetty.webapp.WebAppClassLoader |      8,256,880  |           
184  |
     |  | | |                 |                |
     |  |                     +---contextClassLoader of 
java.util.TimerThread [Thread]  "HashSessionScavenger-1" |            
568  |           176  |
     | | |                 |                |
     | +---org.musicbrainz.search.analysis.TitleAnalyzer$SavedStreams 
|         79,176  |            32  |
     |  | | |                 |                |
     |  | +---value of java.util.WeakHashMap$Entry |             72  
|            72  |
     |  | | |                 |                |
     |  |   +---[239] of java.util.WeakHashMap$Entry[256] |        
247,888  |         2,072  |
     |  | | |                 |                |
     |  |     +---table of java.util.WeakHashMap |        248,048  
|            80  |
     |  | | |                 |                |
     |  |       +---hardRefs of 
org.apache.lucene.util.CloseableThreadLocal |        248,112  
|            40  |
     |  | | |                 |                |
     |  |         +---tokenStreams of 
org.musicbrainz.search.analysis.TitleWithPosGapAnalyzer |        
248,160  |            48  |
     |  | | |                 |                |
     |  |           +---analyzer of 
org.musicbrainz.search.index.RecordingIndexField |             64  
|            64  |
     |  | | |                 |                |
     |  |             +---RELEASE of 
org.musicbrainz.search.index.RecordingIndexField |          4,720  
|            64  |
     |  | | |                 |                |
     |  |               +---[1756] of java.lang.Object[2560] |      
8,089,136  |        20,504  |
     |  | | |                 |                |
     |  |                 +---elementData of java.util.Vector |      
8,089,176  |            40  |
     |  | | |                 |                |
     |  |                   +---classes of 
org.eclipse.jetty.webapp.WebAppClassLoader |      8,256,880  |           
184  |
     |  | | |                 |                |
     |  |                     +---contextClassLoader of 
java.util.TimerThread [Thread]  "HashSessionScavenger-1" |            
568  |           176  |

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message