lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Michel Blase <mblas...@gmail.com>
Subject Re: old fashioned....."Too many open files"!
Date Fri, 18 May 2012 16:47:53 GMT
This is the code in charge of managing the Lucene index. Thanks for your
help!



package luz.aurora.lucene;

import java.io.File;
import java.io.IOException;
import java.util.*;
import luz.aurora.search.ExtendedQueryParser;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;


public class LuceneManager {

    private HashMap<Integer,String> IndexesPaths;
    private HashMap<Integer,IndexWriter> Writers;

    private int CurrentOpenIndex_ID;
    private String CurrentOpenIndex_TablePrefix;

    public  LuceneManager(int CurrentOpenIndex_ID,String
CurrentOpenIndex_TablePrefix, HashMap<Integer,String> IndexesPaths) throws
Exception {
        this.CurrentOpenIndex_ID = CurrentOpenIndex_ID;
        this.IndexesPaths = IndexesPaths;
        this.Writers = new HashMap<Integer,IndexWriter>();
        this.CurrentOpenIndex_TablePrefix = CurrentOpenIndex_TablePrefix;

        SetUpWriters();
    }

    private void SetUpWriters() throws Exception {
        Set set = IndexesPaths.entrySet();
        Iterator i = set.iterator();

        while(i.hasNext()){
            Map.Entry index = (Map.Entry)i.next();
            int id = (Integer)index.getKey();
            String path = (String)index.getValue();

            File app = new File(path);
            Directory dir = FSDirectory.open(app);
            IndexWriterConfig config = new
IndexWriterConfig(LuceneVersion.CurrentVersion,new
StandardAnalyzer(LuceneVersion.CurrentVersion));

            //config.setMaxBufferedDocs(50);
            config.setRAMBufferSizeMB(400);
            TieredMergePolicy mp =
(TieredMergePolicy)config.getMergePolicy();
            mp.setUseCompoundFile(true);
            config.setMergePolicy(mp);

            /*
            LogMergePolicy lmp = (LogMergePolicy)config.getMergePolicy();
            lmp.setUseCompoundFile(true);
            lmp.setMaxMergeDocs(10000);
            config.setMergePolicy(lmp);
            */

            Writers.put(id, new IndexWriter(dir,config));
        }
    }

    public void AddDocument(int IndexId,Document doc,Analyzer analyzer)
throws CorruptIndexException, IOException {
        IndexWriter im = Writers.get(IndexId);
        im.addDocument(doc, analyzer);
    }

    public void AddDocument(Document doc,Analyzer analyzer) throws
CorruptIndexException, IOException {
        IndexWriter im = Writers.get(this.CurrentOpenIndex_ID);
        im.addDocument(doc, analyzer);
    }

    public void DeleteDoc(int IndexId,int SegmentIdFromDb) throws
CorruptIndexException, IOException {
        IndexWriter im = Writers.get(IndexId);
        Term term = new Term("SegmentID",Integer.toString(SegmentIdFromDb));
        im.deleteDocuments(term);
    }

    public void DeleteDocuments(String query) throws ParseException,
CorruptIndexException, IOException {

        ExtendedQueryParser parser = new
ExtendedQueryParser(LuceneVersion.CurrentVersion,"ID",new
StandardAnalyzer(LuceneVersion.CurrentVersion));
Query q = parser.parse(query);

        Set set = Writers.entrySet();
        Iterator i = set.iterator();

        while(i.hasNext()){
            Map.Entry app = (Map.Entry)i.next();
            IndexWriter im = (IndexWriter)app.getValue();
            im.deleteDocuments(q);
        }
    }

    private IndexSearcher getSearcher() throws CorruptIndexException,
IOException {
        int NumberOfIndexes = Writers.size();

        ArrayList<IndexReader> readers = new ArrayList<IndexReader>();
        IndexReader[] readerList = new IndexReader[NumberOfIndexes];

        Set set = Writers.entrySet();
        Iterator i = set.iterator();
        while(i.hasNext()){
            Map.Entry index = (Map.Entry)i.next();
            IndexWriter iw = (IndexWriter)index.getValue();
            readers.add(IndexReader.open(iw, true));
        }

        MultiReader mr = new MultiReader(readers.toArray(readerList));
        return new IndexSearcher(mr);
    }

    public void close() throws CorruptIndexException, IOException {
        Set set = Writers.entrySet();
        Iterator i = set.iterator();
        while(i.hasNext()){
            Map.Entry index = (Map.Entry)i.next();
            IndexWriter iw = (IndexWriter)index.getValue();
            iw.close();
        }
    }

    public void commit() throws CorruptIndexException, IOException,
Exception {
        Set set = Writers.entrySet();
        Iterator i = set.iterator();
        while(i.hasNext()){
            Map.Entry index = (Map.Entry)i.next();
            IndexWriter iw = (IndexWriter)index.getValue();
            iw.commit();
        }

    }

    public int getCurrentOpenIndexId() {
        return this.CurrentOpenIndex_ID;
    }

    public String getCurrentOpenIndexTablePrefix() {
        return this.CurrentOpenIndex_TablePrefix;
    }


    //SEARCH START

    public TopDocs Search(String q,Analyzer analyzer,int NumberOfResults)
throws Exception {
        ExtendedQueryParser parser = new
ExtendedQueryParser(LuceneVersion.CurrentVersion,"ID",analyzer);
Query query = parser.parse(q);
        //Filter f = NumericRangeFilter.newIntRange("SegmentID", 393, 393,
true, true);
//FilteredQuery fq = new FilteredQuery(query,f);
//TopDocs docs = searcher.search(fq, NumberOfResults);
//System.out.println(searcher.getIndexReader().)
return getSearcher().search(query, NumberOfResults);
    }

    public TopDocs SearchAndHighlight(String q,Analyzer analyzer,int
NumberOfResults) throws Exception {
        ExtendedQueryParser parser = new
ExtendedQueryParser(LuceneVersion.CurrentVersion,"ID",analyzer);
Query query = parser.parse(q);
        return getSearcher().search(query, NumberOfResults);
    }

    public Highlighter getHighlighter(String query,Analyzer analyzer,String
OpeningTag,String ClosingTag) throws ParseException {
        ExtendedQueryParser qp = new
ExtendedQueryParser(LuceneVersion.CurrentVersion,"ID",analyzer);
        Query q = qp.parse(query);
        QueryScorer qs = new QueryScorer(q);
        SimpleHTMLFormatter formatter =  new
SimpleHTMLFormatter(OpeningTag,ClosingTag);
        Highlighter hl = new Highlighter(formatter,qs);
        hl.setTextFragmenter(new SimpleSpanFragmenter(qs));
        return hl;
    }

    public Document getDoc(int i) throws CorruptIndexException, IOException
{
        return getSearcher().doc(i);
    }



}





On Thu, May 17, 2012 at 10:37 PM, findbestopensource <
findbestopensource@gmail.com> wrote:

> Post complete code. You are not closing the objects (IndexWriter /
> IndexSearcher) properly.
>
> Regards
> Aditya
> www.findbestopensource.com
>
>
> On Fri, May 18, 2012 at 6:51 AM, Michel Blase <mblase23@gmail.com> wrote:
>
> > Hi all,
> >
> > I have a few problems indexing. I keep hitting "Too many open files". It
> > seems like Lucene is not releasing file handles after deleting segments.
> >
> > This is a piece from the lsof output showing the problem:
> >
> >
> > java    23024 root *347r      REG      251,0     2660 149376
> > /home/INDEXES_ROOT/SMPL_1/_bvq.cfs (deleted)
> > java    23024 root *348r      REG      251,0     2477 149382
> > /home/INDEXES_ROOT/SMPL_1/_bvr.cfs (deleted)
> > java    23024 root *349r      REG      251,0     2747 149392
> > /home/INDEXES_ROOT/SMPL_1/_bvu.cfs (deleted)
> > java    23024 root *350r      REG      251,0     2339 149384
> > /home/INDEXES_ROOT/SMPL_1/_bvs.cfs (deleted)
> >
> > and this is the code I'm using (I'm using Lucene3.6)
> >
> > -- IndexWriter creation:
> >
> > File app = new File(path);
> > Directory dir = FSDirectory.open(app);
> > IndexWriterConfig config = new
> > IndexWriterConfig(LuceneVersion.CurrentVersion,new
> > StandardAnalyzer(LuceneVersion.CurrentVersion));
> >
> > //these are random tries attempting to solve the problem:
> > config.setRAMBufferSizeMB(400);
> > TieredMergePolicy mp = (TieredMergePolicy)config.getMergePolicy();
> > mp.setUseCompoundFile(true);
> > config.setMergePolicy(mp);
> > IndexWriter im = new IndexWriter(dir,config);
> >
> > -- Then just a loop over my doc list calling for indexing:
> > im.addDocument(doc, analyzer);
> >
> >
> > Any idea?
> > Thanks,
> > Luca
> >
>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message