Return-Path: Delivered-To: apmail-lucene-java-dev-archive@www.apache.org Received: (qmail 37606 invoked from network); 27 May 2005 19:31:18 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199) by minotaur.apache.org with SMTP; 27 May 2005 19:31:18 -0000 Received: (qmail 7545 invoked by uid 500); 27 May 2005 19:30:47 -0000 Delivered-To: apmail-lucene-java-dev-archive@lucene.apache.org Received: (qmail 7328 invoked by uid 500); 27 May 2005 19:30:46 -0000 Mailing-List: contact java-dev-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-dev@lucene.apache.org Received: (qmail 7222 invoked by uid 99); 27 May 2005 19:30:46 -0000 X-ASF-Spam-Status: No, hits=-0.0 required=10.0 tests=SPF_HELO_PASS,SPF_PASS X-Spam-Check-By: apache.org Received-SPF: pass (hermes.apache.org: domain of lucenelist@danielnaber.de designates 80.67.18.13 as permitted sender) Received: from smtprelay01.ispgateway.de (HELO smtprelay01.ispgateway.de) (80.67.18.13) by apache.org (qpsmtpd/0.28) with ESMTP; Fri, 27 May 2005 12:30:44 -0700 Received: (qmail 10085 invoked from network); 27 May 2005 19:30:39 -0000 Received: from unknown (HELO p54963DE3.dip.t-dialin.net) ([pbs]695637@[84.150.61.227]) (envelope-sender ) by smtprelay01.ispgateway.de (qmail-ldap-1.03) with RC4-MD5 encrypted SMTP for ; 27 May 2005 19:30:39 -0000 From: Daniel Naber To: java-dev@lucene.apache.org Subject: class for delete/add access to an index Date: Fri, 27 May 2005 21:31:31 +0200 User-Agent: KMail/1.8 MIME-Version: 1.0 Content-Type: Multipart/Mixed; boundary="Boundary-00=_TW3lCCiytFH+pvt" Message-Id: <200505272131.31149@danielnaber.de> X-Virus-Checked: Checked X-Spam-Rating: minotaur.apache.org 1.6.2 0/1000/N --Boundary-00=_TW3lCCiytFH+pvt Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Content-Disposition: inline Hi, I'd like to add the attached class to 
Lucene's core. It makes life easier for people who need to add and delete documents from an index by hiding all the IndexReader/IndexWriter stuff. Anybody needing full performance or best performance with threads can still use IndexReader/IndexWriter directly. What do you think? If this gets accepted, it also needs a better name. Regards Daniel -- http://www.danielnaber.de --Boundary-00=_TW3lCCiytFH+pvt Content-Type: text/x-java; charset="us-ascii"; name="Index.java" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="Index.java" package org.apache.lucene.index; import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; /** * A class to delete and add documents to an index without the * need to care about the implementation detail that adding is done * via IndexWriter and deletion is done via IndexReader. * *

Note that you cannot create more than one Index object * on the same directory at the same time. * *

Although an instance of this class can be used from more than one * thread, you will not get good performance. Use IndexReader and IndexWriter * directly for that (you will need to care about synchronization yourself * then). * *

While you can freely mix calls to add() and delete() using this class,
 * you should batch your calls for best performance. For example, if you
 * want to update 20 documents, you should first delete all those documents,
 * then add all the new documents.
 *
 * @author Daniel Naber
 */
public class Index {

  // While the index is open, at most one of indexWriter/indexReader is
  // non-null: createIndexWriter() and createIndexReader() each close and
  // null the other one before opening their own. Both can be null if
  // opening the replacement threw an exception.
  IndexWriter indexWriter = null;
  IndexReader indexReader = null;
  Directory directory;
  Analyzer analyzer;
  boolean open = false;

  /**
   * Open an index with write access.
   *
   * @param directory the index directory
   * @param analyzer the analyzer to use for adding new documents
   * @param create <code>true</code> to create the index or overwrite the existing one;
   *  <code>false</code> to append to the existing index
   */
  public Index(Directory directory, Analyzer analyzer, boolean create)
      throws IOException {
    init(directory, analyzer, create);
  }

  /**
   * Open an index with write access.
   *
   * @param dirName the index directory
   * @param analyzer the analyzer to use for adding new documents
   * @param create <code>true</code> to create the index or overwrite the existing one;
   *  <code>false</code> to append to the existing index
   */
  public Index(String dirName, Analyzer analyzer, boolean create)
      throws IOException {
    Directory dir = FSDirectory.getDirectory(dirName, create);
    init(dir, analyzer, create);
  }

  /**
   * Open an index with write access.
   *
   * @param file the index directory
   * @param analyzer the analyzer to use for adding new documents
   * @param create <code>true</code> to create the index or overwrite the existing one;
   *  <code>false</code> to append to the existing index
   */
  public Index(File file, Analyzer analyzer, boolean create)
      throws IOException {
    Directory dir = FSDirectory.getDirectory(file, create);
    init(dir, analyzer, create);
  }

  /**
   * Shared constructor logic: remembers directory and analyzer, opens the
   * initial IndexWriter and marks this index as open.
   */
  private void init(Directory directory, Analyzer analyzer, boolean create)
      throws IOException {
    this.directory = directory;
    // The directory object is the monitor used by every public method.
    synchronized(this.directory) {
      this.analyzer = analyzer;
      indexWriter = new IndexWriter(directory, analyzer, create);
      open = true;
    }
  }

  /**
   * Throws if this index has been closed. Callers hold the directory monitor.
   *
   * @throws IllegalStateException if the index is closed
   */
  private void assureOpen() {
    if (!open) {
      throw new IllegalStateException("Index is closed");
    }
  }

  /**
   * Adds a document, switching from reader to writer first if necessary.
   *
   * @param doc the document to add
   * @param docAnalyzer the analyzer for this document, or <code>null</code>
   *  to call <code>IndexWriter.addDocument(Document)</code> instead
   * @see IndexWriter#addDocument(Document, Analyzer)
   * @throws IllegalStateException if the index is closed
   */
  public void addDocument(Document doc, Analyzer docAnalyzer) throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexWriter();
      if (docAnalyzer != null) {
        indexWriter.addDocument(doc, docAnalyzer);
      } else {
        indexWriter.addDocument(doc);
      }
    }
  }

  /**
   * Adds a document; equivalent to <code>addDocument(doc, null)</code>.
   *
   * @see IndexWriter#addDocument(Document)
   * @throws IllegalStateException if the index is closed
   */
  public void addDocument(Document doc) throws IOException {
    addDocument(doc, null);
  }

  /**
   * Makes sure an IndexWriter is open, closing the IndexReader first if
   * one is open. Does nothing if a writer already exists.
   */
  private void createIndexWriter() throws IOException {
    if (indexWriter == null) {
      if (indexReader != null) {
        indexReader.close();
        indexReader = null;
      }
      indexWriter = new IndexWriter(directory, analyzer, false);
    }
  }

  /**
   * Makes sure an IndexReader is open, closing the IndexWriter first if
   * one is open. Does nothing if a reader already exists.
   */
  private void createIndexReader() throws IOException {
    if (indexReader == null) {
      if (indexWriter != null) {
        indexWriter.close();
        indexWriter = null;
      }
      indexReader = IndexReader.open(directory);
    }
  }

  /**
   * Deletes via the reader, switching from writer to reader first if necessary.
   *
   * @see IndexReader#delete(Term)
   * @throws IllegalStateException if the index is closed
   */
  public void delete(Term term) throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexReader();
      indexReader.delete(term);
    }
  }

  /**
   * Deletes via the reader, switching from writer to reader first if necessary.
   *
   * @see IndexReader#delete(int)
   * @throws IllegalStateException if the index is closed
   */
  public void delete(int docNum) throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexReader();
      indexReader.delete(docNum);
    }
  }

  /**
   * Returns the document count reported by whichever of writer or reader
   * is currently open: <code>IndexWriter.docCount()</code> or
   * <code>IndexReader.numDocs()</code>.
   * NOTE(review): these two calls may differ in how deleted documents are
   * counted; confirm against the IndexWriter/IndexReader documentation.
   *
   * @see IndexWriter#docCount()
   * @throws IllegalStateException if the index is closed, or if neither a
   *  writer nor a reader is open because an earlier switch between them failed
   */
  public int docCount() {
    synchronized(directory) {
      assureOpen();
      if (indexWriter != null) {
        return indexWriter.docCount();
      }
      if (indexReader != null) {
        return indexReader.numDocs();
      }
      // Reachable when createIndexWriter()/createIndexReader() closed one
      // side and then failed to open the other; previously this was an NPE.
      throw new IllegalStateException("Index has neither an open writer nor an open reader");
    }
  }

  /**
   * Optimizes via the writer, switching from reader to writer first if necessary.
   *
   * @see IndexWriter#optimize()
   * @throws IllegalStateException if the index is closed
   */
  public void optimize() throws IOException {
    synchronized(directory) {
      assureOpen();
      createIndexWriter();
      indexWriter.optimize();
    }
  }

  /**
   * Close this index, writing all pending changes to disk.
   *
   * @throws IllegalStateException if the index has been closed before already
   */
  public void close() throws IOException {
    synchronized(directory) {
      if (!open) {
        throw new IllegalStateException("Index is closed already");
      }
      if (indexWriter != null) {
        indexWriter.close();
        indexWriter = null;
      } else if (indexReader != null) {
        // Null check: both fields are null after a failed writer/reader switch.
        indexReader.close();
        indexReader = null;
      }
      // Only reached when the close above did not throw.
      open = false;
    }
  }

  //TODO: implement from reader: isDeleted, hasDeletions
  //TODO: implement from writer: setXYZ...

}

--Boundary-00=_TW3lCCiytFH+pvt
Content-Type: text/x-java; charset="us-ascii"; name="TestIndex.java"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="TestIndex.java"

package org.apache.lucene.index;

import java.io.File;
import java.io.IOException;
import java.util.Random;
import java.util.Stack;

import junit.framework.TestCase;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Exercises {@link Index} from two threads that randomly add documents,
 * delete documents and optimize, then checks the final document count.
 */
public class TestIndex extends TestCase {

  // Next document id to hand out; accessed only through nextId() while
  // worker threads are running.
  private int id = 0;
  // Ids of documents that were added and not yet deleted (shared by all workers).
  private Stack idStack = new Stack();

  // TODO: test case is not reproducible despite the constant seed: this
  // Random is shared by both threads, so which thread gets which number
  // depends on scheduling.
  private Random random = new Random(101);     // constant seed for reproducibility

  public void testIndex() throws IOException {
    testIndexInternal(0);
    testIndexInternal(10);
    testIndexInternal(50);
  }

  /**
   * Runs two IndexThreads against a freshly created index and verifies that
   * the resulting document count equals added minus deleted.
   *
   * @param maxWait upper bound (exclusive) in ms for the random pause after
   *  each operation; 0 disables pausing
   */
  private void testIndexInternal(int maxWait) throws IOException {
    boolean create = true;
    // Reset the shared state once, before any worker runs. Doing this in the
    // IndexThread constructor reset it while the first thread was running.
    id = 0;
    idStack.clear();

    File indexDir = new File(System.getProperty("java.io.tmpdir"), "testindex");
    Directory rd = FSDirectory.getDirectory(indexDir, create);
    Index index = new Index(rd, new StandardAnalyzer(), create);
    System.out.println("START");
    IndexThread thread1 = new IndexThread(index, maxWait);
    thread1.start();
    IndexThread thread2 = new IndexThread(index, maxWait);
    thread2.start();
    waitFor(thread1);
    waitFor(thread2);

    System.out.println("\nfinal optimize....");
    index.optimize();
    System.out.println("index size=" + index.docCount());
    int added = thread1.added + thread2.added;
    System.out.println("added=" + added);
    int deleted = thread1.deleted + thread2.deleted;
    System.out.println("deleted=" + deleted);
    System.out.println("expected index size=" + (added-deleted));
    assertEquals(added-deleted, index.docCount());
    index.close();

    try {
      index.close();
      fail();
    } catch(IllegalStateException e) {
      // expected exception
    }
  }

  /**
   * Blocks until the given worker has finished and fails the test if the
   * worker recorded an exception.
   */
  private void waitFor(IndexThread thread) {
    try {
      thread.join();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      fail("interrupted while waiting for index thread");
    }
    if (thread.failure != null) {
      thread.failure.printStackTrace();
      fail("index thread failed: " + thread.failure);
    }
  }

  /** Hands out the next unique document id; synchronized because both workers call it. */
  private synchronized int nextId() {
    return id++;
  }

  private class IndexThread extends Thread {

    private static final int ITERATIONS = 250;

    private int maxWait = 10;
    private Index index;
    private int added = 0;
    private int deleted = 0;
    // Exception that ended run() early, or null; read by the test after join().
    private Throwable failure = null;

    IndexThread(Index index, int maxWait) {
      this.index = index;
      this.maxWait = maxWait;
    }

    public void run() {
      try {
        for(int i = 0; i < ITERATIONS; i++) {
          if (random.nextInt(101) < 5) {
            System.out.println("--- optimize... ---");
            index.optimize();
          } else if (random.nextInt(101) < 70) {
            Document doc = getDocument();
            System.out.println("add doc id=" + doc.get("id"));
            index.addDocument(doc);
            // Push only after the add succeeded so that a popped id always
            // refers to a document that was handed to the index.
            idStack.push(doc.get("id"));
            added++;
          } else {
            // we just delete the last document added and remove it
            // from the id stack so that it won't be removed twice:
            String delId = null;
            synchronized(idStack) {
              // check and pop under one lock so the other thread cannot
              // empty the stack in between
              if (!idStack.isEmpty()) {
                delId = (String)idStack.pop();
              }
            }
            if (delId != null) {
              System.out.println("delete doc id = " + delId);
              index.delete(new Term("id", delId));
              deleted++;
            }
            // else: not enough docs in index, let's wait for next chance
          }
          if (maxWait > 0) {
            try {
              int rand = random.nextInt(maxWait);
              System.out.println("waiting " + rand + "ms");
              Thread.sleep(rand);
            } catch (InterruptedException e) {
              e.printStackTrace();
            }
          }
        }
      } catch (IOException e) {
        failure = e;
      } catch (RuntimeException e) {
        failure = e;
      }
    }

    /** Builds a document with a unique "id" field and two random "content" fields. */
    private Document getDocument() {
      Document doc = new Document();
      doc.add(new Field("id", Integer.toString(nextId()),
          Field.Store.YES, Field.Index.UN_TOKENIZED));
      // add random stuff:
      doc.add(new Field("content", Integer.toString(random.nextInt(1000)),
          Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("content", Integer.toString(random.nextInt(1000)),
          Field.Store.YES, Field.Index.TOKENIZED));
      return doc;
    }
  }

}

--Boundary-00=_TW3lCCiytFH+pvt
Content-Type: text/plain; charset=us-ascii
--------------------------------------------------------------------- To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org For additional commands, e-mail: java-dev-help@lucene.apache.org --Boundary-00=_TW3lCCiytFH+pvt--