Return-Path: Delivered-To: apmail-jakarta-lucene-user-archive@apache.org Received: (qmail 98050 invoked from network); 17 Jun 2002 14:07:16 -0000 Received: from unknown (HELO nagoya.betaversion.org) (192.18.49.131) by daedalus.apache.org with SMTP; 17 Jun 2002 14:07:16 -0000 Received: (qmail 26242 invoked by uid 97); 17 Jun 2002 14:07:19 -0000 Delivered-To: qmlist-jakarta-archive-lucene-user@jakarta.apache.org Received: (qmail 26225 invoked by uid 97); 17 Jun 2002 14:07:18 -0000 Mailing-List: contact lucene-user-help@jakarta.apache.org; run by ezmlm Precedence: bulk List-Unsubscribe: List-Subscribe: List-Help: List-Post: List-Id: "Lucene Users List" Reply-To: "Lucene Users List" Delivered-To: mailing list lucene-user@jakarta.apache.org Received: (qmail 26213 invoked by uid 98); 17 Jun 2002 14:07:18 -0000 X-Antivirus: nagoya (v4198 created Apr 24 2002) Reply-To: From: "Nader S. Henein" To: "Lucene Users List" , Subject: RE: Deleting documents from index question. Date: Mon, 17 Jun 2002 18:08:12 +0400 Message-ID: MIME-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: 8bit X-Priority: 3 (Normal) X-MSMail-Priority: Normal X-Mailer: Microsoft Outlook IMO, Build 9.0.2416 (9.0.2910.0) X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2600.0000 Importance: Normal In-Reply-To: <200206171538.07309.karl@gan.no> X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N I run my delete function without optimizing because it takes too long and because it doesn't inflate the number of files in the index like an insert does (it just adds a file, I imagine for exclusion purposes until the next optimize) and it works fine. 
Here's how I delete: public static synchronized void deleteIndexEntry ( String filePath , String fieldValue ) { String fieldName = "" ; String indexTag = "" ; fieldName = "id" ; IndexReader reader = null; try { reader = IndexReader.open(indexPath); Term targetTerm = new Term(fieldName,fieldValue) ; reader.delete(targetTerm) ; reader.close(); } catch (java.io.IOException e) { errorText = errorText.concat("DeleteIndex :"+e+"\n") ; } } -----Original Message----- From: Karl Øie [mailto:karl@gan.no] Sent: Monday, June 17, 2002 5:38 PM To: Lucene Users List Subject: Re: Deleting documents from index question. hi, i think you must run writer.optimize after deleting docs before it takes effect, deleted documents are only marked as deleted until then... mvh karl øie On Monday 17 June 2002 15:33, Rick Vestal wrote: > Good morning all, > > I'm trying to delete a set of documents from an index, > and am running into a problem where all the documents are > not deleted. My problem is either the way I am using the API > or it is a bug in lucene...I'm not sure which one it is. > > I've included a sample program here that shows the problem. Note > that you will have to change the path at the top to a valid set > of files on your machine. If anybody has any ideas on why I > am not removing the files correctly, please let me know. > > Thanks, > > -- Rick > > /* > * Created by IntelliJ IDEA. > * User: rvestal > * Date: Jun 16, 2002 > * Time: 10:23:51 PM > * To change template for new class use > * Code Style | Class Templates options (Tools | IDE Options). 
> */ > package org.intellij.plugins.docPlugin; > > import org.apache.lucene.analysis.Analyzer; > import org.apache.lucene.analysis.standard.StandardAnalyzer; > import org.apache.lucene.document.*; > import org.apache.lucene.index.*; > import org.apache.lucene.queryParser.QueryParser; > import org.apache.lucene.search.*; > import org.apache.lucene.store.*; > > import java.io.*; > import java.util.Vector; > > public class IndexTest { > > // path to ant 1.4.1 docs > private static String mDirToIndex = "c:/utils/ant/docs/manual/api/"; > > private static String INDEX_DIR = "indexTest"; > > > static private void collectFiles( File dir, Vector files ) { > File[] children = dir.listFiles(); > for ( int ix = 0; ix < children.length; ix++ ) { > File child = children[ix]; > if ( child.isDirectory() ) { > collectFiles( child, files ); > } else { > files.add( child ); > } > } > } > > > public static void main( String[] args ) { > File indexDir = new File( INDEX_DIR ); > if ( !indexDir.exists() ) { > indexDir.mkdirs(); > } > > Vector files = new Vector(); > collectFiles( new File( mDirToIndex ), files ); > > try { > IndexWriter writer = new IndexWriter( INDEX_DIR, new > StandardAnalyzer(), true ); > > for ( int ix = 0; ix < files.size(); ix++ ) { > File file = ( File ) files.get( ix ); > writer.addDocument( IndexTestDocument.createDocument( > file ) ); > } > System.out.println( "Added: " + files.size() + " files." 
); > > writer.optimize(); > writer.close(); > writer = null; > > Searcher searcher = new IndexSearcher( INDEX_DIR ); > Analyzer analyzer = new StandardAnalyzer(); > Query query = QueryParser.parse( "Ant", "contents", analyzer > ); > > Hits hits = searcher.search( query ); > System.out.println( "Hits after add: " + hits.length() ); > searcher.close(); > > Directory directory = FSDirectory.getDirectory( INDEX_DIR, > false ); > IndexReader reader = IndexReader.open( directory ); > > int count = 0; > for ( int ix = 0; ix < files.size(); ix++ ) { > String path = IndexTestDocument.normalizePath( ( ( File > ) > files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) ); > > int numDocs = reader.numDocs(); > boolean bDeleted = false; > for ( int ndx = 0; ndx < numDocs; ndx++ ) { > if ( !reader.isDeleted( ndx ) ) { > String docPath = IndexTestDocument.getPath( > reader.document( ndx ) ); > if ( docPath.equals( path ) ) { > count++; > reader.delete( ndx ); > bDeleted = true; > break; > } > } > } > if ( !bDeleted ) { > System.out.println( " Not Deleted: " + path ); > for( int ndx = 0; ndx < numDocs; ndx++ ) { > if ( !reader.isDeleted( ndx ) ) { > String docPath = IndexTestDocument.getPath( > reader.document( ndx ) ); > System.out.println( " path " + ndx + ": > " + > docPath ); > } > } > } > } > System.out.println( "Removed " + count + " documents of (" + > > files.size() + ")" ); > reader.close(); > > searcher = new IndexSearcher( INDEX_DIR ); > analyzer = new StandardAnalyzer(); > query = QueryParser.parse( "Ant", "contents", analyzer ); > > hits = searcher.search( query ); > System.out.println( "Hits after remove: " + hits.length() ); > > } catch ( Exception ex ) { > ex.printStackTrace(); > } > } > > > static class IndexTestDocument { > > static public Document createDocument( File f ) > throws FileNotFoundException { > Document doc = new Document(); > doc.add( Field.Text( "path", normalizePath( f.getPath() ) ) > ); > Reader reader = new BufferedReader( new 
InputStreamReader( > new > FileInputStream( f ) ) ); > doc.add( Field.Text( "contents", reader ) ); > return doc; > } > > > static public String getPath( Document doc ) { > return ( String ) doc.get( "path" ); > } > > static public String normalizePath( String path ) { > if ( path == null || path.length() == 0 ) { > return ""; > } > path = path.replace( '\\', '/' ); > File f = new File( path ); > if ( f.isDirectory() ) { > if ( path.charAt( path.length() - 1 ) != '/' ) { > path = path + "/"; > } > } > return path; > } > } > } -- To unsubscribe, e-mail: For additional commands, e-mail: -- To unsubscribe, e-mail: For additional commands, e-mail: