lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dna...@apache.org
Subject cvs commit: jakarta-lucene/src/demo/org/apache/lucene/demo DeleteFiles.java HTMLDocument.java SearchFiles.java IndexHTML.java FileDocument.java
Date Tue, 03 Aug 2004 21:49:24 GMT
dnaber      2004/08/03 14:49:24

  Modified:    src/demo/org/apache/lucene/demo DeleteFiles.java
                        HTMLDocument.java SearchFiles.java IndexHTML.java
                        FileDocument.java
  Log:
  Store FileDocument's "path" as a Keyword field. Also rename HTMLDocument's "url"
  field to "path" and store it as a Keyword field, too. DeleteFiles now takes such
  a path as its command-line parameter and works on the index directory "index",
  like the other demos.
  
  This fixes bug http://issues.apache.org/bugzilla/show_bug.cgi?id=30330
  
  Revision  Changes    Path
  1.3       +18 -9     jakarta-lucene/src/demo/org/apache/lucene/demo/DeleteFiles.java
  
  Index: DeleteFiles.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/DeleteFiles.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DeleteFiles.java	29 Mar 2004 22:48:00 -0000	1.2
  +++ DeleteFiles.java	3 Aug 2004 21:49:24 -0000	1.3
  @@ -16,27 +16,36 @@
    * limitations under the License.
    */
   
  -import java.io.IOException;
  -
   import org.apache.lucene.store.Directory;
   import org.apache.lucene.store.FSDirectory;
   import org.apache.lucene.index.IndexReader;
   import org.apache.lucene.index.Term;
  +//import org.apache.lucene.index.Term;
   
   class DeleteFiles {
  +  
     public static void main(String[] args) {
  +    String usage = "java org.apache.lucene.demo.DeleteFiles <unique_term>";
  +    if (args.length == 0) {
  +      System.err.println("Usage: " + usage);
  +      System.exit(1);
  +    }
       try {
  -      Directory directory = FSDirectory.getDirectory("demo index", false);
  +      Directory directory = FSDirectory.getDirectory("index", false);
         IndexReader reader = IndexReader.open(directory);
   
  -//       Term term = new Term("path", "pizza");
  -//       int deleted = reader.delete(term);
  +      Term term = new Term("path", args[0]);
  +      int deleted = reader.delete(term);
   
  -//       System.out.println("deleted " + deleted +
  -// 			 " documents containing " + term);
  +      System.out.println("deleted " + deleted +
  + 			 " documents containing " + term);
   
  -      for (int i = 0; i < reader.maxDoc(); i++)
  -	reader.delete(i);
  +      // one can also delete documents by their internal id:
  +      /*
  +      for (int i = 0; i < reader.maxDoc(); i++) {
  +        System.out.println("Deleting document with id " + i);
  +        reader.delete(i);
  +      }*/
   
         reader.close();
         directory.close();
  
  
  
  1.3       +4 -3      jakarta-lucene/src/demo/org/apache/lucene/demo/HTMLDocument.java
  
  Index: HTMLDocument.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/HTMLDocument.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- HTMLDocument.java	29 Mar 2004 22:48:00 -0000	1.2
  +++ HTMLDocument.java	3 Aug 2004 21:49:24 -0000	1.3
  @@ -45,9 +45,10 @@
       // make a new, empty document
       Document doc = new Document();
   
  -    // Add the url as a field named "url".  Use an UnIndexed field, so
  -    // that the url is just stored with the document, but is not searchable.
  -    doc.add(Field.UnIndexed("url", f.getPath().replace(dirSep, '/')));
  +    // Add the url as a field named "path".  Use a Keyword field, so 
  +    // that it's searchable, but so that no attempt is made
  +    // to tokenize the field into words.
  +    doc.add(Field.Keyword("path", f.getPath().replace(dirSep, '/')));
   
       // Add the last modified date of the file a field named "modified".  Use a
       // Keyword field, so that it's searchable, but so that no attempt is made
  
  
  
  1.3       +6 -9      jakarta-lucene/src/demo/org/apache/lucene/demo/SearchFiles.java
  
  Index: SearchFiles.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/SearchFiles.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SearchFiles.java	29 Mar 2004 22:48:00 -0000	1.2
  +++ SearchFiles.java	3 Aug 2004 21:49:24 -0000	1.3
  @@ -16,7 +16,6 @@
    * limitations under the License.
    */
   
  -import java.io.IOException;
   import java.io.BufferedReader;
   import java.io.InputStreamReader;
   
  @@ -56,15 +55,13 @@
   	    Document doc = hits.doc(i);
   	    String path = doc.get("path");
   	    if (path != null) {
  -              System.out.println(i + ". " + path);
  -	    } else {
  -              String url = doc.get("url");
  -	      if (url != null) {
  -		System.out.println(i + ". " + url);
  -		System.out.println("   - " + doc.get("title"));
  -	      } else {
  -		System.out.println(i + ". " + "No path nor URL for this document");
  +              System.out.println((i+1) + ". " + path);
  +              String title = doc.get("title");
  +	      if (title != null) {
  +		System.out.println("   Title: " + doc.get("title"));
   	      }
  +	    } else {
  +	      System.out.println((i+1) + ". " + "No path for this document");
   	    }
   	  }
   
  
  
  
  1.4       +2 -2      jakarta-lucene/src/demo/org/apache/lucene/demo/IndexHTML.java
  
  Index: IndexHTML.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/IndexHTML.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- IndexHTML.java	29 Mar 2004 22:48:00 -0000	1.3
  +++ IndexHTML.java	3 Aug 2004 21:49:24 -0000	1.4
  @@ -144,12 +144,12 @@
   	  uidIter.next();			  // keep matching docs
   	} else if (!deleting) {			  // add new docs
   	  Document doc = HTMLDocument.Document(file);
  -	  System.out.println("adding " + doc.get("url"));
  +	  System.out.println("adding " + doc.get("path"));
   	writer.addDocument(doc);
   	}
         } else {					  // creating a new index
   	Document doc = HTMLDocument.Document(file);
  -	System.out.println("adding " + doc.get("url"));
  +	System.out.println("adding " + doc.get("path"));
   	writer.addDocument(doc);		  // add docs unconditionally
         }
       }
  
  
  
  1.3       +4 -3      jakarta-lucene/src/demo/org/apache/lucene/demo/FileDocument.java
  
  Index: FileDocument.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/FileDocument.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- FileDocument.java	29 Mar 2004 22:48:00 -0000	1.2
  +++ FileDocument.java	3 Aug 2004 21:49:24 -0000	1.3
  @@ -47,9 +47,10 @@
       // make a new, empty document
       Document doc = new Document();
   
  -    // Add the path of the file as a field named "path".  Use a Text field, so
  -    // that the index stores the path, and so that the path is searchable
  -    doc.add(Field.Text("path", f.getPath()));
  +    // Add the path of the file as a field named "path".  Use a
  +    // Keyword field, so that it's searchable, but so that no attempt is made
  +    // to tokenize the field into words.
  +    doc.add(Field.Keyword("path", f.getPath()));
   
       // Add the last modified date of the file a field named "modified".  Use a
       // Keyword field, so that it's searchable, but so that no attempt is made
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message