lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dna...@apache.org
Subject cvs commit: jakarta-lucene/src/demo/org/apache/lucene/demo HTMLDocument.java
Date Fri, 06 Aug 2004 19:26:17 GMT
dnaber      2004/08/06 12:26:16

  Modified:    src/demo/org/apache/lucene/demo/html HTMLParser.java
                        HTMLParser.jj Test.java
               src/demo/org/apache/lucene/demo HTMLDocument.java
  Log:
  Use the HTMLParser constructor that takes a FileInputStream and make sure the stream
  gets closed. The constructor that takes a File opened a stream that was never closed,
  so I deprecated that one. I guess the demo isn't part of the "official" API, but there
  are surely people who use it for more than just testing.
  PR: 28187
  
  Revision  Changes    Path
  1.7       +9 -6      jakarta-lucene/src/demo/org/apache/lucene/demo/html/HTMLParser.java
  
  Index: HTMLParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/html/HTMLParser.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- HTMLParser.java	3 Aug 2004 21:32:49 -0000	1.6
  +++ HTMLParser.java	6 Aug 2004 19:26:16 -0000	1.7
  @@ -40,6 +40,9 @@
       }
     }
   
  +  /**
  +   * @deprecated Use HTMLParser(FileInputStream) instead
  +   */
     public HTMLParser(File file) throws FileNotFoundException {
       this(new FileInputStream(file));
     }
  @@ -450,15 +453,15 @@
       finally { jj_save(1, xla); }
     }
   
  -  final private boolean jj_3_1() {
  -    if (jj_scan_token(ArgQuote1)) return true;
  -    if (jj_scan_token(CloseQuote1)) return true;
  -    return false;
  -  }
  -
     final private boolean jj_3_2() {
       if (jj_scan_token(ArgQuote2)) return true;
       if (jj_scan_token(CloseQuote2)) return true;
  +    return false;
  +  }
  +
  +  final private boolean jj_3_1() {
  +    if (jj_scan_token(ArgQuote1)) return true;
  +    if (jj_scan_token(CloseQuote1)) return true;
       return false;
     }
   
  
  
  
  1.8       +3 -0      jakarta-lucene/src/demo/org/apache/lucene/demo/html/HTMLParser.jj
  
  Index: HTMLParser.jj
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/html/HTMLParser.jj,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- HTMLParser.jj	3 Aug 2004 21:32:50 -0000	1.7
  +++ HTMLParser.jj	6 Aug 2004 19:26:16 -0000	1.8
  @@ -104,6 +104,9 @@
       }
     }
   
  +  /**
  +   * @deprecated Use HTMLParser(FileInputStream) instead
  +   */
     public HTMLParser(File file) throws FileNotFoundException {
       this(new FileInputStream(file));
     }
  
  
  
  1.3       +15 -8     jakarta-lucene/src/demo/org/apache/lucene/demo/html/Test.java
  
  Index: Test.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/html/Test.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- Test.java	29 Mar 2004 22:48:00 -0000	1.2
  +++ Test.java	6 Aug 2004 19:26:16 -0000	1.3
  @@ -19,7 +19,7 @@
   import java.io.*;
   
   class Test {
  -  public static void main(String[] argv) throws Exception {
  +  public static void main(String[] argv) throws IOException, InterruptedException {
       if ("-dir".equals(argv[0])) {
         String[] files = new File(argv[1]).list();
         java.util.Arrays.sort(files);
  @@ -32,12 +32,19 @@
         parse(new File(argv[0]));
     }
   
  -  public static void parse(File file) throws Exception {
  -    HTMLParser parser = new HTMLParser(file);
  -    System.out.println("Title: " + Entities.encode(parser.getTitle()));
  -    System.out.println("Summary: " + Entities.encode(parser.getSummary()));
  -    LineNumberReader reader = new LineNumberReader(parser.getReader());
  -    for (String l = reader.readLine(); l != null; l = reader.readLine())
  -      System.out.println(l);
  +  public static void parse(File file) throws IOException, InterruptedException {
  +    FileInputStream fis = null;
  +    try {
  +      fis = new FileInputStream(file);
  +      HTMLParser parser = new HTMLParser(fis);
  +      System.out.println("Title: " + Entities.encode(parser.getTitle()));
  +      System.out.println("Summary: " + Entities.encode(parser.getSummary()));
  +      System.out.println("Content:");
  +      LineNumberReader reader = new LineNumberReader(parser.getReader());
  +      for (String l = reader.readLine(); l != null; l = reader.readLine())
  +        System.out.println(l);
  +    } finally {
  +      if (fis != null) fis.close();
  +    }
     }
   }
  
  
  
  1.4       +18 -11    jakarta-lucene/src/demo/org/apache/lucene/demo/HTMLDocument.java
  
  Index: HTMLDocument.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/HTMLDocument.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- HTMLDocument.java	3 Aug 2004 21:49:24 -0000	1.3
  +++ HTMLDocument.java	6 Aug 2004 19:26:16 -0000	1.4
  @@ -61,19 +61,26 @@
       // tokenized prior to indexing.
       doc.add(new Field("uid", uid(f), false, true, false));
   
  -    HTMLParser parser = new HTMLParser(f);
  +    FileInputStream fis = null;
  +    try {
  +      fis = new FileInputStream(f);
  +      HTMLParser parser = new HTMLParser(fis);
  +      
  +      // Add the tag-stripped contents as a Reader-valued Text field so it will
  +      // get tokenized and indexed.
  +      doc.add(Field.Text("contents", parser.getReader()));
   
  -    // Add the tag-stripped contents as a Reader-valued Text field so it will
  -    // get tokenized and indexed.
  -    doc.add(Field.Text("contents", parser.getReader()));
  +      // Add the summary as an UnIndexed field, so that it is stored and returned
  +      // with hit documents for display.
  +      doc.add(Field.UnIndexed("summary", parser.getSummary()));
   
  -    // Add the summary as an UnIndexed field, so that it is stored and returned
  -    // with hit documents for display.
  -    doc.add(Field.UnIndexed("summary", parser.getSummary()));
  -
  -    // Add the title as a separate Text field, so that it can be searched
  -    // separately.
  -    doc.add(Field.Text("title", parser.getTitle()));
  +      // Add the title as a separate Text field, so that it can be searched
  +      // separately.
  +      doc.add(Field.Text("title", parser.getTitle()));
  +    } finally {
  +      if (fis != null)
  +        fis.close();
  +    }
   
       // return the document
       return doc;
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message