lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ehatc...@apache.org
Subject cvs commit: jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant ConfigurableDocumentHandler.java DocumentHandler.java DocumentHandlerException.java FileExtensionDocumentHandler.java HtmlDocument.java IndexTask.java TextDocument.java
Date Mon, 19 Jan 2004 14:58:33 GMT
ehatcher    2004/01/19 06:58:33

  Modified:    contributions/ant build.xml
               contributions/ant/src/main/org/apache/lucene/ant
                        DocumentHandler.java DocumentHandlerException.java
                        FileExtensionDocumentHandler.java HtmlDocument.java
                        IndexTask.java TextDocument.java
  Added:       contributions/ant/src/main/org/apache/lucene/ant
                        ConfigurableDocumentHandler.java
  Log:
  upgrade index task to some ancient enhancements i had locally
  
  Revision  Changes    Path
  1.4       +23 -1     jakarta-lucene-sandbox/contributions/ant/build.xml
  
  Index: build.xml
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/ant/build.xml,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- build.xml	5 Jan 2004 15:54:53 -0000	1.3
  +++ build.xml	19 Jan 2004 14:58:33 -0000	1.4
  @@ -1,5 +1,4 @@
   <?xml version="1.0"?>
  -
   <project name="lucene-ant" default="default">
   
     <description>
  @@ -18,5 +17,28 @@
   
     <property name="src.dir" location="src/main"/>
   
  +  <!-- alias classpath for cleaner example in index target -->
  +  <path id="index.classpath">
  +    <path refid="test.classpath"/>
  +  </path>
  +
     <import file="../common.xml"/>
  +
  +  <property name="index.dir" location="${test.output.dir}/index"/>
  +  <property name="files.dir" location="${test.src.dir}"/>
  +
  +  <target name="index" depends="compile">
  +    <taskdef name="index"
  +      classname="org.apache.lucene.ant.IndexTask"
  +      classpathref="index.classpath"
  +    />
  +<!--    <typedef file="src/main/org/apache/lucene/ant/antlib.xml"
  +      uri="lucene:/org/apache/lucene/ant"
  +      classpathref="index.classpath"/> -->
  +
  +    <index index="${index.dir}">
  +      <fileset dir="${files.dir}"/>
  +    </index>
  +  </target>
  +
   </project>
  
  
  
  1.2       +4 -2      jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/DocumentHandler.java
  
  Index: DocumentHandler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/DocumentHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- DocumentHandler.java	11 Jul 2002 01:12:30 -0000	1.1
  +++ DocumentHandler.java	19 Jan 2004 14:58:33 -0000	1.2
  @@ -1,8 +1,9 @@
   package org.apache.lucene.ant;
   
  -import java.io.File;
   import org.apache.lucene.document.Document;
   
  +import java.io.File;
  +
   /**
    *  Allows a class to act as a Lucene document handler
    *
  @@ -10,6 +11,7 @@
    *@created    October 27, 2001
    */
   public interface DocumentHandler {
  +
       /**
        *  Gets the document attribute of the DocumentHandler object
        *
  @@ -18,6 +20,6 @@
        *@throws DocumentHandlerException
        */
       public Document getDocument(File file)
  -                                  throws DocumentHandlerException;
  +            throws DocumentHandlerException;
   }
   
  
  
  
  1.2       +19 -9     jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/DocumentHandlerException.java
  
  Index: DocumentHandlerException.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/DocumentHandlerException.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- DocumentHandlerException.java	11 Jul 2002 01:12:30 -0000	1.1
  +++ DocumentHandlerException.java	19 Jan 2004 14:58:33 -0000	1.2
  @@ -5,32 +5,42 @@
   
   /**
    */
  -public class DocumentHandlerException extends Exception
  -{
  +public class DocumentHandlerException extends Exception {
       private Throwable cause;
  -    
  +
  +    /**
  +     * Default constructor.
  +     */
       public DocumentHandlerException() {
           super();
       }
  -    
  +
  +    /**
  +     * Constructs with message.
  +     */
       public DocumentHandlerException(String message) {
           super(message);
       }
  -    
  +
  +    /**
  +     * Constructs with chained exception.
  +     */
       public DocumentHandlerException(Throwable cause) {
           super(cause.toString());
           this.cause = cause;
       }
  -    
  +
  +    /**
  +     * Retrieves nested exception.
  +     */
       public Throwable getException() {
           return cause;
       }
   
  -    // Override stack trace methods to show original cause:
       public void printStackTrace() {
           printStackTrace(System.err);
       }
  -    
  +
       public void printStackTrace(PrintStream ps) {
           synchronized (ps) {
               super.printStackTrace(ps);
  @@ -40,7 +50,7 @@
               }
           }
       }
  -    
  +
       public void printStackTrace(PrintWriter pw) {
           synchronized (pw) {
               super.printStackTrace(pw);
  
  
  
  1.2       +9 -9      jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/FileExtensionDocumentHandler.java
  
  Index: FileExtensionDocumentHandler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/FileExtensionDocumentHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- FileExtensionDocumentHandler.java	11 Jul 2002 01:12:30 -0000	1.1
  +++ FileExtensionDocumentHandler.java	19 Jan 2004 14:58:33 -0000	1.2
  @@ -1,19 +1,20 @@
   package org.apache.lucene.ant;
   
  -import java.io.File;
   import org.apache.lucene.document.Document;
   
  +import java.io.File;
  +
   /**
  - *  Decides which class used to create the Lucene Document
  - *  object based on its file extension.
  + *  A DocumentHandler implementation to delegate responsibility to
  + *  based on a files extension.  Currently only .html and .txt
  + *  files are handled, other extensions ignored.
    *
    *@author     Erik Hatcher
    *@created    October 28, 2001
  - *@todo Add dynamic file extension/classname mappings for
  - *      extensibility
  + *@todo Implement dynamic document type lookup
    */
   public class FileExtensionDocumentHandler
  -                                       implements DocumentHandler {
  +        implements DocumentHandler {
       /**
        *  Gets the document attribute of the
        *  FileExtensionDocumentHandler object
  @@ -25,7 +26,7 @@
        *      Exception
        */
       public Document getDocument(File file)
  -                                  throws DocumentHandlerException {
  +            throws DocumentHandlerException {
           Document doc = null;
   
           String name = file.getName();
  @@ -38,8 +39,7 @@
               if (name.endsWith(".html")) {
                   doc = HtmlDocument.Document(file);
               }
  -        }
  -        catch (java.io.IOException e) {
  +        } catch (java.io.IOException e) {
               throw new DocumentHandlerException(e);
           }
   
  
  
  
  1.2       +13 -18    jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/HtmlDocument.java
  
  Index: HtmlDocument.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/HtmlDocument.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- HtmlDocument.java	11 Jul 2002 01:12:30 -0000	1.1
  +++ HtmlDocument.java	19 Jan 2004 14:58:33 -0000	1.2
  @@ -1,5 +1,12 @@
   package org.apache.lucene.ant;
   
  +import org.apache.lucene.document.Field;
  +import org.w3c.dom.Element;
  +import org.w3c.dom.Node;
  +import org.w3c.dom.NodeList;
  +import org.w3c.dom.Text;
  +import org.w3c.tidy.Tidy;
  +
   import java.io.BufferedReader;
   import java.io.File;
   import java.io.FileInputStream;
  @@ -8,18 +15,6 @@
   import java.io.InputStream;
   import java.io.StringWriter;
   
  -// Imports commented out since there is a name clash and fully
  -// qualified class names will be used in the code.  Imports are
  -// left for ease of maintenance.
  -import org.apache.lucene.document.Field;
  -//import org.apache.lucene.document.Document;
  -//import org.w3c.dom.Document;
  -import org.w3c.dom.Element;
  -import org.w3c.dom.Node;
  -import org.w3c.dom.NodeList;
  -import org.w3c.dom.Text;
  -import org.w3c.tidy.Tidy;
  -
   /**
    *  The <code>HtmlDocument</code> class creates a Lucene {@link
    *  org.apache.lucene.document.Document} from an HTML document. <P>
  @@ -51,8 +46,8 @@
           Tidy tidy = new Tidy();
           tidy.setQuiet(true);
           tidy.setShowWarnings(false);
  -        org.w3c.dom.Document root = 
  -                    tidy.parseDOM(new FileInputStream(file), null);
  +        org.w3c.dom.Document root =
  +                tidy.parseDOM(new FileInputStream(file), null);
           rawDoc = root.getDocumentElement();
       }
   
  @@ -84,7 +79,7 @@
        *@exception  IOException
        */
       public static org.apache.lucene.document.Document
  -                   getDocument(InputStream is) throws IOException {
  +            getDocument(InputStream is) throws IOException {
           HtmlDocument htmlDoc = new HtmlDocument(is);
           org.apache.lucene.document.Document luceneDoc =
                   new org.apache.lucene.document.Document();
  @@ -109,7 +104,7 @@
        *@exception  IOException
        */
       public static org.apache.lucene.document.Document
  -                           Document(File file) throws IOException {
  +            Document(File file) throws IOException {
           HtmlDocument htmlDoc = new HtmlDocument(file);
           org.apache.lucene.document.Document luceneDoc =
                   new org.apache.lucene.document.Document();
  @@ -119,7 +114,7 @@
   
           String contents = null;
           BufferedReader br =
  -                          new BufferedReader(new FileReader(file));
  +                new BufferedReader(new FileReader(file));
           StringWriter sw = new StringWriter();
           String line = br.readLine();
           while (line != null) {
  @@ -153,7 +148,7 @@
   //         System.out.println("Body  = " + doc.getBody());
   
           HtmlDocument doc =
  -          new HtmlDocument(new FileInputStream(new File(args[0])));
  +                new HtmlDocument(new FileInputStream(new File(args[0])));
           System.out.println("Title = " + doc.getTitle());
           System.out.println("Body  = " + doc.getBody());
       }
  
  
  
  1.3       +331 -231  jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/IndexTask.java
  
  Index: IndexTask.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/IndexTask.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- IndexTask.java	5 Jan 2004 15:45:55 -0000	1.2
  +++ IndexTask.java	19 Jan 2004 14:58:33 -0000	1.3
  @@ -1,12 +1,11 @@
   package org.apache.lucene.ant;
   
  -import java.io.File;
  -import java.io.IOException;
  -import java.util.Date;
  -import java.util.Vector;
  -
   import org.apache.lucene.analysis.Analyzer;
   import org.apache.lucene.analysis.StopAnalyzer;
  +import org.apache.lucene.analysis.SimpleAnalyzer;
  +import org.apache.lucene.analysis.WhitespaceAnalyzer;
  +import org.apache.lucene.analysis.de.GermanAnalyzer;
  +import org.apache.lucene.analysis.standard.StandardAnalyzer;
   import org.apache.lucene.document.DateField;
   import org.apache.lucene.document.Document;
   import org.apache.lucene.document.Field;
  @@ -16,271 +15,372 @@
   import org.apache.lucene.search.IndexSearcher;
   import org.apache.lucene.search.Searcher;
   import org.apache.lucene.search.TermQuery;
  -
   import org.apache.tools.ant.BuildException;
   import org.apache.tools.ant.DirectoryScanner;
  +import org.apache.tools.ant.DynamicConfigurator;
   import org.apache.tools.ant.Project;
   import org.apache.tools.ant.Task;
   import org.apache.tools.ant.types.FileSet;
  +import org.apache.tools.ant.types.EnumeratedAttribute;
  +
  +import java.io.File;
  +import java.io.IOException;
  +import java.util.Date;
  +import java.util.Properties;
  +import java.util.Map;
  +import java.util.HashMap;
  +import java.util.Set;
  +import java.util.ArrayList;
   
   /**
  - * Builds a Lucene index from a fileset.
  + *  Ant task to index files with Lucene
    *
  - * @author     Erik Hatcher
  + *@author Erik Hatcher
    */
   public class IndexTask extends Task {
  -    /**
  -     *  file list
  -     */
  -    private Vector filesets = new Vector();
  +  /**
  +   *  file list
  +   */
  +  private ArrayList filesets = new ArrayList();
  +
  +  /**
  +   *  overwrite index?
  +   */
  +  private boolean overwrite = false;
  +
  +  /**
  +   *  index path
  +   */
  +  private File indexDir;
  +
  +  /**
  +   *  document handler classname
  +   */
  +  private String handlerClassName =
  +    FileExtensionDocumentHandler.class.getName();
  +
  +  /**
  +   *  document handler instance
  +   */
  +  private DocumentHandler handler;
  +
  +
  +  /**
  +   *
  +   */
  +  private String analyzerClassName =
  +    StandardAnalyzer.class.getName();
  +
  +  /**
  +   *  analyzer instance
  +   */
  +  private Analyzer analyzer;
  +
  +  /**
  +   *  Lucene merge factor
  +   */
  +  private int mergeFactor = 20;
  +
  +  private HandlerConfig handlerConfig;
  +
  +
  +  /**
  +   *  Creates new instance
  +   */
  +  public IndexTask() {
  +  }
  +
  +
  +  /**
  +   *  Specifies the directory where the index will be stored
  +   */
  +  public void setIndex(File indexDir) {
  +    this.indexDir = indexDir;
  +  }
  +
  +
  +  /**
  +   *  Sets the mergeFactor attribute of the IndexTask object
  +   *
  +   *@param  mergeFactor  The new mergeFactor value
  +   */
  +  public void setMergeFactor(int mergeFactor) {
  +    this.mergeFactor = mergeFactor;
  +  }
  +
  +
  +  /**
  +   *  Sets the overwrite attribute of the IndexTask object
  +   *
  +   *@param  overwrite  The new overwrite value
  +   */
  +  public void setOverwrite(boolean overwrite) {
  +    this.overwrite = overwrite;
  +  }
  +
  +
  +  /**
  +   *  Sets the documentHandler attribute of the IndexTask object
  +   *
  +   *@param  classname  The new documentHandler value
  +   */
  +  public void setDocumentHandler(String classname) {
  +    handlerClassName = classname;
  +  }
  +
  +  /**
  +   * Sets the analyzer based on the builtin Lucene analyzer types.
  +   *
  +   * @todo Enforce analyzer and analyzerClassName to be mutually exclusive
  +   */
  +  public void setAnalyzer(AnalyzerType type) {
  +    analyzerClassName = type.getClassname();
  +  }
  +
  +  public void setAnalyzerClassName(String classname) {
  +    analyzerClassName = classname;
  +  }
  +
  +  /**
  +   *  Adds a set of files (nested fileset attribute).
  +   *
  +   *@param  set  FileSet to be added
  +   */
  +  public void addFileset(FileSet set) {
  +    filesets.add(set);
  +  }
  +
  +  /**
  +   * Sets custom properties for a configurable document handler.
  +   */
  +  public void addConfig(HandlerConfig config) throws BuildException {
  +    if (handlerConfig != null) {
  +      throw new BuildException("Only one config element allowed");
  +    }
   
  -    /**
  -     *  overwrite index?
  -     */
  -    private boolean overwrite = false;
  +    handlerConfig = config;
  +  }
   
  -    /**
  -     *  index path
  -     */
  -    private File indexPath;
   
  -    /**
  -     *  document handler classname
  -     */
  -    private String handlerClassName =
  -            "org.apache.lucene.ant.FileExtensionDocumentHandler";
  -
  -    /**
  -     *  document handler instance
  -     */
  -    private DocumentHandler handler;
  -
  -    /**
  -     *  Lucene merge factor
  -     */
  -    private int mergeFactor = 20;
  +  /**
  +   *  Begins the indexing
  +   *
  +   *@exception  BuildException  If an error occurs indexing the
  +   *      fileset
  +   */
  +  public void execute() throws BuildException {
  +
  +    // construct handler and analyzer dynamically
  +    try {
  +      Class clazz = Class.forName(handlerClassName);
  +      handler = (DocumentHandler) clazz.newInstance();
  +
  +      clazz = Class.forName(analyzerClassName);
  +      analyzer = (Analyzer) clazz.newInstance();
  +    } catch (ClassNotFoundException cnfe) {
  +      throw new BuildException(cnfe);
  +    } catch (InstantiationException ie) {
  +      throw new BuildException(ie);
  +    } catch (IllegalAccessException iae) {
  +      throw new BuildException(iae);
  +    }
   
  +    log("Document handler = " + handler.getClass(), Project.MSG_VERBOSE);
  +    log("Analyzer = " + analyzer.getClass(), Project.MSG_VERBOSE);
   
  -    /**
  -     *  Specifies the directory where the index will be stored
  -     *
  -     * @param  indexPath  The new index value
  -     */
  -    public void setIndex(File indexPath) {
  -        this.indexPath = indexPath;
  +    if (handler instanceof ConfigurableDocumentHandler) {
  +      ((ConfigurableDocumentHandler) handler).configure(handlerConfig.getProperties());
       }
   
  -    /**
  -     *  Sets the mergeFactor attribute of the IndexTask object
  -     *
  -     *@param  mergeFactor  The new mergeFactor value
  -     */
  -    public void setMergeFactor(int mergeFactor) {
  -        this.mergeFactor = mergeFactor;
  +    try {
  +      indexDocs();
  +    } catch (IOException e) {
  +      throw new BuildException(e);
       }
  +  }
   
   
  -    /**
  -     * If true, index will be overwritten.
  -     *
  -     * @param  overwrite  The new overwrite value
  -     */
  -    public void setOverwrite(boolean overwrite) {
  -        this.overwrite = overwrite;
  +  /**
  +   * Index the fileset.
  +   *
  +   *@exception  IOException if Lucene I/O exception
  +   *@todo refactor!!!!!
  +   */
  +  private void indexDocs() throws IOException {
  +    Date start = new Date();
  +
  +    boolean create = overwrite;
  +    // If the index directory doesn't exist,
  +    // create it and force create mode
  +    if (indexDir.mkdirs() && !overwrite) {
  +      create = true;
       }
   
  -
  -    /**
  -     * Classname of document handler.
  -     *
  -     * @param  classname  The new documentHandler value
  -     */
  -    public void setDocumentHandler(String classname) {
  -        handlerClassName = classname;
  +    Searcher searcher = null;
  +    boolean checkLastModified = false;
  +    if (!create) {
  +      try {
  +        searcher = new IndexSearcher(indexDir.getAbsolutePath());
  +        checkLastModified = true;
  +      } catch (IOException ioe) {
  +        log("IOException: " + ioe.getMessage());
  +        // Empty - ignore, which indicates to index all
  +        // documents
  +      }
       }
   
  +    log("checkLastModified = " + checkLastModified, Project.MSG_VERBOSE);
   
  -    /**
  -     *  Adds a set of files.
  -     *
  -     * @param  set  FileSet to be added
  -     */
  -    public void addFileset(FileSet set) {
  -        filesets.addElement(set);
  -    }
  +    IndexWriter writer =
  +      new IndexWriter(indexDir, analyzer, create);
  +    int totalFiles = 0;
  +    int totalIndexed = 0;
  +    int totalIgnored = 0;
  +    try {
  +      writer.mergeFactor = mergeFactor;
  +
  +      for (int i = 0; i < filesets.size(); i++) {
  +        FileSet fs = (FileSet) filesets.get(i);
  +        if (fs != null) {
  +          DirectoryScanner ds =
  +            fs.getDirectoryScanner(getProject());
  +          String[] dsfiles = ds.getIncludedFiles();
  +          File baseDir = ds.getBasedir();
  +
  +          for (int j = 0; j < dsfiles.length; j++) {
  +            File file = new File(baseDir, dsfiles[j]);
  +            totalFiles++;
  +
  +            if (!file.exists() || !file.canRead()) {
  +              throw new BuildException("File \"" +
  +                                       file.getAbsolutePath()
  +                                       + "\" does not exist or is not readable.");
  +            }
   
  +            boolean indexIt = true;
   
  -    /**
  -     *  Begins the indexing
  -     *
  -     * @exception  BuildException  If an error occurs indexing the
  -     *      fileset
  -     * @todo add classpath handling so handler does not
  -     *       have to be in system classpath
  -     */
  -    public void execute() throws BuildException {
  -        try {
  -            Class clazz = Class.forName(handlerClassName);
  -            handler = (DocumentHandler) clazz.newInstance();
  -        }
  -        catch (ClassNotFoundException cnfe) {
  -            throw new BuildException(cnfe);
  -        }
  -        catch (InstantiationException ie) {
  -            throw new BuildException(ie);
  -        }
  -        catch (IllegalAccessException iae) {
  -            throw new BuildException(iae);
  -        }
  +            if (checkLastModified) {
  +              Hits hits = null;
  +              Term pathTerm =
  +                new Term("path", file.getPath());
  +              TermQuery query =
  +                new TermQuery(pathTerm);
  +              hits = searcher.search(query);
  +
  +              // if document is found, compare the
  +              // indexed last modified time with the
  +              // current file
  +              // - don't index if up to date
  +              if (hits.length() > 0) {
  +                Document doc = hits.doc(0);
  +                String indexModified =
  +                  doc.get("modified").trim();
  +                if (indexModified != null) {
  +                  if (DateField.stringToTime(indexModified)
  +                    == file.lastModified()) {
  +                    indexIt = false;
  +                  }
  +                }
  +              }
  +            }
   
  -        try {
  -            indexDocs();
  -        }
  -        catch (IOException e) {
  -            throw new BuildException(e);
  +            if (indexIt) {
  +              try {
  +                log("Indexing " + file.getPath(),
  +                    Project.MSG_VERBOSE);
  +                Document doc =
  +                  handler.getDocument(file);
  +
  +                if (doc == null) {
  +                  totalIgnored++;
  +                } else {
  +                  // Add the path of the file as a field named "path".  Use a Text field, so
  +                  // that the index stores the path, and so that the path is searchable
  +                  doc.add(Field.Keyword("path", file.getPath()));
  +
  +                  // Add the last modified date of the file a field named "modified".  Use a
  +                  // Keyword field, so that it's searchable, but so that no attempt is made
  +                  // to tokenize the field into words.
  +                  doc.add(Field.Keyword("modified",
  +                                        DateField.timeToString(file.lastModified())));
  +
  +                  writer.addDocument(doc);
  +                  totalIndexed++;
  +                }
  +              } catch (DocumentHandlerException e) {
  +                throw new BuildException(e);
  +              }
  +            }
  +          }
  +          // for j
           }
  +        // if (fs != null)
  +      }
  +      // for i
  +
  +      writer.optimize();
  +    }
  +      //try
  +    finally {
  +      // always make sure everything gets closed,
  +      // no matter how we exit.
  +      writer.close();
  +      if (searcher != null) {
  +        searcher.close();
  +      }
       }
   
  +    Date end = new Date();
   
  -    /**
  -     *  index the fileset
  -     *
  -     * @exception  IOException  Description of Exception
  -     * @todo refactor - definitely lots of room for improvement here
  -     */
  -    private void indexDocs() throws IOException {
  -        Date start = new Date();
  +    log(totalIndexed + " out of " + totalFiles + " indexed (" +
  +        totalIgnored + " ignored) in " + (end.getTime() - start.getTime()) +
  +        " milliseconds");
  +  }
   
  -        boolean create = overwrite;
  -        // If the index directory doesn't exist,
  -        // create it and force create mode
  -        if (indexPath.mkdirs() && !overwrite) {
  -            create = true;
  -        }
  +  public static class HandlerConfig implements DynamicConfigurator {
  +    Properties props = new Properties();
   
  -        Searcher searcher = null;
  -        Analyzer analyzer = new StopAnalyzer();
  -        boolean checkLastModified = false;
  -        if (!create) {
  -            try {
  -                searcher = new IndexSearcher(indexPath.getAbsolutePath());
  -                checkLastModified = true;
  -            }
  -            catch (IOException ioe) {
  -                log("IOException: " + ioe.getMessage());
  -                // Empty - ignore, which indicates to index all
  -                // documents
  -            }
  -        }
  +    public void setDynamicAttribute(String attributeName, String value) throws BuildException {
  +      props.setProperty(attributeName, value);
  +    }
   
  -        log("checkLastModified = " + checkLastModified);
  +    public Object createDynamicElement(String elementName) throws BuildException {
  +      throw new BuildException("Sub elements not supported");
  +    }
   
  -        IndexWriter writer =
  -                       new IndexWriter(indexPath, analyzer, create);
  -        int totalFiles = 0;
  -        int totalIndexed = 0;
  -        int totalIgnored = 0;
  -        try {
  -            writer.mergeFactor = mergeFactor;
  -
  -            for (int i = 0; i < filesets.size(); i++) {
  -                FileSet fs = (FileSet) filesets.elementAt(i);
  -                if (fs != null) {
  -                    DirectoryScanner ds =
  -                                   fs.getDirectoryScanner(getProject());
  -                    String[] dsfiles = ds.getIncludedFiles();
  -                    File baseDir = ds.getBasedir();
  -
  -                    for (int j = 0; j < dsfiles.length; j++) {
  -                        File file = new File(baseDir, dsfiles[j]);
  -                        totalFiles++;
  -
  -                        if (!file.exists() || !file.canRead()) {
  -                            throw new BuildException("File \"" +
  -                        file.getAbsolutePath()
  -                        + "\" does not exist or is not readable.");
  -                        }
  -
  -                        boolean indexIt = true;
  -
  -                        if (checkLastModified) {
  -                            Hits hits = null;
  -                            Term pathTerm =
  -                                  new Term("path", file.getPath());
  -                            TermQuery query =
  -                                           new TermQuery(pathTerm);
  -                            hits = searcher.search(query);
  -
  -                            // if document is found, compare the
  -                            // indexed last modified time with the
  -                            // current file
  -                            // - don't index if up to date
  -                            if (hits.length() > 0) {
  -                                Document doc = hits.doc(0);
  -                                String indexModified =
  -                                               doc.get("modified");
  -                                if (indexModified != null) {
  -                                    if (DateField.stringToTime(indexModified)
  -                                             == file.lastModified()) {
  -                                        indexIt = false;
  -                                    }
  -                                }
  -                            }
  -                        }
  -
  -                        if (indexIt) {
  -                            try {
  -                                log("Indexing " + file.getPath(),
  -                                    Project.MSG_VERBOSE);
  -                                Document doc =
  -                                         handler.getDocument(file);
  -
  -                                if (doc == null) {
  -                                    totalIgnored++;
  -                                }
  -                                else {
  -                                    // Add the path of the file as a field named "path".  Use a Text field, so
  -                                    // that the index stores the path, and so that the path is searchable
  -                                    doc.add(Field.Keyword("path", file.getPath()));
  -
  -                                    // Add the last modified date of the file a field named "modified".  Use a
  -                                    // Keyword field, so that it's searchable, but so that no attempt is made
  -                                    // to tokenize the field into words.
  -                                    doc.add(Field.Keyword("modified",
  -                                            DateField.timeToString(file.lastModified())));
  -
  -                                    writer.addDocument(doc);
  -                                    totalIndexed++;
  -                                }
  -                            }
  -                            catch (DocumentHandlerException e) {
  -                                throw new BuildException(e);
  -                            }
  -                        }
  -                    }
  -                    // for j
  -                }
  -                // if (fs != null)
  -            }
  -            // for i
  +    public Properties getProperties() {
  +      return props;
  +    }
  +  }
   
  -            writer.optimize();
  -        }
  -        //try
  -        finally {
  -            // always make sure everything gets closed,
  -            // no matter how we exit.
  -            writer.close();
  -            if (searcher != null) {
  -                searcher.close();
  -            }
  -        }
  +  /**
  +   * @todo - the RusionAnalyzer requires a constructor argument
  +   *         so its being removed from here until a mechanism
  +   *         is developed to pass ctor info somehow
  +   */
  +  public static class AnalyzerType extends EnumeratedAttribute {
  +    private static Map analyzerLookup = new HashMap();
  +
  +    static {
  +      analyzerLookup.put("simple", SimpleAnalyzer.class.getName());
  +      analyzerLookup.put("standard", StandardAnalyzer.class.getName());
  +      analyzerLookup.put("stop", StopAnalyzer.class.getName());
  +      analyzerLookup.put("whitespace", WhitespaceAnalyzer.class.getName());
  +      analyzerLookup.put("german", GermanAnalyzer.class.getName());
  +//            analyzerLookup.put("russian", RussianAnalyzer.class.getName());
  +    }
   
  -        Date end = new Date();
  +    /**
  +     * @see EnumeratedAttribute#getValues
  +     */
  +    public String[] getValues() {
  +      Set keys = analyzerLookup.keySet();
  +      return (String[]) keys.toArray(new String[0]);
  +    }
   
  -        log(totalIndexed + " out of " + totalFiles + " indexed (" +
  -                totalIgnored + " ignored) in " + (end.getTime() - start.getTime()) +
  -                " milliseconds");
  +    public String getClassname() {
  +      return (String) analyzerLookup.get(getValue());
       }
  +  }
   }
   
  
  
  
  1.2       +6 -5      jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/TextDocument.java
  
  Index: TextDocument.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/TextDocument.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TextDocument.java	11 Jul 2002 01:12:30 -0000	1.1
  +++ TextDocument.java	19 Jan 2004 14:58:33 -0000	1.2
  @@ -1,14 +1,14 @@
   package org.apache.lucene.ant;
   
  +import org.apache.lucene.document.Document;
  +import org.apache.lucene.document.Field;
  +
   import java.io.BufferedReader;
   import java.io.File;
   import java.io.FileReader;
   import java.io.IOException;
   import java.io.StringWriter;
   
  -import org.apache.lucene.document.Document;
  -import org.apache.lucene.document.Field;
  -
   /**
    *  A utility for making Lucene Documents from a File.
    *
  @@ -62,9 +62,10 @@
           // make a new, empty document
           Document doc = new Document();
   
  +        doc.add(Field.Text("title", f.getName()));
           doc.add(Field.Text("contents", textDoc.getContents()));
  -        doc.add(Field.UnIndexed("rawcontents", 
  -                                           textDoc.getContents()));
  +        doc.add(Field.UnIndexed("rawcontents",
  +                textDoc.getContents()));
   
           // return the document
           return doc;
  
  
  
  1.1                  jakarta-lucene-sandbox/contributions/ant/src/main/org/apache/lucene/ant/ConfigurableDocumentHandler.java
  
  Index: ConfigurableDocumentHandler.java
  ===================================================================
  package org.apache.lucene.ant;
  
  import java.util.Properties;
  
  /**
   * A {@link DocumentHandler} that can additionally be configured from a
   * set of properties before it is asked to produce documents.
   */
  public interface ConfigurableDocumentHandler extends DocumentHandler {
      /**
       * Supplies configuration properties to this handler.
       *
       * @param props the properties to configure this handler with
       */
      void configure(Properties props);
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message