lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kelv...@apache.org
Subject cvs commit: jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler FileContentHandler.java FileContentHandlerAdapter.java FileContentHandlerFactory.java GZipHandler.java NestedFileContentHandlerAdapter.java NullHandler.java TARHandler.java TextHandler.java ZIPHandler.java
Date Wed, 08 May 2002 15:52:38 GMT
kelvint     02/05/08 08:52:38

  Modified:    projects/appex/src/java/search/contenthandler
                        FileContentHandler.java
                        FileContentHandlerAdapter.java
                        FileContentHandlerFactory.java GZipHandler.java
                        NestedFileContentHandlerAdapter.java
                        NullHandler.java TARHandler.java TextHandler.java
                        ZIPHandler.java
  Log:
  Importing the classes seems to have warped the whitespace. Here's my attempt to get things back to normal.
  
  Introduced new datasource and contenthandler mechanism. It's too major an alteration for individual changes to be enumerated.
  
  Revision  Changes    Path
  1.2       +88 -86    jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandler.java
  
  Index: FileContentHandler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- FileContentHandler.java	4 May 2002 15:43:46 -0000	1.1
  +++ FileContentHandler.java	8 May 2002 15:52:37 -0000	1.2
  @@ -1,86 +1,88 @@
  -package search.contenthandler;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import org.apache.lucene.document.Document;
  -
  -import java.io.File;
  -import java.util.List;
  -
  -/**
  - * A content handler determines how to index a file's contents.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public interface FileContentHandler
  -{
  -    /**
  -     * Perform filetype-specific actions to index the file's contents and
  -     * add it to the {@link org.apache.lucene.document.Document} object.
  -     */
  -    public void parse(Document doc, File f);
  -
  -    /**
  -     * Is this a collection of files?
  -     */
  -    public boolean isNested();
  -
  -    /**
  -     * Return the collection of files contained within the parent file.
  -     */
  -    public List getNestedData();
  -
  -    public Object clone();
  -}
  +package search.contenthandler;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import java.io.Reader;
  +import java.util.List;
  +
  +/**
  + * A content handler determines how to index a file's contents.
  + */
  +public interface FileContentHandler
  +{
  +    /**
  +     * Do the contents of this file have any meaning? Should
  +     * they be indexed?
  +     */
  +    public boolean fileContentIsReadable();
  +
  +    /**
  +     * Returns a reader for this file's contents.
  +     */
  +    public Reader getReader();
  +
  +    /**
  +     * Does this file have nested data within?
  +     */
  +    public boolean containsNestedData();
  +
  +    /**
  +     * Return the datasources contained within the parent file.
  +     * This can be URLs contained within a HTML file, files
  +     * within a ZIP file, basically anything represented by a
  +     * DataSource.
  +     */
  +    public List getNestedDataSource();
  +}
  
  
  
  1.2       +88 -81    jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandlerAdapter.java
  
  Index: FileContentHandlerAdapter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandlerAdapter.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- FileContentHandlerAdapter.java	4 May 2002 15:43:46 -0000	1.1
  +++ FileContentHandlerAdapter.java	8 May 2002 15:52:37 -0000	1.2
  @@ -1,81 +1,88 @@
  -package search.contenthandler;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import org.apache.lucene.document.Document;
  -
  -import java.io.File;
  -import java.util.List;
  -
  -/**
  - * A no-op implementation to make FileContentHandler creation easier.
  - * <p>
  - * Classes which need to implement the FileContentHandler interface should
  - * extend this class or {@link NestedFileContentHandlerAdapter}.
  - * </p>
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public abstract class FileContentHandlerAdapter implements FileContentHandler
  -{
  -    public void parse(Document doc, File f)
  -    {
  -    }
  -    public List getNestedData()
  -    {
  -        return null;
  -    }
  -    public abstract Object clone();
  -}
  +package search.contenthandler;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import java.io.File;
  +import java.io.Reader;
  +import java.util.List;
  +
  +/**
  + * A no-op implementation to make FileContentHandler creation easier.
  + * <p>
  + * Classes which need to implement the FileContentHandler interface should
  + * extend this class or {@link NestedFileContentHandlerAdapter}.
  + * </p>
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public abstract class FileContentHandlerAdapter implements FileContentHandler
  +{
  +    protected File file;
  +
  +    protected FileContentHandlerAdapter(File file)
  +    {
  +        this.file = file;
  +    }
  +
  +    public Reader getReader()
  +    {
  +        return null;
  +    }
  +
  +    public List getNestedDataSource()
  +    {
  +        return null;
  +    }
  +}
  
  
  
  1.2       +179 -85   jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandlerFactory.java
  
  Index: FileContentHandlerFactory.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandlerFactory.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- FileContentHandlerFactory.java	4 May 2002 15:43:46 -0000	1.1
  +++ FileContentHandlerFactory.java	8 May 2002 15:52:37 -0000	1.2
  @@ -1,85 +1,179 @@
  -package search.contenthandler;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import org.apache.log4j.Category;
  -
  -import java.util.Map;
  -
  -/**
  - * Factory responsible for obtaining ContentHandlers.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public abstract class ContentHandlerFactory
  -{
  -    public static final String DEFAULT_HANDLER_KEY = "DEFAULT";
  -    static Category cat = Category.getInstance(ContentHandlerFactory.class.getName());
  -    private static Map handlerCache = null;
  -    public static FileContentHandler getContentHandler(String extension)
  -    {
  -        if (handlerCache.containsKey(extension))
  -            return (FileContentHandler) ((FileContentHandler) handlerCache.get(extension)).clone();
  -        else if (handlerCache.containsKey(DEFAULT_HANDLER_KEY))
  -            return (FileContentHandler) ((FileContentHandler) handlerCache.get(DEFAULT_HANDLER_KEY)).clone();
  -        else
  -            return NullHandler.getInstance();
  -    }
  -
  -    public static void setContentHandlers(Map contentHandlers)
  -    {
  -        handlerCache = contentHandlers;
  -    }
  -}
  +package search.contenthandler;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import org.apache.log4j.Category;
  +
  +import java.util.Map;
  +import java.io.File;
  +import java.lang.reflect.InvocationTargetException;
  +import java.lang.reflect.Constructor;
  +
  +import search.util.IOUtils;
  +
  +/**
  + * Factory responsible for obtaining ContentHandlers.
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public abstract class FileContentHandlerFactory
  +{
  +    public static final String DEFAULT_HANDLER_KEY = "DEFAULT";
  +    static Category cat = Category.getInstance(FileContentHandlerFactory.class.getName());
  +    private static Map handlerRegistry;
  +
  +    public static FileContentHandler getContentHandler(File f)
  +    {
  +        String extension = IOUtils.getFileExtension(f);
  +        if (handlerRegistry.containsKey(extension))
  +        {
  +            String handlerClassname = (String) handlerRegistry.get(extension);
  +            return (FileContentHandler) generateObject(handlerClassname,
  +                                                                     new Class[]{File.class},
  +                                                                     new Object[]{f});
  +        }
  +        else if (handlerRegistry.containsKey(DEFAULT_HANDLER_KEY))
  +        {
  +            String handlerClassname = (String) handlerRegistry.get(DEFAULT_HANDLER_KEY);
  +            return (FileContentHandler) generateObject(handlerClassname);
  +        }
  +        else
  +        {
  +            return NullHandler.getInstance();
  +        }
  +    }
  +
  +    public static void setHandlerRegistry(Map handlerRegistry)
  +    {
  +        FileContentHandlerFactory.handlerRegistry = handlerRegistry;
  +    }
  +
  +    /**
  +     * Utility method to return an object based on its class name.
  +     * The object needs to have a constructor which accepts no parameters.
  +     *
  +     * @param className  Class name of object to be generated
  +     * @return Object
  +     */
  +    private static Object generateObject(String className)
  +    {
  +        Object o = null;
  +        try
  +        {
  +            Class c = Class.forName(className);
  +            o = c.newInstance();
  +        }
  +        catch (ClassNotFoundException cnfe)
  +        {
  +            cat.error(cnfe.getMessage() + " No class named '" + className + "' was found.", cnfe);
  +        }
  +        catch (InstantiationException ie)
  +        {
  +            cat.error(ie.getMessage() + " Class named '" + className + "' could not be  instantiated.", ie);
  +        }
  +        catch (IllegalAccessException iae)
  +        {
  +            cat.error(iae.getMessage() + " No access to class named '" + className + "'.", iae);
  +        }
  +        return o;
  +    }
  +
  +    /**
  +     * Utility method to return an object based on its class name.
  +     *
  +     * @param className  Class name of object to be generated
  +     * @param clazz Class array of constructor parameter types.
  +     * @param args Object array of constructor arguments.
  +     * @return Object
  +     */
  +    private static Object generateObject(String className,
  +                                        Class[] clazz,
  +                                        Object[] args)
  +    {
  +        Object o = null;
  +        try
  +        {
  +            Class c = Class.forName(className);
  +            Constructor con = c.getConstructor(clazz);
  +            if (con != null)
  +            {
  +                o = con.newInstance(args);
  +            }
  +            else
  +                throw new InstantiationException("Constructor with arguments:" + clazz.toString() + " non-existent.");
  +        }
  +        catch (ClassNotFoundException cnfe)
  +        {
  +            cat.error(cnfe.getMessage() + " No class named '" + className + "' was found.", cnfe);
  +        }
  +        catch (InstantiationException ie)
  +        {
  +            cat.error(ie.getMessage() + " Class named '" + className + "' could not be  instantiated.", ie);
  +        }
  +        catch (IllegalAccessException iae)
  +        {
  +            cat.error(iae.getMessage() + " No access to class named '" + className + "'.", iae);
  +        }
  +        catch (NoSuchMethodException nsme)
  +        {
  +            cat.error(nsme.getMessage() + " No method in class named '" + className + "'.", nsme);
  +        }
  +        catch (InvocationTargetException ite)
  +        {
  +            cat.error(ite.getMessage() + " in class named '" + className + "'.", ite);
  +        }
  +        return o;
  +    }
  +}
  
  
  
  1.2       +129 -123  jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/GZipHandler.java
  
  Index: GZipHandler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/GZipHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- GZipHandler.java	4 May 2002 15:43:46 -0000	1.1
  +++ GZipHandler.java	8 May 2002 15:52:37 -0000	1.2
  @@ -1,124 +1,130 @@
  -package search.contenthandler;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import org.apache.log4j.Category;
  -import org.apache.lucene.document.DateField;
  -import org.apache.lucene.document.Document;
  -
  -import java.io.File;
  -import java.io.IOException;
  -import java.util.ArrayList;
  -import java.util.HashMap;
  -import java.util.List;
  -import java.util.Map;
  -
  -import search.util.IOUtils;
  -
  -/**
  - * Handles GZip content.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public class GZipHandler extends NestedFileContentHandlerAdapter
  -{
  -    static Category cat = Category.getInstance(GZipHandler.class.getName());
  -
  -    public void parse(Document doc, File f)
  -    {
  -        if (!f.exists())
  -            return;
  -        try
  -        {
  -            File tempDir = new File(TEMP_FOLDER);
  -            tempDir.mkdirs();
  -            tempDir.deleteOnExit();
  -            String filename = f.getName();
  -            File tempFile = new File(tempDir, filename.substring(0, filename.lastIndexOf(".")));
  -            tempFile.deleteOnExit();
  -            IOUtils.extractGZip(f, tempFile);
  -            indexGZipDirectory(tempDir, dataMapList);
  -        }
  -        catch (IOException ioe)
  -        {
  -            cat.error("IOException ungzipping " + f.toString(), ioe);
  -        }
  -    }
  -
  -    // only one file, but let's just treat it like a directory anyway
  -    private void indexGZipDirectory(File dir, List dataMapList)
  -    {
  -        if (dir.isDirectory())
  -        {
  -            File[] dirContents = dir.listFiles();
  -            for (int i = 0; i < dirContents.length; i++)
  -            {
  -                indexGZipDirectory(dirContents[i], dataMapList);
  -            }
  -        }
  -        else if (dir.isFile())
  -        {
  -            // here create new DataMap for the gzip entry
  -            Map dataMap = new HashMap();
  -            dataMap.put("filePath", dir.toString());
  -            dataMapList.add(dataMap);
  -        }
  -    }
  -
  -    public Object clone()
  -    {
  -        return new GZipHandler();
  -    }
  +package search.contenthandler;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import org.apache.log4j.Category;
  +import search.DataSource;
  +import search.FSDataSource;
  +import search.util.IOUtils;
  +
  +import java.io.File;
  +import java.io.IOException;
  +import java.io.Reader;
  +import java.util.List;
  +
  +/**
  + * Handles GZip content: extracts the archive and exposes each extracted file as a nested data source.
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public class GZipHandler extends NestedFileContentHandlerAdapter
  +{
  +    private static Category cat = Category.getInstance(GZipHandler.class.getName());
  +
  +    public GZipHandler(File file)
  +    {
  +        super(file);
  +    }
  +
  +    public Reader getReader()
  +    {
  +        return null; // no reader is offered for the archive itself
  +    }
  +
  +    public List getNestedDataSource()
  +    {
  +        if (!file.exists())
  +            return null; // nothing to extract
  +        try
  +        {
  +            File tempDir = new File(TEMP_FOLDER);
  +            tempDir.mkdirs();
  +            tempDir.deleteOnExit();
  +            String filename = file.getName(); // extracted copy is named without the last extension
  +            File tempFile = new File(tempDir, filename.substring(0, filename.lastIndexOf(".")));
  +            tempFile.deleteOnExit();
  +            IOUtils.extractGZip(file, tempFile);
  +            indexGZipDirectory(tempDir); // collect what was extracted into nestedDataSource
  +        }
  +        catch (IOException ioe) // logged only; whatever was collected so far is still returned
  +        {
  +            cat.error("IOException ungzipping " + file.toString(), ioe);
  +        }
  +        return nestedDataSource;
  +    }
  +
  +    public boolean fileContentIsReadable()
  +    {
  +        return false;
  +    }
  +
  +    // only one file is expected, but walk it like a directory and register every file found
  +    private void indexGZipDirectory(File dir)
  +    {
  +        if (dir.isDirectory())
  +        {
  +            File[] dirContents = dir.listFiles(); // null when the directory cannot be listed
  +            for (int i = 0; dirContents != null && i < dirContents.length; i++)
  +            {
  +                indexGZipDirectory(dirContents[i]);
  +            }
  +        }
  +        else if (dir.isFile())
  +        {
  +            if (nestedDataSource == null) nestedDataSource = new java.util.ArrayList(); // created on first use
  +            nestedDataSource.add(new FSDataSource(dir)); // register the extracted file itself
  +        }
  +    }
   }
  
  
  
  1.2       +90 -92    jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/NestedFileContentHandlerAdapter.java
  
  Index: NestedFileContentHandlerAdapter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/NestedFileContentHandlerAdapter.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- NestedFileContentHandlerAdapter.java	4 May 2002 15:43:46 -0000	1.1
  +++ NestedFileContentHandlerAdapter.java	8 May 2002 15:52:37 -0000	1.2
  @@ -1,92 +1,90 @@
  -package search.contenthandler;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import org.apache.lucene.document.Document;
  -
  -import java.io.File;
  -import java.util.ArrayList;
  -import java.util.List;
  -
  -/**
  - * A no-op implementation to make FileContentHandler creation easier.
  - * <p>
  - * Classes which need to implement the FileContentHandler interface
  - * and need to handle nested content (example: zip, tar, rar, etc) should
  - * extend this class.
  - * </p>
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public abstract class NestedFileContentHandlerAdapter
  -        extends FileContentHandlerAdapter
  -{
  -    protected final String TEMP_FOLDER = "/usr/temp" + '/'
  -            + Math.random() + '/';
  -
  -    protected List dataMapList = new ArrayList();
  -
  -    public abstract void parse(Document doc, File f);
  -
  -    public boolean isNested()
  -    {
  -        return true;
  -    }
  -
  -    public List getNestedData()
  -    {
  -        return this.dataMapList;
  -    }
  -}
  +package search.contenthandler;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import org.apache.lucene.document.Document;
  +
  +import java.io.File;
  +import java.util.ArrayList;
  +import java.util.List;
  +
  +/**
  + * Base class for FileContentHandler implementations whose content is a
  + * container of other files (example: zip, tar, rar, etc).
  + * <p>
  + * This class only declares the nestedDataSource list; it neither creates
  + * nor fills it.
  + * </p>
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public abstract class NestedFileContentHandlerAdapter
  +        extends FileContentHandlerAdapter
  +{
  +    protected final String TEMP_FOLDER = "/usr/temp/" // hard-coded scratch root
  +            + Math.random() + "/"; // random path component, evaluated once per instance
  +
  +    protected List nestedDataSource; // starts out null
  +
  +    public boolean containsNestedData()
  +    {
  +        return true;
  +    }
  +
  +    public NestedFileContentHandlerAdapter(File file)
  +    {
  +        super(file);
  +    }
  +}
  
  
  
  1.2       +93 -80    jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/NullHandler.java
  
  Index: NullHandler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/NullHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- NullHandler.java	4 May 2002 15:43:46 -0000	1.1
  +++ NullHandler.java	8 May 2002 15:52:37 -0000	1.2
  @@ -1,80 +1,93 @@
  -package search.contenthandler;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -/**
  - * Do-nothing content handler.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public class NullHandler extends FileContentHandlerAdapter
  -{
  -    static NullHandler singleton = new NullHandler();
  -
  -    public static FileContentHandler getInstance()
  -    {
  -        return singleton;
  -    }
  -
  -    public Object clone()
  -    {
  -        return this;
  -    }
  -
  -    public boolean isNested()
  -    {
  -        return false;
  -    }
  -}
  +package search.contenthandler;
  +
  +import java.io.File;
  +import java.io.Reader;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +/**
  + * Do-nothing handler: no readable content, no reader, no nested data; one shared instance.
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public class NullHandler extends FileContentHandlerAdapter
  +{
  +    private static NullHandler instance = new NullHandler(null); // built without a file
  +
  +    private NullHandler(File file)
  +    {
  +        super(file);
  +    }
  +
  +    public static FileContentHandler getInstance()
  +    {
  +        return instance;
  +    }
  +
  +    public boolean containsNestedData()
  +    {
  +        return false;
  +    }
  +
  +    public boolean fileContentIsReadable()
  +    {
  +        return false;
  +    }
  +
  +    public Reader getReader()
  +    {
  +        return null;
  +    }
  +}
  
  
  
  1.2       +130 -117  jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/TARHandler.java
  
  Index: TARHandler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/TARHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TARHandler.java	4 May 2002 15:43:46 -0000	1.1
  +++ TARHandler.java	8 May 2002 15:52:37 -0000	1.2
  @@ -1,118 +1,131 @@
  -package search.contenthandler;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import search.util.IOUtils;
  -import org.apache.log4j.Category;
  -import org.apache.lucene.document.DateField;
  -import org.apache.lucene.document.Document;
  -
  -import java.io.File;
  -import java.io.IOException;
  -import java.util.ArrayList;
  -import java.util.HashMap;
  -import java.util.List;
  -import java.util.Map;
  -
  -/**
  - * Handles Tar files.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public class TARHandler extends NestedFileContentHandlerAdapter
  -{
  -    static Category cat = Category.getInstance(TARHandler.class.getName());
  -
  -    public void parse(Document doc, File f)
  -    {
  -        if (!f.exists())
  -            return;
  -        try
  -        {
  -            File tempDir = new File(TEMP_FOLDER);
  -            tempDir.deleteOnExit();
  -            IOUtils.extractTar(f, tempDir);
  -            indexTarDirectory(tempDir, dataMapList);
  -        }
  -        catch (IOException ioe)
  -        {
  -            cat.error(ioe.getMessage(), ioe);
  -        }
  -    }
  -
  -    private void indexTarDirectory(File dir, List dataMapList)
  -    {
  -        if (dir.isDirectory())
  -        {
  -            File[] dirContents = dir.listFiles();
  -            for (int i = 0; i < dirContents.length; i++)
  -            {
  -                indexTarDirectory(dirContents[i], dataMapList);
  -            }
  -        }
  -        else if (dir.isFile())
  -        {
  -            // here create new DataMap for the tarred file
  -            Map dataMap = new HashMap();
  -            dataMap.put("filePath", dir.toString());
  -            dataMapList.add(dataMap);
  -        }
  -    }
  -
  -    public Object clone()
  -    {
  -        return new TARHandler();
  -    }
  +package search.contenthandler;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import org.apache.log4j.Category;
  +import search.DataSource;
  +import search.FSDataSource;
  +import search.util.IOUtils;
  +
  +import java.io.File;
  +import java.io.IOException;
  +import java.io.Reader;
  +import java.util.ArrayList;
  +import java.util.List;
  +
  +/**
  + * Handles Tar files: unpacks the archive and exposes each extracted file as a nested data source.
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public class TARHandler extends NestedFileContentHandlerAdapter
  +{
  +    static Category cat = Category.getInstance(TARHandler.class.getName());
  +
  +    public TARHandler(File file)
  +    {
  +        super(file);
  +    }
  +
  +    public Reader getReader()
  +    {
  +        return null; // no reader is offered for the archive itself
  +    }
  +
  +    public boolean fileContentIsReadable()
  +    {
  +        return false;
  +    }
  +
  +    public List getNestedDataSource()
  +    {
  +        if (!file.exists())
  +            return null; // nothing to extract
  +        if (nestedDataSource == null)
  +        {
  +            nestedDataSource = new ArrayList(); // created on first call
  +        }
  +        try
  +        {
  +            File tempDir = new File(TEMP_FOLDER);
  +            tempDir.deleteOnExit();
  +            IOUtils.extractTar(file, tempDir);
  +            indexTarDirectory(tempDir); // collect what was extracted into nestedDataSource
  +        }
  +        catch (IOException ioe) // logged only; whatever was collected so far is still returned
  +        {
  +            cat.error(ioe.getMessage(), ioe);
  +        }
  +        return nestedDataSource;
  +    }
  +
  +    private void indexTarDirectory(File dir)
  +    {
  +        if (dir.isDirectory())
  +        {
  +            File[] dirContents = dir.listFiles(); // null when the directory cannot be listed
  +            for (int i = 0; dirContents != null && i < dirContents.length; i++)
  +            {
  +                indexTarDirectory(dirContents[i]);
  +            }
  +        }
  +        else if (dir.isFile())
  +        {
  +            // register the extracted file itself as a nested data source
  +            DataSource ds = new FSDataSource(dir);
  +            nestedDataSource.add(ds);
  +        }
  +    }
   }
  
  
  
  1.2       +116 -121  jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/TextHandler.java
  
  Index: TextHandler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/TextHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TextHandler.java	4 May 2002 15:43:46 -0000	1.1
  +++ TextHandler.java	8 May 2002 15:52:37 -0000	1.2
  @@ -1,121 +1,116 @@
  -package search.contenthandler;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import org.apache.log4j.Category;
  -import org.apache.lucene.document.Document;
  -import org.apache.lucene.document.Field;
  -
  -import java.io.*;
  -
  -import search.util.StringUtils;
  -
  -/**
  - * Handles text-based content.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public class TextHandler extends FileContentHandlerAdapter
  -{
  -    static Category cat = Category.getInstance(TextHandler.class.getName());
  -
  -    public void parse(Document doc, File f)
  -    {
  -        if (!f.exists())
  -        {
  -            cat.error(f.toString() + " doesn't exist! Failing silently...");
  -            return;
  -        }
  -        doc.add(Field.Text("fileContents", getReader(f)));
  -    }
  -
  -    public boolean isNested()
  -    {
  -        return false;
  -    }
  -
  -    private Reader getReader(File f)
  -    {
  -        Reader reader = null;
  -        try
  -        {
  -            BufferedReader br = new BufferedReader(new FileReader(f));
  -            String s = null;
  -            StringBuffer strbf = new StringBuffer();
  -            while ((s = br.readLine()) != null)
  -            {
  -                if (s.trim().length() > 0)
  -                {
  -                    strbf.append(StringUtils.removeUnreadableCharacters(s));
  -                }
  -            }
  -            reader = new StringReader(strbf.toString());
  -        }
  -        catch (FileNotFoundException nfe)
  -        {
  -            cat.error("File Not Found Exception:" + f.toString(), nfe);
  -        }
  -        catch (IOException ioe)
  -        {
  -            cat.error(ioe.getMessage(), ioe);
  -        }
  -        return reader;
  -    }
  -
  -    public Object clone()
  -    {
  -        return new TextHandler();
  -    }
  -}
  +package search.contenthandler;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import org.apache.log4j.Category;
  +import org.apache.lucene.document.Document;
  +import org.apache.lucene.document.Field;
  +
  +import java.io.*;
  +
  +import search.util.StringUtils;
  +
  +/**
  + * Handles text-based content.
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public class TextHandler extends FileContentHandlerAdapter
  +{
  +    static Category cat = Category.getInstance(TextHandler.class.getName());
  +
  +    /**
  +     * Creates a handler for the given text file.
  +     *
  +     * @param file the text file; handed to the superclass constructor
  +     */
  +    public TextHandler(File file)
  +    {
  +        super(file);
  +    }
  +
  +    /**
  +     * Opens the handled file for reading.
  +     *
  +     * @return a reader over the file, or <code>null</code> (after an error
  +     *         has been logged) when the file is missing or cannot be opened
  +     */
  +    public Reader getReader()
  +    {
  +        if (file.exists())
  +        {
  +            return getReader(file);
  +        }
  +        cat.error(file.toString() + " doesn't exist! Failing silently...");
  +        return null;
  +    }
  +
  +    /**
  +     * @return always <code>false</code>
  +     */
  +    public boolean containsNestedData()
  +    {
  +        return false;
  +    }
  +
  +    /**
  +     * @return always <code>true</code>
  +     */
  +    public boolean fileContentIsReadable()
  +    {
  +        return true;
  +    }
  +
  +    /**
  +     * Wraps <code>f</code> in a <code>FileReader</code>, logging instead of
  +     * propagating any failure to open it.
  +     *
  +     * @param f the file to open
  +     * @return the reader, or <code>null</code> when opening failed
  +     */
  +    private Reader getReader(File f)
  +    {
  +        try
  +        {
  +            return new FileReader(f);
  +        }
  +        catch (FileNotFoundException nfe)
  +        {
  +            cat.error("File Not Found Exception:" + f.toString(), nfe);
  +        }
  +        catch (IOException ioe)
  +        {
  +            cat.error(ioe.getMessage(), ioe);
  +        }
  +        return null;
  +    }
  +}
  
  
  
  1.2       +131 -115  jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/ZIPHandler.java
  
  Index: ZIPHandler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/ZIPHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- ZIPHandler.java	4 May 2002 15:43:46 -0000	1.1
  +++ ZIPHandler.java	8 May 2002 15:52:37 -0000	1.2
  @@ -1,116 +1,132 @@
  -package search.contenthandler;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import search.util.IOUtils;
  -import org.apache.log4j.Category;
  -import org.apache.lucene.document.Document;
  -
  -import java.io.File;
  -import java.io.IOException;
  -import java.util.Enumeration;
  -import java.util.HashMap;
  -import java.util.Map;
  -import java.util.zip.ZipEntry;
  -import java.util.zip.ZipException;
  -import java.util.zip.ZipFile;
  -
  -/**
  - * Handles Zip files.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public class ZIPHandler extends NestedFileContentHandlerAdapter
  -{
  -    static Category cat = Category.getInstance(ZIPHandler.class.getName());
  -
  -    public void parse(Document doc, File f)
  -    {
  -        if (!f.exists())
  -            return;
  -        try
  -        {
  -            ZipFile zFile = new ZipFile(f);
  -            for (Enumeration e = zFile.entries(); e.hasMoreElements();)
  -            {
  -                ZipEntry entry = (ZipEntry) e.nextElement();
  -                String entryName = entry.getName();
  -                IOUtils.writeToTempFile(zFile.getInputStream(entry),
  -                                        TEMP_FOLDER + entryName);
  -                if (!entry.isDirectory())
  -                {
  -                    // create a new DataMap for each zip entry
  -                    Map dataMap = new HashMap();
  -                    dataMap.put("filePath", TEMP_FOLDER + entryName);
  -                    dataMapList.add(dataMap);
  -                }
  -            }
  -            zFile.close();
  -        }
  -        catch (ZipException ze)
  -        {
  -            cat.error("ZipException parsing zip:" + ze.getMessage(), ze);
  -        }
  -        catch (IOException ioe)
  -        {
  -            cat.error("IOException parsing zip:" + ioe.getMessage(), ioe);
  -        }
  -    }
  -
  -    public Object clone()
  -    {
  -        return new ZIPHandler();
  -    }
  +package search.contenthandler;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import org.apache.log4j.Category;
  +import search.DataSource;
  +import search.FSDataSource;
  +import search.util.IOUtils;
  +
  +import java.io.File;
  +import java.io.IOException;
  +import java.io.Reader;
  +import java.util.ArrayList;
  +import java.util.Enumeration;
  +import java.util.List;
  +import java.util.zip.ZipEntry;
  +import java.util.zip.ZipException;
  +import java.util.zip.ZipFile;
  +
  +/**
  + * Handles Zip files.
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public class ZIPHandler extends NestedFileContentHandlerAdapter
  +{
  +    private static Category cat = Category.getInstance(ZIPHandler.class);
  +
  +    public ZIPHandler(File file)
  +    {
  +        super(file);
  +    }
  +
  +    public boolean fileContentIsReadable()
  +    {
  +        return false;
  +    }
  +
  +    public Reader getReader()
  +    {
  +        return null;
  +    }
  +
  +    public List getNestedDataSource()
  +    {
  +        if (!file.exists())
  +            return null;
  +        if (nestedDataSource == null)
  +        {
  +            nestedDataSource = new ArrayList();
  +        }
  +        try
  +        {
  +            ZipFile zFile = new ZipFile(file);
  +            for (Enumeration e = zFile.entries(); e.hasMoreElements();)
  +            {
  +                ZipEntry entry = (ZipEntry) e.nextElement();
  +                String entryName = entry.getName();
  +                IOUtils.writeToTempFile(zFile.getInputStream(entry),
  +                                        TEMP_FOLDER + entryName);
  +                if (!entry.isDirectory())
  +                {
  +                    // create a new DataMap for each zip entry
  +                    DataSource ds = new FSDataSource(TEMP_FOLDER + entryName);
  +                    nestedDataSource.add(ds);
  +                }
  +            }
  +            zFile.close();
  +        }
  +        catch (ZipException ze)
  +        {
  +            cat.error("ZipException parsing zip:" + ze.getMessage(), ze);
  +        }
  +        catch (IOException ioe)
  +        {
  +            cat.error("IOException parsing zip:" + ioe.getMessage(), ioe);
  +        }
  +        return nestedDataSource;
  +    }
   }
  
  
  

--
To unsubscribe, e-mail:   <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>


Mime
View raw message