cocoon-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vgritse...@apache.org
Subject cvs commit: xml-cocoon2/src/java/org/apache/cocoon/components/search LuceneCocoonHelper.java LuceneCocoonIndexer.java LuceneCocoonPager.java LuceneIndexContentHandler.java LuceneXMLIndexer.java SimpleLuceneCocoonIndexerImpl.java SimpleLuceneCocoonSearcherImpl.java SimpleLuceneXMLIndexerImpl.java
Date Wed, 23 Jan 2002 19:06:39 GMT
vgritsenko    02/01/23 11:06:39

  Modified:    src/java/org/apache/cocoon/components/crawler
                        CocoonCrawler.java SimpleCocoonCrawlerImpl.java
               src/java/org/apache/cocoon/components/search
                        LuceneCocoonHelper.java LuceneCocoonIndexer.java
                        LuceneCocoonPager.java
                        LuceneIndexContentHandler.java
                        LuceneXMLIndexer.java
                        SimpleLuceneCocoonIndexerImpl.java
                        SimpleLuceneCocoonSearcherImpl.java
                        SimpleLuceneXMLIndexerImpl.java
  Log:
   - Add getCountOfHits() to the pager to fill in the gap;
   - Organize imports
   - Use '&' instead of '?' when requesting a URI with parameters (patch idea by Colin Britton [cbritton@metatomix.com])
  
  Revision  Changes    Path
  1.2       +4 -3      xml-cocoon2/src/java/org/apache/cocoon/components/crawler/CocoonCrawler.java
  
  Index: CocoonCrawler.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/CocoonCrawler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- CocoonCrawler.java	3 Jan 2002 12:31:09 -0000	1.1
  +++ CocoonCrawler.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -6,17 +6,18 @@
    * the LICENSE file.                                                         *
    */
   package org.apache.cocoon.components.crawler;
  -import java.net.*;
   
  -import java.util.*;
   
   import org.apache.avalon.framework.component.Component;
   
  +import java.net.URL;
  +import java.util.Iterator;
  +
   /**
    * The avalon behavioural component interface of crawling.
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Id: CocoonCrawler.java,v 1.1 2002/01/03 12:31:09 giacomo Exp $
  + * @version    CVS $Id: CocoonCrawler.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $
    */
   public interface CocoonCrawler extends Component
   {
  
  
  
  1.2       +19 -10    xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java
  
  Index: SimpleCocoonCrawlerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SimpleCocoonCrawlerImpl.java	3 Jan 2002 12:31:09 -0000	1.1
  +++ SimpleCocoonCrawlerImpl.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -7,13 +7,7 @@
    */
   package org.apache.cocoon.components.crawler;
   
  -import java.io.*;
  -import java.net.*;
  -import java.util.*;
  -
  -import org.apache.avalon.excalibur.pool.Recyclable;
   import org.apache.avalon.framework.activity.Disposable;
  -
   import org.apache.avalon.framework.configuration.Configurable;
   import org.apache.avalon.framework.configuration.Configuration;
   import org.apache.avalon.framework.configuration.ConfigurationException;
  @@ -21,6 +15,8 @@
   import org.apache.avalon.framework.parameters.Parameters;
   import org.apache.avalon.framework.thread.ThreadSafe;
   
  +import org.apache.avalon.excalibur.pool.Recyclable;
  +
   import org.apache.cocoon.Constants;
   import org.apache.cocoon.util.Tokenizer;
   
  @@ -29,11 +25,22 @@
   import org.apache.regexp.RE;
   import org.apache.regexp.RESyntaxException;
   
  +import java.io.InputStream;
  +import java.io.BufferedReader;
  +import java.io.InputStreamReader;
  +import java.io.IOException;
  +import java.net.URL;
  +import java.net.URLConnection;
  +import java.util.HashSet;
  +import java.util.Iterator;
  +import java.util.List;
  +import java.util.ArrayList;
  +
   /**
    * A simple cocoon crawler.
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.1 2002/01/03 12:31:09 giacomo Exp $
  + * @version    CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $
    */
   public class SimpleCocoonCrawlerImpl extends AbstractLoggable
            implements CocoonCrawler, Configurable, Disposable, Recyclable
  @@ -77,7 +84,7 @@
        *
        * @since
        */
  -    public final static String LINK_VIEW_QUERY_DEFAULT = "?cocoon-view=links";
  +    public final static String LINK_VIEW_QUERY_DEFAULT = "cocoon-view=links";
   
       /**
        * Config element name specifying excluding regular expression pattern.
  @@ -416,7 +423,9 @@
   
           // get links of url
           try {
  -            URL links_url = new URL(url, url.getPath() + linkViewQuery);
  +            URL links_url = new URL(url, url.getPath()
  +                + ((url.getPath().indexOf("?") == -1) ? "?" : "&") 
  +                + linkViewQuery);
               URLConnection links_url_connection = links_url.openConnection();
               InputStream is = links_url_connection.getInputStream();
               BufferedReader br = new BufferedReader(new InputStreamReader(is));
  @@ -546,7 +555,7 @@
        * </p>
        *
        * @author     <a href="mailto:berni_huber@a1.net>Bernhard Huber</a>
  -     * @version    $Id: SimpleCocoonCrawlerImpl.java,v 1.1 2002/01/03 12:31:09 giacomo Exp $
  +     * @version    $Id: SimpleCocoonCrawlerImpl.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $
        */
       public static class CocoonCrawlerIterator implements Iterator
       {
  
  
  
  1.2       +6 -9      xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonHelper.java
  
  Index: LuceneCocoonHelper.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonHelper.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- LuceneCocoonHelper.java	3 Jan 2002 12:31:13 -0000	1.1
  +++ LuceneCocoonHelper.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -9,20 +9,21 @@
   
   import java.io.File;
   import java.io.IOException;
  -import org.apache.lucene.analysis.Analyzer;
  -import org.apache.lucene.index.*;
   
  -import org.apache.lucene.store.*;
  +import org.apache.lucene.analysis.Analyzer;
  +import org.apache.lucene.store.FSDirectory;
  +import org.apache.lucene.store.Directory;
  +import org.apache.lucene.index.IndexReader;
  +import org.apache.lucene.index.IndexWriter;
   
   /**
    * This class encapsulates some helper methods.
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Id: LuceneCocoonHelper.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $
  + * @version    CVS $Id: LuceneCocoonHelper.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $
    */
   public class LuceneCocoonHelper
   {
  -
       /**
        *Gets the directory attribute of the LuceneCocoonHelper class
        *
  @@ -37,7 +38,6 @@
           return fsDirectory;
       }
   
  -
       /**
        *Gets the analyzer attribute of the LuceneCocoonHelper class
        *
  @@ -55,7 +55,6 @@
           return analyzer;
       }
   
  -
       /**
        *Gets the indexReader attribute of the LuceneCocoonHelper class
        *
  @@ -69,7 +68,6 @@
           return reader;
       }
   
  -
       /**
        *Gets the indexWriter attribute of the LuceneCocoonHelper class
        *
  @@ -84,6 +82,5 @@
           IndexWriter writer = new IndexWriter(index, analyzer, create);
           return writer;
       }
  -
   }
   
  
  
  
  1.2       +1 -3      xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonIndexer.java
  
  Index: LuceneCocoonIndexer.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonIndexer.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- LuceneCocoonIndexer.java	3 Jan 2002 12:31:13 -0000	1.1
  +++ LuceneCocoonIndexer.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -19,11 +19,10 @@
    * The avalon behavioural component interface of an indexer.
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Id: LuceneCocoonIndexer.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $
  + * @version    CVS $Id: LuceneCocoonIndexer.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $
    */
   public interface LuceneCocoonIndexer extends Component
   {
  -
       /**
        *Description of the Field
        *
  @@ -53,4 +52,3 @@
       void index(Directory index, boolean create, URL base_url)
                throws ProcessingException;
   }
  -
  
  
  
  1.2       +15 -8     xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonPager.java
  
  Index: LuceneCocoonPager.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonPager.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- LuceneCocoonPager.java	3 Jan 2002 12:31:13 -0000	1.1
  +++ LuceneCocoonPager.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -9,21 +9,19 @@
   
   import java.io.File;
   import java.io.IOException;
  +import java.util.ListIterator;
  +import java.util.ArrayList;
  +import java.util.NoSuchElementException;
   
  -import java.util.*;
   import org.apache.lucene.analysis.Analyzer;
   import org.apache.lucene.document.Document;
  -import org.apache.lucene.index.*;
   import org.apache.lucene.search.Hits;
   
  -import org.apache.lucene.store.*;
  -
  -// implementtion of ListIterator
   /**
    * This class should help you to manage paging of hits.
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Id: LuceneCocoonPager.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $
  + * @version    CVS $Id: LuceneCocoonPager.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $
    */
   public class LuceneCocoonPager implements ListIterator
   {
  @@ -127,6 +125,16 @@
   
   
       /**
  +     * Get count of hits
  +     *
  +     * @return    The count of hits
  +     * @since
  +     */
  +    public int getCountOfHits() {
  +        return hits.length();
  +    }
  +
  +    /**
        * Get count of hits displayed per single page
        *
        * @return    The countOfHitsPerPage value
  @@ -136,7 +144,6 @@
           return this.countOfHitsPerPage;
       }
   
  -
       /**
        * Caluclate count of pages for displaying all hits
        *
  @@ -303,7 +310,7 @@
        * A helper class encapsulating found document, and its score
        *
        * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  -     * @version    CVS $Id: LuceneCocoonPager.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $
  +     * @version    CVS $Id: LuceneCocoonPager.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $
        */
       public static class HitWrapper
       {
  
  
  
  1.2       +4 -6      xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneIndexContentHandler.java
  
  Index: LuceneIndexContentHandler.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneIndexContentHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- LuceneIndexContentHandler.java	3 Jan 2002 12:31:13 -0000	1.1
  +++ LuceneIndexContentHandler.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -6,17 +6,17 @@
    *  the LICENSE file.                                                         *
    */
   package org.apache.cocoon.components.search;
  +
   import java.util.ArrayList;
   import java.util.Iterator;
   import java.util.List;
  -
   import java.util.Stack;
  -import org.apache.lucene.document.DateField;
   
  +import org.apache.lucene.document.DateField;
   import org.apache.lucene.document.Document;
   import org.apache.lucene.document.Field;
  -import org.xml.sax.Attributes;
   
  +import org.xml.sax.Attributes;
   import org.xml.sax.ContentHandler;
   import org.xml.sax.InputSource;
   import org.xml.sax.Locator;
  @@ -27,7 +27,7 @@
    * Parse XML and generate lucene document(s)
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Id: LuceneIndexContentHandler.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $
  + * @version    CVS $Id: LuceneIndexContentHandler.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $
    */
   public class LuceneIndexContentHandler implements ContentHandler
   {
  @@ -222,6 +222,4 @@
        * @since
        */
       public void startPrefixMapping(String prefix, String uri) { }
  -
   }
  -
  
  
  
  1.2       +3 -2      xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneXMLIndexer.java
  
  Index: LuceneXMLIndexer.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneXMLIndexer.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- LuceneXMLIndexer.java	3 Jan 2002 12:31:13 -0000	1.1
  +++ LuceneXMLIndexer.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -6,11 +6,13 @@
    * the LICENSE file.                                                         *
    */
   package org.apache.cocoon.components.search;
  +
   import java.net.URL;
   import java.util.Iterator;
   import java.util.List;
   
   import org.apache.avalon.framework.component.Component;
  +
   import org.apache.cocoon.ProcessingException;
   
   /**
  @@ -31,7 +33,7 @@
    * </p>
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Id: LuceneXMLIndexer.java,v 1.1 2002/01/03 12:31:13 giacomo Exp $
  + * @version    CVS $Id: LuceneXMLIndexer.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp $
    */
   public interface LuceneXMLIndexer extends Component
   {
  @@ -129,4 +131,3 @@
        */
       void build(URL url) throws ProcessingException;
   }
  -
  
  
  
  1.2       +4 -2      xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonIndexerImpl.java
  
  Index: SimpleLuceneCocoonIndexerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonIndexerImpl.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SimpleLuceneCocoonIndexerImpl.java	3 Jan 2002 12:31:13 -0000	1.1
  +++ SimpleLuceneCocoonIndexerImpl.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -46,7 +46,7 @@
    * </p>
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Revision: 1.1 $ $Date: 2002/01/03 12:31:13 $
  + * @version    CVS $Revision: 1.2 $ $Date: 2002/01/23 19:06:38 $
    */
   public class SimpleLuceneCocoonIndexerImpl extends AbstractLoggable
            implements LuceneCocoonIndexer, Configurable, Composable, Disposable
  @@ -198,8 +198,10 @@
   
                       // skip urls using different host, or port than host,
                       // or port of base url
  -                    System.out.println("Skipping carwling URL " + crawl_url.toString() +
  +                    if (getLogger().isDebugEnabled()) {
  +                        getLogger().debug("Skipping crawling URL " + crawl_url.toString() +
                               " as base_url is " + base_url.toString());
  +                    }
                       continue;
                   }
   
  
  
  
  1.2       +3 -6      xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonSearcherImpl.java
  
  Index: SimpleLuceneCocoonSearcherImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonSearcherImpl.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SimpleLuceneCocoonSearcherImpl.java	3 Jan 2002 12:31:13 -0000	1.1
  +++ SimpleLuceneCocoonSearcherImpl.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -14,22 +14,19 @@
   
   import org.apache.avalon.excalibur.pool.Recyclable;
   import org.apache.avalon.framework.activity.Disposable;
  -
   import org.apache.avalon.framework.component.ComponentException;
   import org.apache.avalon.framework.component.ComponentManager;
   import org.apache.avalon.framework.component.Composable;
  -
   import org.apache.avalon.framework.configuration.Configurable;
   import org.apache.avalon.framework.configuration.Configuration;
   import org.apache.avalon.framework.configuration.ConfigurationException;
  -
   import org.apache.avalon.framework.logger.AbstractLoggable;
   
   import org.apache.cocoon.ProcessingException;
   import org.apache.cocoon.util.ClassUtils;
  +
   import org.apache.lucene.analysis.Analyzer;
   import org.apache.lucene.document.DateField;
  -
   import org.apache.lucene.document.Document;
   import org.apache.lucene.document.Field;
   import org.apache.lucene.index.*;
  @@ -57,7 +54,7 @@
    * </p>
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Revision: 1.1 $ $Date: 2002/01/03 12:31:13 $
  + * @version    CVS $Revision: 1.2 $ $Date: 2002/01/23 19:06:38 $
    */
   public class SimpleLuceneCocoonSearcherImpl extends AbstractLoggable
            implements LuceneCocoonSearcher, Configurable, Composable, Disposable, Recyclable
  @@ -218,8 +215,8 @@
           this.directory = directory;
           if (indexReaderCache != null) {
               indexReaderCache.close();
  +            indexReaderCache = null;
           }
  -        indexReaderCache = null;
       }
   
   
  
  
  
  1.2       +5 -3      xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java
  
  Index: SimpleLuceneXMLIndexerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SimpleLuceneXMLIndexerImpl.java	3 Jan 2002 12:31:13 -0000	1.1
  +++ SimpleLuceneXMLIndexerImpl.java	23 Jan 2002 19:06:38 -0000	1.2
  @@ -54,7 +54,7 @@
    * A simple class building lucene documents from xml content.
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version    CVS $Revision: 1.1 $ $Date: 2002/01/03 12:31:13 $
  + * @version    CVS $Revision: 1.2 $ $Date: 2002/01/23 19:06:38 $
    */
   public class SimpleLuceneXMLIndexerImpl extends AbstractLoggable
            implements LuceneXMLIndexer, Configurable, Composable
  @@ -80,7 +80,7 @@
        *
        * @since
        */
  -    final String CONTENT_QUERY = "?cocoon-view=content";
  +    final String CONTENT_QUERY = "cocoon-view=content";
   
       /**
        * set of allowed content types
  @@ -163,7 +163,9 @@
                throws ProcessingException {
   
           try {
  -            URL contentURL = new URL(url, url.getPath() + CONTENT_QUERY);
  +            URL contentURL = new URL(url, url.getPath()
  +                + ((url.getPath().indexOf("?") == -1) ? "?" : "&")
  +                + CONTENT_QUERY);
               URLConnection contentURLConnection = contentURL.openConnection();
               String contentType = contentURLConnection.getContentType();
               if (contentType != null &&
  
  
  

----------------------------------------------------------------------
In case of troubles, e-mail:     webmaster@xml.apache.org
To unsubscribe, e-mail:          cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org


Mime
View raw message