cocoon-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From fel...@apache.org
Subject svn commit: r554234 [2/2] - in /cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src: changes/ main/java/org/apache/cocoon/components/search/ main/java/org/apache/cocoon/generation/ main/java/org/apache/cocoon/samples/ main/resources/META-INF/cocoo...
Date Sat, 07 Jul 2007 18:08:57 GMT
Added: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneCocoonCrawler.java
URL: http://svn.apache.org/viewvc/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneCocoonCrawler.java?view=auto&rev=554234
==============================================================================
--- cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneCocoonCrawler.java
(added)
+++ cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneCocoonCrawler.java
Sat Jul  7 11:08:56 2007
@@ -0,0 +1,697 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.samples;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.avalon.excalibur.pool.Recyclable;
+import org.apache.avalon.framework.activity.Disposable;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.cocoon.Constants;
+import org.apache.cocoon.components.crawler.CocoonCrawler;
+import org.apache.cocoon.util.AbstractLogEnabled;
+import org.apache.commons.lang.StringUtils;
+import org.apache.regexp.RE;
+import org.apache.regexp.RESyntaxException;
+import org.springframework.beans.factory.InitializingBean;
+
+/**
+ * A simple cocoon crawler.
+ * 
+ * @version $Id: SimpleCocoonCrawlerImpl.java 448473 2006-09-21 06:22:51Z
+ *          crossley $
+ */
+public class LuceneCocoonCrawler extends AbstractLogEnabled implements CocoonCrawler, InitializingBean,
Disposable,
+        Recyclable {
+
+    /**
+     * Config element name specifying expected link content-type.
+     * <p>
+     * Its value is <code>link-content-type</code>.
+     * </p>
+     */
+    public final static String LINK_CONTENT_TYPE_CONFIG = "link-content-type";
+
+    /**
+     * Default value of <code>link-content-type</code> configuration value.
+     * <p>
+     * Its value is <code>application/x-cocoon-links</code>.
+     * </p>
+     */
+    public final String LINK_CONTENT_TYPE_DEFAULT = Constants.LINK_CONTENT_TYPE;
+
+    /**
+     * Config element name specifying query-string appended for requesting
+     * links of a URL.
+     * <p>
+     * Its value is <code>link-view-query</code>.
+     * </p>
+     */
+    public final static String LINK_VIEW_QUERY_CONFIG = "link-view-query";
+
+    /**
+     * Default value of <code>link-view-query</code> configuration option.
+     * <p>
+     * Its value is <code>cocoon-view=links</code>.
+     * </p>
+     */
+    public final static String LINK_VIEW_QUERY_DEFAULT = "cocoon-view=links";
+
+    /**
+     * Config element name specifying excluding regular expression pattern.
+     * <p>
+     * Its value is <code>exclude</code>.
+     * </p>
+     */
+    public final static String EXCLUDE_CONFIG = "exclude";
+
+    /**
+     * Config element name specifying including regular expression pattern.
+     * <p>
+     * Its value is <code>include</code>.
+     * </p>
+     */
+    public final static String INCLUDE_CONFIG = "include";
+
+    /**
+     * Config element name specifying http header value for user-Agent.
+     * <p>
+     * Its value is <code>user-agent</code>.
+     * </p>
+     */
+    public final static String USER_AGENT_CONFIG = "user-agent";
+
+    /**
+     * Default value of <code>user-agent</code> configuration option.
+     * 
+     * @see Constants#COMPLETE_NAME
+     */
+    public final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME;
+
+    /**
+     * Config element name specifying http header value for accept.
+     * <p>
+     * Its value is <code>accept</code>.
+     * </p>
+     */
+    public final static String ACCEPT_CONFIG = "accept";
+
+    /**
+     * Default value of <code>accept</code> configuration option.
+     * <p>
+     * Its value is <code>* / *</code>
+     * </p>
+     */
+    public final static String ACCEPT_DEFAULT = "*/*";
+
+    private String linkViewQuery = LINK_VIEW_QUERY_DEFAULT;
+    private String linkContentType = LINK_CONTENT_TYPE_DEFAULT;
+    private HashSet excludeCrawlingURL;
+    private HashSet includeCrawlingURL;
+    private String userAgent = USER_AGENT_DEFAULT;
+    private String accept = ACCEPT_DEFAULT;
+    private HashSet crawled;
+
+    private List includes;
+    private List excludes;
+
+    protected int depth;
+    protected HashSet urlsToProcess;
+    protected HashSet urlsNextDepth;
+
+    /**
+     * Constructor for the LuceneCocoonCrawler object
+     */
+    public LuceneCocoonCrawler() {
+        // no include patterns yet: isIncludedURL() treats null as
+        // "include everything"
+        includeCrawlingURL = null;
+        // no exclude patterns yet: the default exclude patterns are only
+        // installed by afterPropertiesSet() when no excludes are configured
+        excludeCrawlingURL = null;
+    }
+
+    /**
+     * Configure the crawler component.
+     * <p>
+     * Configure can specify which URI to include, and which URI to exclude from
+     * crawling. You specify the patterns as regular expressions.
+     * </p>
+     * <p>
+     * Moreover you can configure the required content-type of crawling request,
+     * and the query-string appended to each crawling request.
+     * </p>
+     * 
+     * <pre><tt>
+     * &lt;include&gt;.*\.html?&lt;/include&gt; or &lt;include&gt;.*\.html?, .*\.xsp&lt;/include&gt;
+     * &lt;exclude&gt;.*\.gif&lt;/exclude&gt; or &lt;exclude&gt;.*\.gif, .*\.jpe?g&lt;/exclude&gt;
+     * &lt;link-content-type&gt; application/x-cocoon-links &lt;/link-content-type&gt;
+     * &lt;link-view-query&gt; cocoon-view=links &lt;/link-view-query&gt;
+     * </tt></pre>
+     * 
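+     * The patterns are read from the <code>includes</code> and
+     * <code>excludes</code> properties; this method takes no parameter.
+     * 
+     * @exception IllegalArgumentException
+     *                declared for invalid configuration; a pattern that fails
+     *                to compile is only logged as an error, not rethrown.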
+     */
+    public void afterPropertiesSet() throws IllegalArgumentException {
+        if (includes != null) {
+            includeCrawlingURL = new HashSet();
+            compilePatterns(includes, includeCrawlingURL, "including");
+        } else if (getLogger().isDebugEnabled()) {
+            // includeCrawlingURL stays null, which isIncludedURL() reads as
+            // "include everything"
+            getLogger().debug("Include all URLs");
+        }
+
+        excludeCrawlingURL = new HashSet();
+        if (excludes != null) {
+            compilePatterns(excludes, excludeCrawlingURL, "excluding");
+        } else {
+            // nothing configured: fall back to the built-in exclude patterns
+            setDefaultExcludeFromCrawling();
+            if (getLogger().isDebugEnabled()) {
+                getLogger().debug("Exclude default URLs only");
+            }
+        }
+    }
+
+    /**
+     * Compile every configured pattern entry into RE objects.
+     * <p>
+     * Each entry may hold several patterns separated by commas and/or blanks.
+     * If one pattern of an entry does not compile, the error is logged and
+     * the remaining patterns of that entry are skipped; later entries are
+     * still processed.
+     * </p>
+     * 
+     * @param patterns
+     *            list of String entries, each holding one or more patterns
+     * @param target
+     *            set receiving the compiled RE objects
+     * @param kind
+     *            <code>including</code> or <code>excluding</code>, used in
+     *            the error message only
+     */
+    private void compilePatterns(List patterns, HashSet target, String kind) {
+        for (int i = 0; i < patterns.size(); i++) {
+            String pattern = (String) patterns.get(i);
+            try {
+                String[] tokens = StringUtils.split(pattern, ", ");
+                if (tokens == null) {
+                    // StringUtils.split() yields null for a null entry
+                    continue;
+                }
+                for (int index = 0; index < tokens.length; index++) {
+                    target.add(new RE(tokens[index]));
+                }
+            } catch (RESyntaxException rese) {
+                getLogger().error("Cannot create " + kind + " regular-expression for " + pattern, rese);
+            }
+        }
+    }
+
+    /**
+     * dispose at end of life cycle, releasing all resources.
+     */
+    public void dispose() {
+        crawled = null;
+        urlsToProcess = null;
+        urlsNextDepth = null;
+        excludeCrawlingURL = null;
+        includeCrawlingURL = null;
+    }
+
+    /**
+     * recycle this object, releasing resources
+     */
+    public void recycle() {
+        crawled = null;
+        urlsToProcess = null;
+        urlsNextDepth = null;
+        depth = -1;
+    }
+
+    /**
+     * The same as calling crawl(url,-1);
+     * 
+     * @param url
+     *            Crawl this URL, getting all links from this URL.
+     */
+    public void crawl(URL url) {
+        crawl(url, -1);
+    }
+
+    /**
+     * Start crawling a URL.
+     * 
+     * <p>
+     * Use this method to start crawling. Get this url, and all its children
+     * by using <code>iterator()</code>. The Iterator object will return URL
+     * objects.
+     * </p>
+     * <p>
+     * You may use the crawl(), and iterator() methods the following way:
+     * </p>
+     * 
+     * <pre><tt>
+     *   SimpleCocoonCrawlerImpl scci = ....;
+     *   scci.crawl( &quot;http://foo/bar&quot; );
+     *   Iterator i = scci.iterator();
+     *   while (i.hasNext()) {
+     *     URL url = (URL)i.next();
+     *     ...
+     *   }
+     * </tt></pre>
+     * 
+     * <p>
+     * The i.next() method returns a URL, and calculates the links of the URL
+     * before return it.
+     * </p>
+     * 
+     * @param url
+     *            Crawl this URL, getting all links from this URL.
+     * @param maxDepth
+     *            maximum depth to crawl to. -1 for no maximum.
+     */
+    public void crawl(URL url, int maxDepth) {
+        crawled = new HashSet();
+        urlsToProcess = new HashSet();
+        urlsNextDepth = new HashSet();
+        depth = maxDepth;
+
+        if (getLogger().isDebugEnabled()) {
+            getLogger().debug("crawl URL " + url + " to depth " + maxDepth);
+        }
+        urlsToProcess.add(url);
+    }
+
+    /**
+     * Return iterator, iterating over all links of the currently crawled URL.
+     * <p>
+     * The Iterator object will return URL objects at its <code>next()</code>
+     * method.
+     * </p>
+     * 
+     * @return Iterator iterator of all links from the crawl URL.
+     * @since
+     */
+    public Iterator iterator() {
+        return new CocoonCrawlerIterator(this);
+    }
+
+    /**
+     * Default exclude patterns.
+     * <p>
+     * By default URLs matching following patterns are excluded:
+     * </p>
+     * <ul>
+     * <li>.*\\.gif(\\?.*)?$ - exclude gif images</li>
+     * <li>.*\\.png(\\?.*)?$ - exclude png images</li>
+     * <li>.*\\.jpe?g(\\?.*)?$ - exclude jpeg images</li>
+     * <li>.*\\.js(\\?.*)?$ - exclude javascript </li>
+     * <li>.*\\.css(\\?.*)?$ - exclude cascaded stylesheets</li>
+     * </ul>
+     * 
+     * @since
+     */
+    private void setDefaultExcludeFromCrawling() {
+        String[] EXCLUDE_FROM_CRAWLING_DEFAULT = { ".*\\.gif(\\?.*)?$", ".*\\.png(\\?.*)?$",
".*\\.jpe?g(\\?.*)?$",
+                ".*\\.js(\\?.*)?$", ".*\\.css(\\?.*)?$" };
+
+        for (int i = 0; i < EXCLUDE_FROM_CRAWLING_DEFAULT.length; i++) {
+            String pattern = EXCLUDE_FROM_CRAWLING_DEFAULT[i];
+            try {
+                excludeCrawlingURL.add(new RE(pattern));
+            } catch (RESyntaxException rese) {
+                getLogger().error("Cannot create excluding regular-expression for " + pattern,
rese);
+            }
+        }
+    }
+
+    /**
+     * Compute list of links from the url.
+     * <p>
+     * Check for include, exclude pattern, content-type, and if url has been
+     * crawled already.
+     * </p>
+     * 
+     * @param url
+     *            Crawl this URL
+     * @return List of URLs, which are links from url, asserting the conditions.
+     * @since
+     */
+    private List getLinks(URL url) {
+        ArrayList url_links = null;
+        String sURL = url.toString();
+
+        if (!isIncludedURL(sURL) || isExcludedURL(sURL)) {
+            return null;
+        }
+
+        // don't try to get links for url which has been crawled already
+        if (crawled.contains(sURL)) {
+            return null;
+        }
+
+        // mark it as crawled
+        crawled.add(sURL);
+
+        // get links of url
+        if (getLogger().isDebugEnabled()) {
+            getLogger().debug("Getting links of URL " + sURL);
+        }
+        BufferedReader br = null;
+        try {
+            // request the links view of the URL; append the view query with
+            // '?' or '&' depending on whether a query string already exists
+            sURL = url.getFile();
+            URL links = new URL(url, sURL + ((sURL.indexOf("?") == -1) ? "?" : "&") + linkViewQuery);
+            URLConnection links_url_connection = links.openConnection();
+            links_url_connection.setRequestProperty("Accept", accept);
+            links_url_connection.setRequestProperty("User-Agent", userAgent);
+            links_url_connection.connect();
+            InputStream is = links_url_connection.getInputStream();
+            br = new BufferedReader(new InputStreamReader(is));
+
+            String contentType = links_url_connection.getContentType();
+            if (contentType == null) {
+                if (getLogger().isDebugEnabled()) {
+                    getLogger().debug("Ignoring " + sURL + " (no content type)");
+                }
+                // there is a check on null in the calling method
+                return null;
+            }
+
+            // strip parameters such as "; charset=..." from the content type
+            int index = contentType.indexOf(';');
+            if (index != -1) {
+                contentType = contentType.substring(0, index);
+            }
+
+            if (getLogger().isDebugEnabled()) {
+                getLogger().debug("Content-type: " + contentType);
+            }
+
+            if (contentType.equals(linkContentType)) {
+                url_links = new ArrayList();
+                // String forms of the links collected so far. url_links holds
+                // URL objects, so searching it with a String never matches;
+                // duplicates are therefore tracked in this separate set.
+                HashSet seenLinks = new HashSet();
+
+                // content is supposed to be a list of links,
+                // relative to current URL
+                String line;
+                while ((line = br.readLine()) != null) {
+                    final URL newUrl = new URL(url, line);
+                    final String sNewUrl = newUrl.toString();
+
+                    // skip links already collected from this page, links
+                    // crawled before, links not matched by an include
+                    // definition and links matched by an exclude definition
+                    boolean add_url = !seenLinks.contains(sNewUrl)
+                            && !crawled.contains(sNewUrl)
+                            && isIncludedURL(sNewUrl)
+                            && !isExcludedURL(sNewUrl);
+                    if (add_url) {
+                        if (getLogger().isDebugEnabled()) {
+                            getLogger().debug("Add URL: " + sNewUrl);
+                        }
+                        seenLinks.add(sNewUrl);
+                        url_links.add(newUrl);
+                    }
+                }
+                // now we have a list of URL which should be examined
+            }
+        } catch (IOException ioe) {
+            // best effort: links collected before the failure are still returned
+            getLogger().warn("Problems get links of " + url, ioe);
+        } finally {
+            if (br != null) {
+                try {
+                    br.close();
+                } catch (IOException ignored) {
+                    // nothing useful can be done if closing the reader fails
+                }
+            }
+        }
+        return url_links;
+    }
+
+    /**
+     * check if URL is excluded from crawling, i.e. matches an exclude pattern
+     * 
+     * @param url
+     *            the URL to check
+     * @return The excludedURL value
+     */
+    private boolean isExcludedURL(String url) {
+        // by default do not exclude URL for crawling
+        if (excludeCrawlingURL == null) {
+            return false;
+        }
+
+        final String s = url.toString();
+        Iterator i = excludeCrawlingURL.iterator();
+        while (i.hasNext()) {
+            RE pattern = (RE) i.next();
+            if (pattern.match(s)) {
+                if (getLogger().isDebugEnabled()) {
+                    getLogger().debug("Excluded URL " + url);
+                }
+                return true;
+            }
+        }
+        if (getLogger().isDebugEnabled()) {
+            getLogger().debug("Not excluded URL " + url);
+        }
+        return false;
+    }
+
+    /**
+     * check if URL is a candidate for indexing
+     * 
+     * @param url
+     *            the URL to check
+     * @return The includedURL value
+     */
+    private boolean isIncludedURL(String url) {
+        // by default include URL for crawling
+        if (includeCrawlingURL == null) {
+            return true;
+        }
+
+        final String s = url.toString();
+        Iterator i = includeCrawlingURL.iterator();
+        while (i.hasNext()) {
+            RE pattern = (RE) i.next();
+            if (pattern.match(s)) {
+                if (getLogger().isDebugEnabled()) {
+                    getLogger().debug("Included URL " + url);
+                }
+                return true;
+            }
+        }
+        if (getLogger().isDebugEnabled()) {
+            getLogger().debug("Not included URL " + url);
+        }
+        return false;
+    }
+
+    /**
+     * Helper class implementing an Iterator
+     * <p>
+     * This Iterator implementation calculates the links of an URL before
+     * returning in the next() method.
+     * </p>
+     * 
+     * @version $Id: SimpleCocoonCrawlerImpl.java 448473 2006-09-21 06:22:51Z
+     *          crossley $
+     */
+    public static class CocoonCrawlerIterator implements Iterator {
+        // crawler whose work queues (urlsToProcess, urlsNextDepth) and depth
+        // counter this iterator reads and mutates
+        private LuceneCocoonCrawler cocoonCrawler;
+
+        /**
+         * Constructor for the CocoonCrawlerIterator object
+         * 
+         * @param cocoonCrawler
+         *            the containing CocoonCrawler instance.
+         */
+        CocoonCrawlerIterator(LuceneCocoonCrawler cocoonCrawler) {
+            this.cocoonCrawler = cocoonCrawler;
+        }
+
+        /**
+         * check if URLs are still queued for crawling.
+         * 
+         * @return <code>true</code> if the current or the next depth level
+         *         still holds queued URLs, else <code>false</code>.
+         */
+        public boolean hasNext() {
+            return cocoonCrawler.urlsToProcess.size() > 0 || cocoonCrawler.urlsNextDepth.size()
> 0;
+        }
+
+        /**
+         * Take URLs off the current depth level until one yields a link list.
+         * <p>
+         * May return <code>null</code> although hasNext() reported
+         * <code>true</code>: every queued URL is removed from the to-do list,
+         * but a URL is only returned if the depth limit still allows crawling
+         * and getLinks() returns a non-null list for it.
+         * </p>
+         * 
+         * @return the next URL, or <code>null</code> if no queued URL qualified
+         */
+        public Object next() {
+            if (cocoonCrawler.urlsToProcess.size() == 0 && cocoonCrawler.urlsNextDepth.size()
> 0) {
+                // process queued urls belonging to the next depth level
+                cocoonCrawler.urlsToProcess = cocoonCrawler.urlsNextDepth;
+                cocoonCrawler.urlsNextDepth = new HashSet();
+                // fix Bugzilla Bug 25270
+                // only decrease if depth > 0, excluding decreasing
+                // if depth is already equal to -1
+                if (cocoonCrawler.depth > 0) {
+                    cocoonCrawler.depth--;
+                }
+            }
+            URL theNextUrl = null;
+            // fix Bugzilla Bug 25270
+            // return NextUrl != null only if getLinks() returns non-null
+            // list
+            for (Iterator i = cocoonCrawler.urlsToProcess.iterator(); i.hasNext() &&
theNextUrl == null;) {
+                // fetch a URL
+                URL url = (URL) i.next();
+
+                // remove it from the to-do list
+                i.remove();
+
+                // depth == -1 means no maximum; depth == 0 means the limit is
+                // reached, so the url is dropped without fetching its links
+                if (cocoonCrawler.depth == -1 || cocoonCrawler.depth > 0) {
+                    // calc all links from this url
+                    List url_links = cocoonCrawler.getLinks(url);
+                    if (url_links != null) {
+                        // add links of this url to the to-do list
+                        cocoonCrawler.urlsNextDepth.addAll(url_links);
+                        theNextUrl = url;
+                    }
+                }
+            }
+            // finally return url
+            return theNextUrl;
+        }
+
+        /**
+         * remove is not implemented
+         */
+        public void remove() {
+            throw new UnsupportedOperationException("remove is not implemented");
+        }
+    }
+
+    /**
+     * @return the includes
+     */
+    public List getIncludes() {
+        return includes;
+    }
+
+    /**
+     * @param includes
+     *            the includes to set
+     */
+    public void setIncludes(List includes) {
+        this.includes = includes;
+    }
+
+    /**
+     * @return the excludes
+     */
+    public List getExcludes() {
+        return excludes;
+    }
+
+    /**
+     * @param excludes
+     *            the excludes to set
+     */
+    public void setExcludes(List excludes) {
+        this.excludes = excludes;
+    }
+
+    /**
+     * @return the linkContentType
+     */
+    public String getLinkContentType() {
+        return linkContentType;
+    }
+
+    /**
+     * @param linkContentType
+     *            the linkContentType to set
+     */
+    public void setLinkContentType(String linkContentType) {
+        if (linkContentType != null) {
+            // trim before testing for emptiness, so a whitespace-only value
+            // cannot replace the current content type with an empty string
+            final String trimmed = linkContentType.trim();
+            if (trimmed.length() > 0) {
+                this.linkContentType = trimmed;
+            }
+        }
+    }
+
+    /**
+     * @return the linkViewQuery
+     */
+    public String getLinkViewQuery() {
+        return linkViewQuery;
+    }
+
+    /**
+     * @param linkViewQuery
+     *            the linkViewQuery to set
+     */
+    public void setLinkViewQuery(String linkViewQuery) {
+        if (linkViewQuery != null && linkViewQuery.length() > 0) {
+            this.linkViewQuery = linkViewQuery;
+        }
+    }
+
+    /**
+     * @return the userAgent
+     */
+    public String getUserAgent() {
+        return userAgent;
+    }
+
+    /**
+     * @param userAgent
+     *            the userAgent to set
+     */
+    public void setUserAgent(String userAgent) {
+        if (userAgent != null && userAgent.length() > 0) {
+            this.userAgent = userAgent;
+        }
+    }
+
+    /**
+     * @return the accept
+     */
+    public String getAccept() {
+        return accept;
+    }
+
+    /**
+     * @param accept
+     *            the accept to set
+     */
+    public void setAccept(String accept) {
+        if (accept != null && accept.length() > 0) {
+            this.accept = accept;
+        }
+    }
+}

Propchange: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneCocoonCrawler.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneCocoonCrawler.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneUtil.java
URL: http://svn.apache.org/viewvc/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneUtil.java?view=diff&rev=554234&r1=554233&r2=554234
==============================================================================
--- cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneUtil.java
(original)
+++ cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/samples/LuceneUtil.java
Sat Jul  7 11:08:56 2007
@@ -21,60 +21,74 @@
 import java.net.MalformedURLException;
 import java.net.URL;
 
-import org.apache.avalon.framework.service.ServiceException;
-import org.apache.avalon.framework.service.ServiceManager;
-import org.apache.avalon.framework.service.Serviceable;
 import org.apache.cocoon.ProcessingException;
 import org.apache.cocoon.components.search.LuceneCocoonHelper;
 import org.apache.cocoon.components.search.LuceneCocoonIndexer;
 import org.apache.cocoon.configuration.Settings;
+import org.apache.cocoon.spring.configurator.WebAppContextUtils;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.store.Directory;
 
 /**
- * This is a sample helper class that can be used from flow to 
- * create an index.
+ * This is a sample helper class that can be used from flow to create an index.
+ * 
  * @version $Id$
  */
-public class LuceneUtil 
-    implements Serviceable {
+public class LuceneUtil {
 
-    private File workDir;
-    private ServiceManager manager;
+    private LuceneCocoonIndexer luceneCocoonIndexer;
 
-    /**
-     * @see org.apache.avalon.framework.service.Serviceable#service(org.apache.avalon.framework.service.ServiceManager)
-     */
-    public void service(ServiceManager manager) throws ServiceException {
-        this.manager = manager;
-        final Settings settings = (Settings)this.manager.lookup(Settings.ROLE);
-        this.workDir = new File(settings.getWorkDirectory());
-        this.manager.release(settings);
-    }
+    public void createIndex(String baseURL, boolean create) throws ProcessingException {
+        Analyzer analyzer = LuceneCocoonHelper.getAnalyzer("org.apache.lucene.analysis.standard.StandardAnalyzer");
 
-    public void createIndex(String baseURL, boolean create)
-    throws ProcessingException {
-        LuceneCocoonIndexer lcii = null;
-        Analyzer analyzer = LuceneCocoonHelper.getAnalyzer( "org.apache.lucene.analysis.standard.StandardAnalyzer"
);
-        
         try {
-        
-            lcii = (LuceneCocoonIndexer)this.manager.lookup( LuceneCocoonIndexer.ROLE );
-            Directory directory = LuceneCocoonHelper.getDirectory( new File( workDir, "index"
), create );
-            lcii.setAnalyzer( analyzer );
-            URL base_url = new URL( baseURL );
-            lcii.index( directory, create, base_url );
+            final Settings settings = (Settings) WebAppContextUtils.getCurrentWebApplicationContext().getBean(
+                    "org.apache.cocoon.configuration.Settings");
+            Directory directory = LuceneCocoonHelper.getDirectory(new File(new File(settings.getWorkDirectory()),
+                    "index"), create);
+            getLuceneCocoonIndexer().setAnalyzer(analyzer);
+            URL base_url = new URL(baseURL);
+            getLuceneCocoonIndexer().index(directory, create, base_url);
         } catch (MalformedURLException mue) {
-            throw new ProcessingException( "MalformedURLException in createIndex()!", mue
);
+            throw new ProcessingException("MalformedURLException in createIndex()!", mue);
         } catch (IOException ioe) {
             // ignore ??
-            throw new ProcessingException( "IOException in createIndex()!", ioe );
-        } catch (ServiceException ce) {
+            throw new ProcessingException("IOException in createIndex()!", ioe);
+        }
+    }
+
+    public void createIndex2(String baseURL, boolean create) throws ProcessingException {
+        Analyzer analyzer = LuceneCocoonHelper.getAnalyzer("org.apache.lucene.analysis.standard.StandardAnalyzer");
+
+        try {
+            final Settings settings = (Settings) WebAppContextUtils.getCurrentWebApplicationContext().getBean(
+                    "org.apache.cocoon.configuration.Settings");
+            Directory directory = LuceneCocoonHelper.getDirectory(new File(new File(settings.getWorkDirectory()),
+                    "index2"), create);
+            getLuceneCocoonIndexer().setAnalyzer(analyzer);
+            URL base_url = new URL(baseURL);
+            getLuceneCocoonIndexer().index(directory, create, base_url);
+        } catch (MalformedURLException mue) {
+            throw new ProcessingException("MalformedURLException in createIndex2()!", mue);
+        } catch (IOException ioe) {
             // ignore ??
-            throw new ProcessingException( "ServiceException in createIndex()!", ce );
-        } finally {
-            this.manager.release( lcii );
+            throw new ProcessingException("IOException in createIndex2()!", ioe);
         }
     }
-    
+
+    /**
+     * @return the luceneCocoonIndexer
+     */
+    public LuceneCocoonIndexer getLuceneCocoonIndexer() {
+        return luceneCocoonIndexer;
+    }
+
+    /**
+     * @param luceneCocoonIndexer
+     *            the luceneCocoonIndexer to set
+     */
+    public void setLuceneCocoonIndexer(LuceneCocoonIndexer luceneCocoonIndexer) {
+        this.luceneCocoonIndexer = luceneCocoonIndexer;
+    }
+
 }

Added: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
URL: http://svn.apache.org/viewvc/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml?view=auto&rev=554234
==============================================================================
--- cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
(added)
+++ cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
Sat Jul  7 11:08:56 2007
@@ -0,0 +1,109 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!--+
+  | This is the Spring bean configuration for the Cocoon Lucene block.
+  |
+  | @version $Id$
+  +-->
+<beans xmlns="http://www.springframework.org/schema/beans" 
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.0.xsd
+                           http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-2.0.xsd">
+
+  <bean name="org.apache.cocoon.generation.Generator/lucene-search" 
+        class="org.apache.cocoon.generation.SearchGenerator"
+        scope="prototype">
+    <!-- The component to use for searching. -->
+    <property name="luceneCocoonSearcher" ref="org.apache.cocoon.components.search.LuceneCocoonSearcher" />
+  </bean>
+
+  <bean name="org.apache.cocoon.components.search.LuceneCocoonIndexer"
+        class="org.apache.cocoon.components.search.SimpleLuceneCocoonIndexerImpl">
+    <!-- XML Indexer implementation. -->
+    <property name="luceneXMLIndexer" ref="org.apache.cocoon.components.search.LuceneXMLIndexer" />
+    <!-- CocoonCrawler implementation. -->
+    <property name="cocoonCrawler" ref="org.apache.cocoon.components.crawler.CocoonCrawler" />
+    <!-- Configuration element of lucene's Analyzer class. Its default class is
+         'org.apache.lucene.analysis.standard.StandardAnalyzer' -->
+    <!-- property name="analyzer" ref="org.apache.lucene.analysis.standard.StandardAnalyzer" /-->
+    <!-- Configuration element of lucene's merge factor. Its default value is '10'. -->
+    <property name="mergeFactor" value="10" />
+  </bean>
+
+  <bean name="org.apache.cocoon.components.search.LuceneCocoonSearcher"
+        class="org.apache.cocoon.components.search.SimpleLuceneCocoonSearcherImpl">
+    <!-- Configuration element of lucene's Analyzer class. Its default class is
+         'org.apache.lucene.analysis.standard.StandardAnalyzer' -->
+    <!-- property name="analyzer" ref="org.apache.lucene.analysis.standard.StandardAnalyzer" /-->
+    <!-- Configuration element default value of lucene's default search field. Its value is 'body'. -->
+    <property name="defaultSearchFieldDefault" value="body" />
+    <!-- Configuration element default value of default-query. Its default value is 'null'. -->
+    <!-- property name="defaultQueryDefault" value="" / -->
+    <!-- Configuration element name of lucene's default filesystem default directory. Its
+         default value is 'null'. -->
+    <property name="defaultDirectory" value="target" />
+    <!-- Configuration element name of query parser class name. Configuration element default
+         value of queryparser-classname.
+         Its value is 'org.apache.lucene.queryParser.QueryParser'. -->
+    <property name="defaultQueryparser" value="org.apache.lucene.queryParser.QueryParser" />
+  </bean>
+
+  <bean name="org.apache.cocoon.components.search.LuceneXMLIndexer"
+        class="org.apache.cocoon.components.search.SimpleLuceneXMLIndexerImpl">
+    <!-- SAXParser implementation. -->
+    <property name="parser" ref="org.apache.cocoon.core.xml.SAXParser" />
+    <!-- Optional config element name specifying query-string appended for requesting links of a URL. -->
+    <property name="contentViewQuery" value="cocoon-view=content" />
+    <!-- Optional config element name specifying the tags to be added as Stored, Untokenised, Unindexed Fields. -->
+    <!-- property name="storeFields">
+      <set>
+      <value>title</value>
+      <value>summary</value>
+      </set>
+      </property-->
+  </bean>
+
+  <bean name="org.apache.cocoon.components.crawler.CocoonCrawler"
+        class="org.apache.cocoon.samples.LuceneCocoonCrawler">
+    <!-- Optional list of properties -->
+    <!--
+      <property name="include">
+      <list>
+      <value></value>
+      </list>
+      </property>
+      <property name="exclude">
+      <list>
+      <value></value>
+      </list>
+      </property>
+      <property name="linkContentType" value="" />
+      <property name="linkViewQuery" value="" />
+      <property name="userAgent" value="" />
+      <property name="accept" value="" />
+    -->
+  </bean>
+
+  <!-- Sample util -->
+  <bean name="org.apache.cocoon.samples.LuceneUtil"
+        class="org.apache.cocoon.samples.LuceneUtil">
+    <property name="luceneCocoonIndexer" ref="org.apache.cocoon.components.search.LuceneCocoonIndexer" />
+  </bean>
+
+</beans>
\ No newline at end of file

Propchange: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
------------------------------------------------------------------------------
    svn:keywords = Id



Mime
View raw message