incubator-droids-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject svn commit: r1440010 - in /incubator/droids/branches/0.2.x-cleanup/droids-crawler: ./ src/main/java/org/apache/droids/crawler/ src/main/java/org/apache/droids/fetcher/ src/main/java/org/apache/droids/norobots/ src/main/java/org/apache/droids/protocol/h...
Date Tue, 29 Jan 2013 17:27:44 GMT
Author: tobr
Date: Tue Jan 29 17:27:43 2013
New Revision: 1440010

URL: http://svn.apache.org/viewvc?rev=1440010&view=rev
Log:
added simple crawler module
simplified the usage of HttpClient
added back the test http server from examples

Added:
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java   (with props)
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java   (with props)
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/DroidsHttpClient.java   (with props)
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/norobots/
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/norobots/HttpClientContentLoader.java   (with props)
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/CrawlingDroidTest.java   (with props)
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/LocalHttpServer.java   (with props)
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/ResourceHandler.java   (with props)
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page1_html
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page2_html
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page3_html
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page4_html
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/start_html
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/logback.xml   (with props)
Removed:
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/ReportCrawlingDroid.java
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/AppTest.java
Modified:
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java

Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml?rev=1440010&r1=1440009&r2=1440010&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml Tue Jan 29 17:27:43 2013
@@ -9,7 +9,7 @@
         <version>0.3.0-incubating-SNAPSHOT</version>
     </parent>
     <artifactId>droids-crawler</artifactId>
-    <name>droids-crawler</name>
+    <name>APACHE DROIDS CRAWLER</name>
     <dependencies>
         <dependency>
             <groupId>org.apache.droids</groupId>
@@ -17,10 +17,26 @@
             <version>${project.version}</version>
         </dependency>
         <dependency>
+            <groupId>org.apache.droids</groupId>
+            <artifactId>droids-norobots</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>4.2.2</version>
+        </dependency>
+        <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
             <version>${junit.version}</version>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>${logback.version}</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 </project>

Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java?rev=1440010&r1=1440009&r2=1440010&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java Tue Jan 29 17:27:43 2013
@@ -16,57 +16,36 @@
  */
 package org.apache.droids.crawler;
 
-import java.net.URI;
-import java.net.URISyntaxException;
 import java.util.Collection;
 
-import com.google.common.base.Preconditions;
-
 import java.util.Queue;
 
 import org.apache.droids.core.AbstractDroid;
 import org.apache.droids.core.TaskMaster;
 import org.apache.droids.core.Worker;
-import org.apache.droids.exception.InvalidTaskException;
-
-public abstract class CrawlingDroid extends AbstractDroid<Link> {
+import org.apache.droids.fetcher.CrawlingFetcher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class CrawlingDroid extends AbstractDroid<LinkTask> {
+    protected Collection<String> initialLocations;
+    protected final Logger logger = LoggerFactory.getLogger(CrawlingDroid.class);
 
-    private Collection<String> initialLocations;
+    public CrawlingDroid() {
+        this(null, null);
+    }
 
-    public CrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster) {
+    public CrawlingDroid(Queue<LinkTask> queue, TaskMaster<LinkTask> taskMaster) {
         super(queue, taskMaster);
+        this.setFetcher(new CrawlingFetcher());
     }
 
     public void setInitialLocations(Collection<String> initialLocations) {
         this.initialLocations = initialLocations;
     }
 
-    @Override
-    public void init() throws InvalidTaskException {
-        Preconditions.checkState(initialLocations != null,
-                "WebCrawlerDroid requires at least one starting file");
-        Preconditions.checkState(!initialLocations.isEmpty(),
-                "WebCrawlerDroid requires at least one starting file");
-        for (String location : initialLocations) {
-            URI uri;
-            try {
-                uri = new URI(location);
-            } catch (URISyntaxException ex) {
-                throw new InvalidTaskException("Invalid lication: " + location);
-            }
-            queue.offer(new LinkTask(null, uri, 0));
-        }
-    }
-
-    public void start() {
-        taskMaster.start(queue, this);
-    }
-
-    @Override
-    public void finished() {
-        logger.info("FINISHED!!!");
+    public Worker<LinkTask> getNewWorker() {
+        return new CrawlingWorker(this);
     }
 
-    public abstract Worker<Link> getNewWorker();
-
 }

Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java?rev=1440010&r1=1440009&r2=1440010&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java Tue Jan 29 17:27:43 2013
@@ -16,123 +16,43 @@
  */
 package org.apache.droids.crawler;
 
-import java.io.IOException;
-import java.net.URI;
-import java.util.Collection;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
 import org.apache.droids.core.DroidsException;
-import org.apache.droids.core.Protocol;
-import org.apache.droids.core.Task;
 import org.apache.droids.core.Worker;
-import org.apache.droids.helper.factories.HandlerFactory;
-import org.apache.droids.helper.factories.URLFiltersFactory;
-import org.apache.droids.parse.Parse;
-import org.apache.droids.parse.Parser;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class CrawlingWorker implements Worker<Link> {
+import java.io.IOException;
+import java.util.Set;
+
+public class CrawlingWorker implements Worker<LinkTask> {
 
-    private static final Logger LOG = LoggerFactory.getLogger(CrawlingWorker.class);
+    private static final Logger logger = LoggerFactory.getLogger(CrawlingWorker.class);
 
     private final CrawlingDroid droid;
-    HandlerFactory handlerFactory;
 
     public CrawlingWorker(CrawlingDroid droid) {
         this.droid = droid;
     }
 
     @Override
-    public void execute(Link link) throws DroidsException, IOException {
-        final String userAgent = this.getClass().getCanonicalName();
-        if (LOG.isDebugEnabled()) {
-            LOG.debug("Starting " + userAgent);
-        }
-        URI uri = link.getURI();
-        final Protocol protocol = droid.getProtocolFactory().getProtocol(uri);
-        if (protocol == null) {
-            if (LOG.isWarnEnabled()) {
-                LOG.warn("Unsupported protocol scheme '" + uri.getScheme() + "'");
-            }
-            return;
+    public void execute(LinkTask task) throws DroidsException, IOException {
+        if (logger.isInfoEnabled()) {
+            logger.info("Loading " + task.getURI());
         }
+        droid.load(task);
+        droid.parse(task);
 
-        if (protocol.isAllowed(uri)) {
-            if (LOG.isInfoEnabled()) {
-                LOG.info("Loading " + uri);
-            }
-//			ContentEntity entity = null;
-            try {
-//				entity = protocol.load(uri);
-            } catch (OutOfMemoryError e) {
-                LOG.error("Out of memory processing: " + uri + " skipping", e);
-                throw new DroidsException(e);
-            }
-            try {
-//				String contentType = entity.getMimeType();
-                String contentType = "";
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug("Content type " + contentType);
-                }
-                if (contentType == null) {
-                    LOG.info("Missing content type... can't parse...");
-                } else {
-                    Parser parser = droid.getParserFactory().getParser(contentType);
-                    if (parser == null) {
-                        if (LOG.isDebugEnabled()) {
-                            LOG.debug("Could not find parser for " + contentType);
-                        }
-                    } else {
-//						Parse parse = parser.parse(entity, link);
-                        Parse parse = null;
-                        if (parse.getNewTasks() != null && parse.isFollowed()) {
-                            Collection<Link> outlinks = getFilteredOutlinks(parse);
-                            droid.getQueue().addAll(outlinks);
-                        }
-//						entity.setParse(parse);
-                        handle(link);
-                    }
+        // add this to a link handler
+        Set<LinkTask> links = task.getContentEntity().getLinks();
+        if (links != null) {
+            for (LinkTask outLink : links) {
+                if (droid.filter(outLink) != null) {
+                    droid.add(outLink);
                 }
-            } finally {
-//				entity.finish();
-            }
-        } else {
-            if (LOG.isInfoEnabled()) {
-                LOG.info("Stopping processing since" + " bots are not allowed for " + uri);
             }
         }
+        droid.handle(task);
+        droid.finish(task);
     }
 
-    protected void handle(Task task) throws DroidsException, IOException {
-        getHandlerFactory().handle(task);
-    }
-
-    protected Collection<Link> getFilteredOutlinks(Parse parse) {
-        URLFiltersFactory filters = droid.getFiltersFactory();
-
-        // TODO -- make the hashvalue for Outlink...
-        Map<URI, Link> filtered = new LinkedHashMap<URI, Link>();
-        for (Task outTask : parse.getNewTasks()) {
-            // only use Links, so if for some reason it isn't a Link, skip
-            if (!(outTask instanceof Link)) {
-                continue;
-            }
-            Link outlink = (Link) outTask;
-            URI uri = outlink.getURI();
-            if (filters.accept(outlink) && !filtered.containsKey(uri)) {
-                filtered.put(uri, outlink);
-            }
-        }
-        return filtered.values();
-    }
-
-    public HandlerFactory getHandlerFactory() {
-        return handlerFactory;
-    }
-
-    public void setHandlerFactory(HandlerFactory handlerFactory) {
-        this.handlerFactory = handlerFactory;
-    }
 }

Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java?rev=1440010&r1=1440009&r2=1440010&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java Tue Jan 29 17:27:43 2013
@@ -16,73 +16,48 @@
  */
 package org.apache.droids.crawler;
 
+import org.apache.droids.core.ContentEntity;
+import org.apache.droids.core.Task;
+
 import java.net.URI;
-import java.util.Collection;
 import java.util.Date;
-import java.io.IOException;
-import java.io.InputStream;
 import java.io.Serializable;
 
 
 /**
- * Basic implementation for @Link. LinkTasks are working instructions for URI
+ * Basic implementation for Crawler @Task. LinkTasks are working instructions for URI
  * based droids.
  */
-public class LinkTask implements Link, Serializable {
+public class LinkTask implements Task, Serializable {
     private static final long serialVersionUID = -44808094386453088L;
 
+    private ContentEntity contentEntity;
     private Date started;
     private final int depth;
     private final URI uri;
-    private final Link from;
 
-    private Date lastModifiedDate;
-    private Collection<URI> linksTo;
-    private String anchorText;
-    private int weight;
     private boolean aborted = false;
 
     /**
      * Creates a new LinkTask.
      *
-     * @param from
-     * @param uri
-     * @param depth
+     * @param uri The URI of the task.
      */
-    public LinkTask(Link from, URI uri, int depth) {
-        this.from = from;
-        this.uri = uri;
-        this.depth = depth;
-        this.started = new Date();
+    public LinkTask(URI uri) {
+        this(uri, 0);
     }
 
     /**
      * Creates a new LinkTask.
      *
-     * @param from
-     * @param uri
-     * @param depth
-     * @param weight
+     * @param uri The URI of the task.
+     * @param depth The depth of the task.
      */
-    public LinkTask(Link from, URI uri, int depth, int weight) {
-        this.from = from;
+    public LinkTask(URI uri, int depth) {
         this.uri = uri;
         this.depth = depth;
         this.started = new Date();
-        this.weight = weight;
-    }
-
-    /**
-     * Creates a new LinkTask
-     *
-     * @param from
-     * @param uri
-     * @param depth
-     * @param anchorText
-     */
-    public LinkTask(Link from, URI uri, int depth, String anchorText) {
-        this(from, uri, depth);
-        this.anchorText = anchorText;
+        this.contentEntity = new ContentEntity();
     }
 
     @Override
@@ -91,9 +66,8 @@ public class LinkTask implements Link, S
     }
 
     @Override
-    public InputStream getContent() throws IOException {
-        // TODO Auto-generated method stub
-        return null;
+    public ContentEntity getContentEntity() {
+        return this.contentEntity;
     }
 
     @Override
@@ -101,93 +75,28 @@ public class LinkTask implements Link, S
         return started;
     }
 
-    /**
-     * Set the Date the task started.
-     *
-     * @param started
-     */
-    public void setTaskDate(Date started) {
-        this.started = started;
-    }
-
     @Override
     public int getDepth() {
         return depth;
     }
 
     @Override
-    public Link getFrom() {
-        return from;
-    }
-
-    @Override
-    public Collection<URI> getTo() {
-        return linksTo;
-    }
-
-    @Override
-    public Date getLastModifiedDate() {
-        return lastModifiedDate;
-    }
-
-    /**
-     * Set the Date the Task object was last modified.
-     *
-     * @param lastModifiedDate
-     */
-    public void setLastModifiedDate(Date lastModifiedDate) {
-        this.lastModifiedDate = lastModifiedDate;
-    }
-
-    /**
-     * Set Outgoing links.
-     *
-     * @param linksTo
-     */
-    public void setLinksTo(Collection<URI> linksTo) {
-        this.linksTo = linksTo;
+    public void abort() {
+        aborted = true;
     }
 
     @Override
-    public String getAnchorText() {
-        return anchorText;
-    }
-
-    /**
-     * Set the anchor text for this link.
-     *
-     * @param anchorText
-     */
-    public void setAnchorText(String anchorText) {
-        this.anchorText = anchorText;
-    }
-
-    /**
-     * Get the weight of the link
-     *
-     * @return the links weight
-     */
-    public int getWeight() {
-        return weight;
-    }
-
-    /**
-     * Set the weight of the link.
-     *
-     * @param weight
-     */
-    public void setWeight(int weight) {
-        this.weight = weight;
+    public boolean isAborted() {
+        return aborted;
     }
 
     @Override
-    public void abort() {
-        aborted = true;
+    public Task createTask(URI uri) {
+        return new LinkTask(uri, this.getDepth());
     }
 
     @Override
-    public boolean isAborted() {
-        return aborted;
+    public String toString() {
+        return "(" + getURI().toString() + "," + getDepth() + ")";
     }
-
 }
\ No newline at end of file

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java Tue Jan 29 17:27:43 2013
@@ -0,0 +1,54 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+package org.apache.droids.crawler;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Queue;
+
+import com.google.common.base.Preconditions;
+import org.apache.droids.core.TaskMaster;
+import org.apache.droids.handle.ReportHandler;
+
+
+/**
+ * This simple CrawlingDroid uses the ReportHandler to handle all retrieved files.
+ */
+public class SimpleCrawlingDroid extends CrawlingDroid {
+
+    public SimpleCrawlingDroid(Queue<LinkTask> queue, TaskMaster<LinkTask> taskMaster) {
+        super(queue, taskMaster);
+    }
+
+    /** Queues every parseable initial location as a LinkTask, then calls super.start(). */
+    @Override
+    public void start() {
+        // Must be &&: with || a null collection reached isEmpty() and threw an NPE.
+        Preconditions.checkState(initialLocations != null && !initialLocations.isEmpty(),
+                "CrawlingDroid requires at least one starting file");
+        for (String location : initialLocations) {
+            try {
+                queue.offer(new LinkTask(new URI(location)));
+            } catch (URISyntaxException ex) {
+                logger.error(ex.getMessage());
+            }
+        }
+        super.start();
+    }
+}

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java Tue Jan 29 17:27:43 2013
@@ -0,0 +1,134 @@
+package org.apache.droids.fetcher;
+
+import org.apache.droids.core.Fetcher;
+import org.apache.droids.crawler.LinkTask;
+import org.apache.droids.norobots.ContentLoader;
+import org.apache.droids.norobots.HttpClientContentLoader;
+import org.apache.droids.norobots.NoRobotClient;
+import org.apache.droids.norobots.NoRobotException;
+import org.apache.http.*;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.HttpResponseException;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.SystemDefaultHttpClient;
+import org.apache.http.params.CoreConnectionPNames;
+import org.apache.http.params.CoreProtocolPNames;
+import org.apache.http.params.HttpParams;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+/**
+ * Fetcher for LinkTasks backed by a SystemDefaultHttpClient: isAllowed() checks
+ * the task URI against the site's robots.txt through NoRobotClient (unless
+ * forceAllow is set), and fetch() GETs the URI into the task's ContentEntity.
+ */
+public class CrawlingFetcher implements Fetcher<LinkTask> {
+    private boolean forceAllow;
+    private String userAgent;
+    private final HttpClient httpClient;
+    private final ContentLoader contentLoader;
+    private final static String DROIDS_USER_AGENT = "Apache-Droids/0.3 (java 1.5)";
+    private final static Logger logger = LoggerFactory.getLogger(CrawlingFetcher.class);
+
+    public CrawlingFetcher() {
+        this(DROIDS_USER_AGENT);
+    }
+
+    public CrawlingFetcher(String userAgent) {
+        this.httpClient = new SystemDefaultHttpClient();
+        this.contentLoader = new HttpClientContentLoader(httpClient);
+        this.userAgent = userAgent;
+    }
+
+    @Override
+    public boolean isAllowed(LinkTask task) throws IOException {
+        if (forceAllow) {
+            return forceAllow;
+        }
+        URI uri = task.getURI();
+        URI baseURI;
+        try {
+            baseURI = new URI(
+                    uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(),
+                    "/", null, null);
+        } catch (URISyntaxException ex) {
+            logger.error("Unable to determine base URI for " + uri);
+            return false;
+        }
+
+        NoRobotClient nrc = new NoRobotClient(contentLoader, userAgent);
+        try {
+            nrc.parse(baseURI);
+        } catch (NoRobotException ex) {
+            logger.error("Failure parsing robots.txt: " + ex.getMessage());
+            return false;
+        }
+        boolean test = nrc.isUrlAllowed(uri);
+        if (logger.isInfoEnabled()) {
+            logger.info(uri + " is " + (test ? "allowed" : "denied"));
+        }
+        return test;
+
+    }
+
+    @Override
+    public void fetch(LinkTask task) throws IOException {
+        HttpGet httpget = new HttpGet(task.getURI());
+        HttpResponse response = httpClient.execute(httpget);
+        StatusLine statusline = response.getStatusLine(); // only referenced by the disabled status check below
+//        if (statusline.getStatusCode() >= HttpStatus.SC_BAD_REQUEST) {
+//            httpget.abort();
+//            throw new HttpResponseException(statusline.getStatusCode(), statusline.getReasonPhrase());
+//        }
+        HttpEntity entity = response.getEntity();
+        if (entity != null) {
+            InputStream instream = entity.getContent();
+            task.getContentEntity().setContent(instream);
+            for (Header header : response.getAllHeaders()) {
+                task.getContentEntity().put(header.getName(), header.getValue());
+            }
+        }
+    }
+
+    public void setDefaultHttpParams(HttpParams params) { // NOTE(review): 'params' is never read; every value below is set on httpClient - confirm intent
+        httpClient.getParams().setParameter(CoreProtocolPNames.USER_AGENT, userAgent);
+        httpClient.getParams().setParameter(CoreProtocolPNames.USE_EXPECT_CONTINUE, false);
+        httpClient.getParams().setParameter(CoreConnectionPNames.STALE_CONNECTION_CHECK, false);
+        httpClient.getParams().setIntParameter(CoreConnectionPNames.MAX_HEADER_COUNT, 256);
+        httpClient.getParams().setIntParameter(CoreConnectionPNames.MAX_LINE_LENGTH, 5 * 1024);
+        httpClient.getParams().setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 20000);
+        httpClient.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 20000);
+    }
+
+
+    /**
+     * You can force that a site is allowed (ignoring the robots.txt). This should
+     * only be used on servers that you control and where you have the permission
+     * to ignore the robots.txt.
+     *
+     * @return <code>true</code> if you are rude and ignore robots.txt.
+     *         <code>false</code> if you are playing nice.
+     */
+    public boolean isForceAllow() {
+        return forceAllow;
+    }
+
+    /**
+     * You can force that a site is allowed (ignoring the robots.txt). This should
+     * only be used on servers that you control and where you have the permission
+     * to ignore the robots.txt.
+     *
+     * @param forceAllow if you want to force an allow and ignore the robots.txt set
+     *                   to <code>true</code>. If you want to obey the rules and
+     *                   be polite set to <code>false</code>.
+     */
+    public void setForceAllow(boolean forceAllow) {
+        this.forceAllow = forceAllow;
+    }
+
+}

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/DroidsHttpClient.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/DroidsHttpClient.java?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/DroidsHttpClient.java (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/DroidsHttpClient.java Tue Jan 29 17:27:43 2013
@@ -0,0 +1,27 @@
+package org.apache.droids.fetcher;
+
+import org.apache.http.impl.client.SystemDefaultHttpClient;
+import org.apache.http.params.CoreConnectionPNames;
+import org.apache.http.params.CoreProtocolPNames;
+import org.apache.http.params.HttpParams;
+
+import javax.annotation.concurrent.ThreadSafe;
+
+/**
+ * Subclass of SystemDefaultHttpClient that adds no behaviour of its own: both
+ * constructors delegate to the superclass, and the no-argument constructor
+ * passes null for the HttpParams.
+ */
+@ThreadSafe
+public class DroidsHttpClient extends SystemDefaultHttpClient {
+
+    public DroidsHttpClient() {
+        this(null);
+    }
+
+    public DroidsHttpClient(final HttpParams params) {
+        super(params);
+    }
+
+
+}

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/DroidsHttpClient.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/DroidsHttpClient.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/DroidsHttpClient.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/norobots/HttpClientContentLoader.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/norobots/HttpClientContentLoader.java?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/norobots/HttpClientContentLoader.java (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/norobots/HttpClientContentLoader.java Tue Jan 29 17:27:43 2013
@@ -0,0 +1,74 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+package org.apache.droids.norobots;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.StatusLine;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.HttpResponseException;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpHead;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+
+
+import org.apache.droids.norobots.ContentLoader;
+import org.slf4j.spi.LocationAwareLogger;
+
+/**
+ * {@link ContentLoader} based on HttpClient 4.0.
+ */
+public class HttpClientContentLoader implements ContentLoader {
+
+    private final HttpClient httpclient;
+
+    public HttpClientContentLoader(HttpClient httpclient) {
+        super();
+        this.httpclient = httpclient;
+    }
+
+    public boolean exists(URI uri) throws IOException {
+        HttpHead httphead = new HttpHead(uri);
+        HttpResponse response = httpclient.execute(httphead);
+        return response.getStatusLine().getStatusCode() == HttpStatus.SC_OK;
+    }
+
+    public InputStream load(URI uri) throws IOException {
+        HttpGet httpget = new HttpGet(uri);
+        HttpResponse response = httpclient.execute(httpget);
+        StatusLine statusline = response.getStatusLine();
+        if (statusline.getStatusCode() == HttpStatus.SC_NOT_FOUND) {
+            return null;
+        } else if (statusline.getStatusCode() > HttpStatus.SC_BAD_REQUEST) {
+            throw new HttpResponseException(statusline.getStatusCode(), statusline.getReasonPhrase());
+        }
+        HttpEntity entity = response.getEntity();
+        if (entity != null) {
+            return entity.getContent();
+        } else {
+            return null;
+        }
+    }
+
+}

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/norobots/HttpClientContentLoader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/norobots/HttpClientContentLoader.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/norobots/HttpClientContentLoader.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/CrawlingDroidTest.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/CrawlingDroidTest.java?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/CrawlingDroidTest.java (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/CrawlingDroidTest.java Tue Jan 29 17:27:43 2013
@@ -0,0 +1,78 @@
+package org.apache.droids.crawler;
+
+import junit.framework.Assert;
+import org.apache.droids.core.Droid;
+import org.apache.droids.core.SimpleTaskQueueWithHistory;
+import org.apache.droids.core.TaskMaster;
+import org.apache.droids.crawler.localserver.LocalHttpServer;
+import org.apache.droids.crawler.localserver.ResourceHandler;
+import org.apache.droids.filter.HostFilter;
+import org.apache.droids.handle.ReportHandler;
+import org.apache.droids.handle.SysoutHandler;
+import org.apache.droids.parse.SimpleLinkParser;
+import org.apache.droids.taskmaster.SequentialTaskMaster;
+import static org.junit.Assert.*;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Runs a {@link SimpleCrawlingDroid} against a {@link LocalHttpServer} and
+ * checks which locations end up in the {@link ReportHandler} report.
+ */
+public class CrawlingDroidTest {
+
+    protected LocalHttpServer testserver;
+
+    /**
+     * Creates a fresh, not yet started server for each test.
+     */
+    @Before
+    public void initializeLocalTestServer() {
+        this.testserver = new LocalHttpServer();
+    }
+
+    /**
+     * Stops the server after each test.
+     */
+    @After
+    public void shutdownLocalTestServer() throws IOException {
+        this.testserver.stop();
+    }
+
+    /**
+     * Crawls from {@code /start_html} and expects the start page plus the
+     * four pages reachable from it to be reported.
+     */
+    @Test
+    public void test() throws IOException {
+        this.testserver.register("*", new ResourceHandler());
+        this.testserver.start();
+
+        // Build the base URI from the bind address and the port actually
+        // assigned, instead of relying on the format of SocketAddress.toString().
+        String baseURI = "http://" + LocalHttpServer.TEST_SERVER_ADDR.getAddress().getHostAddress()
+                + ":" + this.testserver.getServicePort();
+        String targetURI = baseURI + "/start_html";
+
+
+        Queue<LinkTask> queue = new SimpleTaskQueueWithHistory<LinkTask>();
+        TaskMaster<LinkTask> taskMaster = new SequentialTaskMaster<LinkTask>();
+
+        Collection<String> initialLocations = new LinkedList<String>();
+        initialLocations.add(targetURI);
+
+
+        SimpleCrawlingDroid droid = new SimpleCrawlingDroid(queue, taskMaster);
+        droid.setInitialLocations(initialLocations);
+        droid.addParsers(new SimpleLinkParser());
+
+
+        // just output the filename
+        droid.addHandlers(new ReportHandler());
+
+        try {
+            droid.start();
+
+            assertFalse(ReportHandler.getReport().isEmpty());
+            assertEquals(5, ReportHandler.getReport().size());
+            assertTrue(ReportHandler.getReport().contains(baseURI + "/start_html"));
+            assertTrue(ReportHandler.getReport().contains(baseURI + "/page1_html"));
+            assertTrue(ReportHandler.getReport().contains(baseURI + "/page2_html"));
+            assertTrue(ReportHandler.getReport().contains(baseURI + "/page3_html"));
+            assertTrue(ReportHandler.getReport().contains(baseURI + "/page4_html"));
+        } finally {
+            // The report is static state; reset it even when an assertion
+            // fails so that it cannot leak into other tests.
+            ReportHandler.recycle();
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/CrawlingDroidTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/CrawlingDroidTest.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/CrawlingDroidTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/LocalHttpServer.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/LocalHttpServer.java?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/LocalHttpServer.java (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/LocalHttpServer.java Tue Jan 29 17:27:43 2013
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler.localserver;
+
+import com.google.common.base.Preconditions;
+import org.apache.http.HttpException;
+import org.apache.http.HttpServerConnection;
+import org.apache.http.impl.DefaultConnectionReuseStrategy;
+import org.apache.http.impl.DefaultHttpResponseFactory;
+import org.apache.http.impl.DefaultHttpServerConnection;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.CoreConnectionPNames;
+import org.apache.http.params.CoreProtocolPNames;
+import org.apache.http.params.HttpParams;
+import org.apache.http.protocol.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.ServerSocket;
+import java.net.Socket;
+import java.net.SocketAddress;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Local HTTP server for tests that require one.
+ * <p>
+ * {@link #start()} binds a server socket and launches a listener thread;
+ * the listener hands every accepted connection to its own worker thread.
+ * {@link #stop()} closes the socket and interrupts the listener.
+ */
+public class LocalHttpServer {
+
+    private static final Logger log = LoggerFactory.getLogger(LocalHttpServer.class);
+    /**
+     * The local address to bind to. The host is an IP number rather than
+     * "localhost" to avoid surprises on hosts that map "localhost" to an IPv6
+     * address or something else. The port is 0 to let the system pick one.
+     */
+    public final static InetSocketAddress TEST_SERVER_ADDR = new InetSocketAddress("127.0.0.1", 0);
+    /**
+     * The request handler registry.
+     */
+    private final HttpRequestHandlerRegistry handlerRegistry;
+    /**
+     * The HTTP processor. If the interceptors are thread safe and the list is not
+     * modified during operation, the processor is thread safe.
+     */
+    private final BasicHttpProcessor httpProcessor;
+    /**
+     * The server parameters.
+     */
+    private final HttpParams params;
+    /**
+     * The server socket, while being served.
+     */
+    private volatile ServerSocket servicedSocket;
+    /**
+     * The request listening thread, while listening.
+     */
+    private volatile Thread listenerThread;
+    /**
+     * The number of connections this server has accepted.
+     */
+    private final AtomicInteger acceptedConnections = new AtomicInteger(0);
+
+    /**
+     * Creates a new test server. The server is not started; call
+     * {@link #start()} to begin accepting connections.
+     */
+    public LocalHttpServer() {
+        this.handlerRegistry = new HttpRequestHandlerRegistry();
+        this.httpProcessor = new BasicHttpProcessor();
+        this.httpProcessor.addInterceptor(new ResponseDate());
+        this.httpProcessor.addInterceptor(new ResponseServer());
+        this.httpProcessor.addInterceptor(new ResponseContent());
+        this.httpProcessor.addInterceptor(new ResponseConnControl());
+        this.params = new BasicHttpParams();
+        this.params
+                .setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 5000)
+                .setIntParameter(CoreConnectionPNames.SOCKET_BUFFER_SIZE, 8 * 1024)
+                .setBooleanParameter(CoreConnectionPNames.STALE_CONNECTION_CHECK, false)
+                .setBooleanParameter(CoreConnectionPNames.TCP_NODELAY, true)
+                .setParameter(CoreProtocolPNames.ORIGIN_SERVER, "LocalTestServer/1.1");
+    }
+
+    /**
+     * Returns the number of connections this test server has accepted.
+     */
+    public int getAcceptedConnectionCount() {
+        return this.acceptedConnections.get();
+    }
+
+    /**
+     * Registers a handler with the local registry.
+     *
+     * @param pattern the URL pattern to match
+     * @param handler the handler to apply
+     */
+    public void register(String pattern, HttpRequestHandler handler) {
+        this.handlerRegistry.register(pattern, handler);
+    }
+
+    /**
+     * Unregisters a handler from the local registry.
+     *
+     * @param pattern the URL pattern
+     */
+    public void unregister(String pattern) {
+        this.handlerRegistry.unregister(pattern);
+    }
+
+    /**
+     * Starts this test server. Use {@link #getServicePort getServicePort} to
+     * obtain the port number afterwards. Calling this method while the server
+     * is already running has no effect.
+     *
+     * @throws IOException if the server socket cannot be created or bound
+     */
+    public void start() throws IOException {
+        if (servicedSocket != null) {
+            return; // Already running
+        }
+
+        ServerSocket ssock = new ServerSocket();
+        ssock.setReuseAddress(true); // probably pointless for port '0'
+        ssock.bind(TEST_SERVER_ADDR);
+        this.servicedSocket = ssock;
+
+        // listenerThread is assigned before the thread starts because the
+        // listener loop compares it against Thread.currentThread().
+        this.listenerThread = new Thread(new RequestListener());
+        this.listenerThread.setDaemon(false);
+        this.listenerThread.start();
+    }
+
+    /**
+     * Stops this test server. A failure to close the server socket is logged,
+     * not propagated. Calling this method while the server is not running
+     * has no effect.
+     */
+    public void stop() throws IOException {
+        if (this.servicedSocket == null) {
+            return; // not running
+        }
+
+        try {
+            this.servicedSocket.close();
+        } catch (IOException ex) {
+            log.error(ex.getMessage(), ex);
+        } finally {
+            this.servicedSocket = null;
+        }
+
+        if (this.listenerThread != null) {
+            this.listenerThread.interrupt();
+            this.listenerThread = null;
+        }
+    }
+
+    @Override
+    public String toString() {
+        ServerSocket ssock = servicedSocket; // avoid synchronization
+        // The builder never leaves this method, so the unsynchronized
+        // StringBuilder is sufficient.
+        StringBuilder sb = new StringBuilder(80);
+        sb.append("LocalTestServer/");
+        if (ssock == null) {
+            sb.append("stopped");
+        } else {
+            sb.append(ssock.getLocalSocketAddress());
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Obtains the port this server is servicing.
+     *
+     * @return the service port
+     * @throws IllegalStateException if the server is not running
+     */
+    public int getServicePort() {
+        ServerSocket ssock = this.servicedSocket; // avoid synchronization
+        Preconditions.checkState(ssock != null, "not running");
+        return ssock.getLocalPort();
+    }
+
+    /**
+     * Obtains the local address the server is listening on
+     *
+     * @return the service address
+     * @throws IllegalStateException if the server is not running
+     */
+    public SocketAddress getServiceAddress() {
+        ServerSocket ssock = this.servicedSocket; // avoid synchronization
+        Preconditions.checkState(ssock != null, "not running");
+        return ssock.getLocalSocketAddress();
+    }
+
+    /**
+     * The request listener. Accepts incoming connections and launches a service
+     * thread.
+     */
+    public class RequestListener implements Runnable {
+
+        /**
+         * The workers launched from here.
+         */
+        private final Set<Thread> workerThreads;
+
+        public RequestListener() {
+            super();
+            this.workerThreads = Collections.synchronizedSet(new HashSet<Thread>());
+        }
+
+        /**
+         * Accepts connections until the server socket is gone, this thread is
+         * no longer the registered listener, or the thread is interrupted.
+         * Any exception from {@link #accept()} ends the loop; it is logged
+         * only if the server socket is still open. The workers are always
+         * interrupted on the way out.
+         */
+        public void run() {
+            try {
+                while ((servicedSocket != null) && (listenerThread == Thread.currentThread())
+                        && !Thread.interrupted()) {
+                    try {
+                        accept();
+                    } catch (Exception ex) {
+                        ServerSocket ssock = servicedSocket;
+                        if ((ssock != null) && !ssock.isClosed()) {
+                            log.error(LocalHttpServer.this.toString() + " could not accept", ex);
+                        }
+                        // otherwise ignore the exception silently
+                        break;
+                    }
+                }
+            } finally {
+                cleanup();
+            }
+        }
+
+        /**
+         * Waits for one incoming connection and starts a daemon worker thread
+         * that serves it.
+         *
+         * @throws IOException if accepting the connection or binding it to
+         *                     the HTTP connection object fails
+         */
+        protected void accept() throws IOException {
+            // Set up HTTP connection
+            Socket socket = servicedSocket.accept();
+            acceptedConnections.incrementAndGet();
+            DefaultHttpServerConnection conn = new DefaultHttpServerConnection();
+            try {
+                conn.bind(socket, params);
+            } catch (IOException ex) {
+                // No worker will ever own this socket; close it here so that
+                // it does not leak, then report the original failure.
+                try {
+                    socket.close();
+                } catch (IOException ignore) {
+                }
+                throw ex;
+            }
+
+            // Set up the HTTP service
+            HttpService httpService = new HttpService(httpProcessor,
+                    new DefaultConnectionReuseStrategy(), new DefaultHttpResponseFactory());
+            httpService.setParams(params);
+            httpService.setHandlerResolver(handlerRegistry);
+
+            // Start worker thread
+            Thread t = new Thread(new Worker(httpService, conn));
+            workerThreads.add(t);
+            t.setDaemon(true);
+            t.start();
+
+        }
+
+        /**
+         * Interrupts every worker thread that is still registered.
+         */
+        protected void cleanup() {
+            // Work on a snapshot: workers remove themselves from the set
+            // concurrently when they finish.
+            Thread[] threads = workerThreads.toArray(new Thread[0]);
+            for (Thread worker : threads) {
+                if (worker != null) {
+                    worker.interrupt();
+                }
+            }
+        }
+
+        /**
+         * A worker for serving incoming requests.
+         */
+        public class Worker implements Runnable {
+
+            private final HttpService httpservice;
+            private final HttpServerConnection conn;
+
+            public Worker(final HttpService httpservice, final HttpServerConnection conn) {
+
+                this.httpservice = httpservice;
+                this.conn = conn;
+            }
+
+            /**
+             * Handles requests on the connection while the server is running,
+             * the connection is open and the thread is not interrupted. The
+             * worker always deregisters itself and shuts the connection down
+             * when it ends.
+             */
+            public void run() {
+                HttpContext context = new BasicHttpContext(null);
+                try {
+                    while ((servicedSocket != null) && this.conn.isOpen() && !Thread.interrupted()) {
+                        this.httpservice.handleRequest(this.conn, context);
+                    }
+                } catch (IOException ex) {
+                    // ignore silently
+                } catch (HttpException ex) {
+                    // ignore silently
+                } finally {
+                    workerThreads.remove(Thread.currentThread());
+                    try {
+                        this.conn.shutdown();
+                    } catch (IOException ignore) {
+                    }
+                }
+            }
+        }
+    }
+}

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/LocalHttpServer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/LocalHttpServer.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/LocalHttpServer.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/ResourceHandler.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/ResourceHandler.java?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/ResourceHandler.java (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/ResourceHandler.java Tue Jan 29 17:27:43 2013
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler.localserver;
+
+import org.apache.http.*;
+import org.apache.http.entity.InputStreamEntity;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.protocol.HttpContext;
+import org.apache.http.protocol.HttpRequestHandler;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Locale;
+
+/**
+ * Request handler that answers GET and HEAD requests with classpath
+ * resources looked up below {@code data/}.
+ */
+public class ResourceHandler implements HttpRequestHandler {
+
+    /**
+     * Serves the resource named by the request URI.
+     * <p>
+     * If the resource exists it is streamed back as the response entity;
+     * request URIs ending in {@code _html} are additionally marked as
+     * {@code text/html} and chunked. If it does not exist, the response
+     * is a 404 with a short message.
+     *
+     * @throws MethodNotSupportedException for any method other than GET or HEAD
+     * @throws IOException                 if the resource stream cannot be opened
+     */
+    public void handle(final HttpRequest request, final HttpResponse response,
+                       final HttpContext context) throws HttpException, IOException {
+
+        final RequestLine requestLine = request.getRequestLine();
+        final String verb = requestLine.getMethod().toUpperCase(Locale.ENGLISH);
+        final boolean supported = "GET".equals(verb) || "HEAD".equals(verb);
+        if (!supported) {
+            throw new MethodNotSupportedException(verb + " not supported by " + getClass().getName());
+        }
+
+        // Map the request URI onto a classpath location: "data/<uri>".
+        final String requestURI = requestLine.getUri();
+        final String resourcePath = requestURI.startsWith("/")
+                ? "data" + requestURI
+                : "data/" + requestURI;
+        final URL resource = ResourceHandler.class.getClassLoader().getResource(resourcePath);
+
+        if (resource == null) {
+            response.setStatusCode(HttpStatus.SC_NOT_FOUND);
+            final StringEntity notFound = new StringEntity(requestURI + " not found", "US-ASCII");
+            notFound.setContentType("text/html");
+            response.setEntity(notFound);
+            return;
+        }
+
+        // Length -1: the size of the resource is not known up front.
+        final InputStreamEntity body = new InputStreamEntity(resource.openStream(), -1);
+        if (requestURI.endsWith("_html")) {
+            body.setContentType("text/html");
+            body.setChunked(true);
+        }
+        response.setEntity(body);
+    }
+
+}

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/ResourceHandler.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/ResourceHandler.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/java/org/apache/droids/crawler/localserver/ResourceHandler.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page1_html
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page1_html?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page1_html (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page1_html Tue Jan 29 17:27:43 2013
@@ -0,0 +1,22 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+  <head>Page 1</head>
+  <body>
+    <a href="/page3_html">Page3</a>
+  </body>
+</html>

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page2_html
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page2_html?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page2_html (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page2_html Tue Jan 29 17:27:43 2013
@@ -0,0 +1,22 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+  <head>Page 2</head>
+  <body>
+    <a href="/page4_html">Page4</a>
+  </body>
+</html>

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page3_html
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page3_html?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page3_html (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page3_html Tue Jan 29 17:27:43 2013
@@ -0,0 +1,23 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<html>
+  <head>Page 3</head>
+  <body>
+    <p>Yada yada</p>
+  </body>
+</html>

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page4_html
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page4_html?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page4_html (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/page4_html Tue Jan 29 17:27:43 2013
@@ -0,0 +1,22 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+  <head>Page 4</head>
+  <body>
+    <p>Blah blah blah</p>
+  </body>
+</html>

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/start_html
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/start_html?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/start_html (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/data/start_html Tue Jan 29 17:27:43 2013
@@ -0,0 +1,23 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+  <head>Start page</head>
+  <body>
+    <a href="/page1_html">Page1</a>
+    <a href="/page2_html">Page2</a>
+  </body>
+</html>

Added: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/logback.xml
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/logback.xml?rev=1440010&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/logback.xml (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/logback.xml Tue Jan 29 17:27:43 2013
@@ -0,0 +1,18 @@
+<configuration>
+
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <!-- encoders are assigned the type
+             ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
+        <encoder>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <logger name="org.apache.droids" level="info"/>
+        <logger name="org.apache.http.wire" level="info"/>
+
+
+    <root level="info">
+        <appender-ref ref="STDOUT"/>
+    </root>
+</configuration>
\ No newline at end of file

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/logback.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/logback.xml
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/test/resources/logback.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml



Mime
View raw message