manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1417060 [4/4] - in /manifoldcf/trunk: ./ connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/ connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/ connectors...
Date Tue, 04 Dec 2012 17:47:30 GMT
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1417060&r1=1417059&r2=1417060&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Tue Dec  4 17:47:28 2012
@@ -1,6 +1,6 @@
 /* $Id: ThrottledFetcher.java 989847 2010-08-26 17:52:30Z kwright $ */
 
-/**
+/**`
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@@ -19,6 +19,7 @@
 package org.apache.manifoldcf.crawler.connectors.webcrawler;
 
 import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.core.common.XThreadInputStream;
 import org.apache.manifoldcf.agents.interfaces.*;
 import org.apache.manifoldcf.crawler.interfaces.*;
 import org.apache.manifoldcf.crawler.system.Logging;
@@ -26,13 +27,63 @@ import org.apache.manifoldcf.crawler.sys
 import java.util.*;
 import java.io.*;
 import java.net.*;
+import java.util.concurrent.TimeUnit;
 
-import org.apache.commons.httpclient.*;
-import org.apache.commons.httpclient.methods.*;
-import org.apache.commons.httpclient.params.*;
-import org.apache.commons.httpclient.protocol.*;
-import org.apache.commons.httpclient.cookie.CookiePolicy;
-import org.apache.commons.httpclient.auth.AuthScope;
+import org.apache.http.conn.ClientConnectionManager;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.NameValuePair;
+import org.apache.http.impl.conn.PoolingClientConnectionManager;
+import org.apache.http.conn.scheme.Scheme;
+import org.apache.http.conn.ssl.SSLSocketFactory;
+import org.apache.http.conn.ssl.AllowAllHostnameVerifier;
+import org.apache.http.conn.ssl.BrowserCompatHostnameVerifier;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.auth.AuthScope;
+import org.apache.http.auth.NTCredentials;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpRequestBase;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.impl.client.AbstractHttpClient;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.util.EntityUtils;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.HttpParams;
+import org.apache.http.params.CoreConnectionPNames;
+import org.apache.http.HttpStatus;
+import org.apache.http.HttpHost;
+import org.apache.http.Header;
+import org.apache.http.conn.params.ConnRoutePNames;
+import org.apache.http.message.BasicHeader;
+import org.apache.http.client.params.ClientPNames;
+import org.apache.http.client.params.HttpClientParams;
+import org.apache.http.client.params.CookiePolicy;
+import org.apache.http.cookie.params.CookieSpecPNames;
+import org.apache.http.impl.cookie.BasicClientCookie;
+import org.apache.http.message.BasicNameValuePair;
+import org.apache.http.protocol.HTTP;
+import org.apache.http.client.entity.UrlEncodedFormEntity;
+import org.apache.http.cookie.CookieOrigin;
+import org.apache.http.cookie.ClientCookie;
+import org.apache.http.cookie.Cookie;
+import org.apache.http.impl.cookie.BasicPathHandler;
+import org.apache.http.impl.cookie.BrowserCompatSpec;
+import org.apache.http.cookie.CookieSpecFactory;
+import org.apache.http.cookie.CookieSpec;
+import org.apache.http.client.CookieStore;
+import org.apache.http.protocol.HttpContext;
+import org.apache.http.protocol.BasicHttpContext;
+import org.apache.http.client.protocol.ClientContext;
+import org.apache.http.cookie.CookieIdentityComparator;
+
+import org.apache.http.cookie.MalformedCookieException;
+import org.apache.http.conn.ConnectTimeoutException;
+import org.apache.http.client.RedirectException;
+import org.apache.http.client.CircularRedirectException;
+import org.apache.http.NoHttpResponseException;
+import org.apache.http.HttpException;
 
 /** This class uses httpclient to fetch stuff from webservers.  However, it additionally controls the fetch
 * rate in two ways: first, controlling the overall bandwidth used per server, and second, limiting the number
@@ -114,23 +165,20 @@ public class ThrottledFetcher
     String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword)
     throws ManifoldCFException
   {
-    // First, create a protocol factory object, if we can
-    ProtocolFactory myFactory = new ProtocolFactory();
+    // Create the https scheme for this connection
+    javax.net.ssl.SSLSocketFactory baseFactory;
     String trustStoreString;
-    ProtocolSocketFactory secureSocketFactory;
     if (trustStore != null)
     {
-      secureSocketFactory = new WebSecureSocketFactory(trustStore.getSecureSocketFactory());
+      baseFactory = trustStore.getSecureSocketFactory();
       trustStoreString = trustStore.getString();
     }
     else
     {
+      baseFactory = KeystoreManagerFactory.getTrustingSecureSocketFactory();
       trustStoreString = null;
-      secureSocketFactory = new WebSecureSocketFactory(KeystoreManagerFactory.getTrustingSecureSocketFactory());
     }
 
-    Protocol myHttpsProtocol = new Protocol("https", (ProtocolSocketFactory)secureSocketFactory, 443);
-    myFactory.registerProtocol("https",myHttpsProtocol);
 
     ConnectionBin[] bins = new ConnectionBin[binNames.length];
 
@@ -370,7 +418,7 @@ public class ThrottledFetcher
 
           // If we have a connection located, activate it.
           if (connectionToReuse == null)
-            connectionToReuse = new ThrottledConnection(protocol,server,port,authentication,myFactory,trustStoreString,bins,
+            connectionToReuse = new ThrottledConnection(protocol,server,port,authentication,baseFactory,trustStoreString,bins,
               proxyHost,proxyPort,proxyAuthDomain,proxyAuthUsername,proxyAuthPassword);
           connectionToReuse.setup(throttleDescription);
           return connectionToReuse;
@@ -853,126 +901,6 @@ public class ThrottledFetcher
 
   }
 
-  // Where to record result info/file data
-  protected final static String resultLogFile = "/common/web/resultlog";
-  // Where to record the actual data
-  protected final static String dataFileFolder = "/common/web/data/";
-
-  // This is the one instance of the output class
-  protected static DataRecorder dataRecorder = new DataRecorder();
-
-  /** This class takes care of recording data and results for posterity */
-  protected static class DataRecorder
-  {
-    protected int documentNumber = 0;
-
-    public DataRecorder()
-    {
-    }
-
-    public DataSession getSession(String url)
-      throws ManifoldCFException
-    {
-      return new DataSession(this,url);
-    }
-
-    /** Atomically write resultlog record, returning data file name to use */
-    public synchronized String writeResponseRecord(String url, int responseCode, ArrayList headerNames, ArrayList headerValues)
-      throws ManifoldCFException
-    {
-      // Open log file
-      try
-      {
-        OutputStream os = new FileOutputStream(resultLogFile,true);
-        try
-        {
-          OutputStreamWriter writer = new OutputStreamWriter(os,"utf-8");
-          try
-          {
-            String documentName = Integer.toString(documentNumber++);
-            writer.write("URI: "+url+"\n");
-            writer.write("File: "+documentName+"\n");
-            writer.write("Code: "+Integer.toString(responseCode)+"\n");
-            int i = 0;
-            while (i < headerNames.size())
-            {
-              writer.write("Header: "+(String)headerNames.get(i)+":"+(String)headerValues.get(i)+"\n");
-              i++;
-            }
-            return documentName;
-          }
-          finally
-          {
-            writer.close();
-          }
-        }
-        finally
-        {
-          os.close();
-        }
-      }
-      catch (IOException e)
-      {
-        throw new ManifoldCFException("Error recording file info: "+e.getMessage(),e);
-      }
-
-    }
-
-
-  }
-
-  /** Helper class for the above */
-  protected static class DataSession
-  {
-    protected DataRecorder dr;
-    protected String url;
-    protected int responseCode = 0;
-    protected ArrayList headerNames = new ArrayList();
-    protected ArrayList headerValues = new ArrayList();
-    protected String documentName = null;
-
-    public DataSession(DataRecorder dr, String url)
-    {
-      this.dr = dr;
-      this.url = url;
-    }
-
-    public void setResponseCode(int responseCode)
-    {
-      this.responseCode = responseCode;
-    }
-
-    public void addHeader(String headerName, String headerValue)
-    {
-      headerNames.add(headerName);
-      headerValues.add(headerValue);
-    }
-
-    public void endHeader()
-      throws ManifoldCFException
-    {
-      documentName = dr.writeResponseRecord(url,responseCode,headerNames,headerValues);
-    }
-
-    public void write(byte[] theBytes, int off, int length)
-      throws IOException
-    {
-      if (documentName == null)
-        throw new IOException("Must end header before reading data!");
-      OutputStream os = new FileOutputStream(dataFileFolder+documentName,true);
-      try
-      {
-        os.write(theBytes,off,length);
-      }
-      finally
-      {
-        os.close();
-      }
-    }
-
-  }
-
-
   /** Throttled connections.  Each instance of a connection describes the bins to which it belongs,
   * along with the actual open connection itself, and the last time the connection was used. */
   protected static class ThrottledConnection implements IThrottledConnection
@@ -1004,9 +932,11 @@ public class ThrottledFetcher
     protected String trustStoreString;
 
     /** The http connection manager.  The pool is of size 1.  */
-    protected MultiThreadedHttpConnectionManager connManager = null;
+    protected PoolingClientConnectionManager connManager = null;
+    /** The http client object. */
+    protected AbstractHttpClient httpClient = null;
     /** The method object */
-    protected HttpMethodBase fetchMethod = null;
+    protected HttpRequestBase fetchMethod = null;
     /** The error trace, if any */
     protected Throwable throwable = null;
     /** The current URL being fetched */
@@ -1031,19 +961,19 @@ public class ThrottledFetcher
     protected final String proxyAuthUsername;
     /** Proxy auth password */
     protected final String proxyAuthPassword;
-    
-    /** Protocol socket factory */
-    protected ProtocolSocketFactory secureSocketFactory = null;
-    protected ProtocolFactory myFactory = null;
+    /** Https protocol */
+    protected final javax.net.ssl.SSLSocketFactory httpsSocketFactory;
 
-
-    /** Hack added to record all access data from current crawler */
-    protected DataSession dataSession = null;
+    /** The thread that is actually doing the work */
+    protected ExecuteMethodThread methodThread = null;
+    /** Set if thread has been started */
+    protected boolean threadStarted = false;
+    
 
     /** Constructor.  Create a connection with a specific server and port, and
     * register it as active against all bins. */
     public ThrottledConnection(String protocol, String server, int port, PageCredentials authentication,
-      ProtocolFactory myFactory, String trustStoreString, ConnectionBin[] connectionBins,
+      javax.net.ssl.SSLSocketFactory httpsSocketFactory, String trustStoreString, ConnectionBin[] connectionBins,
       String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword)
     {
       this.proxyHost = proxyHost;
@@ -1055,7 +985,7 @@ public class ThrottledFetcher
       this.server = server;
       this.port = port;
       this.authentication = authentication;
-      this.myFactory = myFactory;
+      this.httpsSocketFactory = httpsSocketFactory;
       this.trustStoreString = trustStoreString;
       this.connectionBinArray = connectionBins;
       this.throttleBinArray = new ThrottleBin[connectionBins.length];
@@ -1210,10 +1140,11 @@ public class ThrottledFetcher
 
       if (connManager != null)
       {
-        connManager.closeIdleConnections(idleTimeout);
-        connManager.deleteClosedConnections();
+        connManager.closeIdleConnections(idleTimeout, TimeUnit.MILLISECONDS);
+        connManager.closeExpiredConnections();
         // Need to determine if there's a valid connection in the connection manager still, or if it is empty.
-        return connManager.getConnectionsInPool() == 0;
+        //return connManager.getConnectionsInPool() == 0;
+        return true;
       }
       else
         return true;
@@ -1312,47 +1243,6 @@ public class ThrottledFetcher
       }
     }
 
-    protected static class ExecuteMethodThread extends Thread
-    {
-      protected HttpClient client;
-      protected HostConfiguration hostConfiguration;
-      protected HttpMethodBase executeMethod;
-      protected Throwable exception = null;
-      protected int rval = 0;
-
-      public ExecuteMethodThread(HttpClient client, HostConfiguration hostConfiguration, HttpMethodBase executeMethod)
-      {
-        super();
-        setDaemon(true);
-        this.client = client;
-        this.hostConfiguration = hostConfiguration;
-        this.executeMethod = executeMethod;
-      }
-
-      public void run()
-      {
-        try
-        {
-          // Call the execute method appropriately
-          rval = client.executeMethod(hostConfiguration,executeMethod,null);
-        }
-        catch (Throwable e)
-        {
-          this.exception = e;
-        }
-      }
-
-      public Throwable getException()
-      {
-        return exception;
-      }
-
-      public int getResponse()
-      {
-        return rval;
-      }
-    }
-
     /** Execute the fetch and get the return code.  This method uses the
     * standard logging mechanism to keep track of the fetch attempt.  It also
     * signals the following conditions: ServiceInterruption (if a dynamic
@@ -1375,225 +1265,267 @@ public class ThrottledFetcher
       LoginCookies loginCookies)
       throws ManifoldCFException, ServiceInterruption
     {
-      StringBuilder sb = new StringBuilder(protocol);
-      sb.append("://").append(server);
+      // Set up scheme
+      SSLSocketFactory myFactory = new SSLSocketFactory(new InterruptibleSocketFactory(httpsSocketFactory,connectionTimeoutMilliseconds),
+        new AllowAllHostnameVerifier());
+      Scheme myHttpsProtocol = new Scheme("https", 443, myFactory);
+
+      int resolvedPort;
+      String displayedPort;
       if (port != -1)
       {
         if (!(protocol.equals("http") && port == 80) &&
           !(protocol.equals("https") && port == 443))
-        sb.append(":").append(Integer.toString(port));
+          displayedPort = ":"+Integer.toString(port);
+        else
+          displayedPort = "";
+        resolvedPort = port;
+      }
+      else
+      {
+        if (protocol.equals("http"))
+          resolvedPort = 80;
+        else if (protocol.equals("https"))
+          resolvedPort = 443;
+        else
+          throw new IllegalArgumentException("Unexpected protocol: "+protocol);
+        displayedPort = "";
       }
-      sb.append(urlPath);
+
+      StringBuilder sb = new StringBuilder(protocol);
+      sb.append("://").append(server).append(displayedPort).append(urlPath);
       String fetchUrl = sb.toString();
+
+      HttpHost fetchHost = new HttpHost(server,resolvedPort,protocol);
+      HttpHost hostHost;
+      
       if (host != null)
       {
         sb.setLength(0);
-        sb.append(protocol).append("://").append(host);
-        if (port != -1)
-        {
-          if (!(protocol.equals("http") && port == 80) &&
-            !(protocol.equals("https") && port == 443))
-          sb.append(":").append(Integer.toString(port));
-        }
-        sb.append(urlPath);
+        sb.append(protocol).append("://").append(host).append(displayedPort).append(urlPath);
         myUrl = sb.toString();
+        hostHost = new HttpHost(host,resolvedPort,protocol);
       }
       else
+      {
         myUrl = fetchUrl;
-
-      if (recordEverything)
-        // Start a new data session
-        dataSession = dataRecorder.getSession(myUrl);
-
-      try
+        hostHost = fetchHost;
+      }
+      
+      if (connManager == null)
       {
-        if (connManager == null)
-          connManager = new MultiThreadedHttpConnectionManager();
-        HttpConnectionManagerParams httpConParam = connManager.getParams();
-        httpConParam.setDefaultMaxConnectionsPerHost(1);
-        httpConParam.setMaxTotalConnections(1);
-        httpConParam.setConnectionTimeout(connectionTimeoutMilliseconds);
-        httpConParam.setSoTimeout(socketTimeoutMilliseconds);
-        connManager.setParams(httpConParam);
-
-        long startTime = 0L;
-        if (Logging.connectors.isDebugEnabled())
-        {
-          startTime = System.currentTimeMillis();
-          Logging.connectors.debug("WEB: Waiting for an HttpClient object");
-        }
+        PoolingClientConnectionManager localConnManager = new PoolingClientConnectionManager();
+        localConnManager.setMaxTotal(1);
+        localConnManager.setDefaultMaxPerRoute(1);
+        connManager = localConnManager;
+      }
+      
+      // Set up protocol registry
+      connManager.getSchemeRegistry().register(myHttpsProtocol);
+      
+      long startTime = 0L;
+      if (Logging.connectors.isDebugEnabled())
+      {
+        startTime = System.currentTimeMillis();
+        Logging.connectors.debug("WEB: Waiting for an HttpClient object");
+      }
 
+      // If we already have an httpclient object, great.  Otherwise we have to get one, and initialize it with
+      // those parameters that aren't expected to change.
+      if (httpClient == null)
+      {
+        BasicHttpParams params = new BasicHttpParams();
+        params.setParameter(ClientPNames.DEFAULT_HOST,fetchHost);
+        params.setBooleanParameter(CoreConnectionPNames.TCP_NODELAY,true);
+        params.setBooleanParameter(CoreConnectionPNames.STALE_CONNECTION_CHECK,false);
+        params.setBooleanParameter(ClientPNames.ALLOW_CIRCULAR_REDIRECTS,true);
+        // MEDIUM_SECURITY compatibility level not supported in HttpComponents.  Try BROWSER_NETSCAPE?
+        HttpClientParams.setCookiePolicy(params,CookiePolicy.BROWSER_COMPATIBILITY);
+        params.setBooleanParameter(CookieSpecPNames.SINGLE_COOKIE_HEADER,new Boolean(true));
 
-        HttpClient client = new HttpClient(connManager);
-        // Permit circular redirections, because that is how some sites set cookies
-        client.getParams().setParameter(org.apache.commons.httpclient.params.HttpClientParams.ALLOW_CIRCULAR_REDIRECTS,new Boolean(true));
-        // If there are redirects, this is essential to make sure the right socket factory gets used
-        client.getParams().setParameter(org.apache.commons.httpclient.params.HttpClientParams.PROTOCOL_FACTORY,myFactory);
+        DefaultHttpClient localHttpClient = new DefaultHttpClient(connManager,params);
+        localHttpClient.setRedirectStrategy(new DefaultRedirectStrategy());
+        localHttpClient.getCookieSpecs().register(CookiePolicy.BROWSER_COMPATIBILITY, new CookieSpecFactory()
+          {
 
-        HostConfiguration clientConf = new HostConfiguration();
-        // clientConf.setLocalAddress(currentAddr);
+            public CookieSpec newInstance(HttpParams params)
+            {
+              return new LaxBrowserCompatSpec();
+            }
+    
+          }
+        );
 
-        // Set up protocol to use
-        clientConf.setParams(new HostParams());
-        clientConf.setHost(server,port,myFactory.getProtocol(protocol));
         // If there's a proxy, set that too.
         if (proxyHost != null && proxyHost.length() > 0)
         {
-          clientConf.setProxy(proxyHost,proxyPort);
+          // Configure proxy authentication
           if (proxyAuthUsername != null && proxyAuthUsername.length() > 0)
           {
-            // Set up NTLM credentials for this fetch too.
-            client.getState().setProxyCredentials(AuthScope.ANY,
-              new NTCredentials(proxyAuthUsername,(proxyAuthPassword==null)?"":proxyAuthPassword,currentHost,(proxyAuthDomain==null)?"":proxyAuthDomain));
+            localHttpClient.getCredentialsProvider().setCredentials(
+              new AuthScope(proxyHost, proxyPort),
+              new NTCredentials(proxyAuthUsername, (proxyAuthPassword==null)?"":proxyAuthPassword, currentHost, (proxyAuthDomain==null)?"":proxyAuthDomain));
           }
+
+          HttpHost proxy = new HttpHost(proxyHost, proxyPort);
+
+          localHttpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
         }
 
+        // Set up authentication to use
+        if (authentication != null)
+        {
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("WEB: For "+myUrl+", discovered matching authentication credentials");
+          localHttpClient.getCredentialsProvider().setCredentials(AuthScope.ANY,
+            authentication.makeCredentialsObject(host));
+        }
+          
+        httpClient = localHttpClient;
+      }
+
 
+      // Set the parameters we haven't keyed on (so these can change from request to request)
+      httpClient.getParams().setIntParameter(CoreConnectionPNames.SO_TIMEOUT,socketTimeoutMilliseconds);
+      httpClient.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT,connectionTimeoutMilliseconds);
+      httpClient.getParams().setBooleanParameter(ClientPNames.HANDLE_REDIRECTS,redirectOK);
+
+      if (host != null)
+      {
         if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("WEB: Got an HttpClient object after "+new Long(System.currentTimeMillis()-startTime).toString()+" ms.");
+          Logging.connectors.debug("WEB: For "+myUrl+", setting virtual host to "+host);
+        httpClient.getParams().setParameter(ClientPNames.VIRTUAL_HOST,hostHost);
+      }
 
-        startFetchTime = System.currentTimeMillis();
 
-        int pageFetchMethod = FormData.SUBMITMETHOD_GET;
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("WEB: Got an HttpClient object after "+new Long(System.currentTimeMillis()-startTime).toString()+" ms.");
+
+      startFetchTime = System.currentTimeMillis();
+
+      int pageFetchMethod = FormData.SUBMITMETHOD_GET;
+      if (formData != null)
+        pageFetchMethod = formData.getSubmitMethod();
+      switch (pageFetchMethod)
+      {
+      case FormData.SUBMITMETHOD_GET:
+        // MUST be just the path, or apparently we wind up resetting the HostConfiguration
+        // Add additional parameters to url path
+        String fullUrlPath;
         if (formData != null)
-          pageFetchMethod = formData.getSubmitMethod();
-        switch (pageFetchMethod)
         {
-        case FormData.SUBMITMETHOD_GET:
-          // MUST be just the path, or apparently we wind up resetting the HostConfiguration
-          // Add additional parameters to url path
-          String fullUrlPath;
-          if (formData != null)
-          {
-            StringBuilder psb = new StringBuilder(urlPath);
-            Iterator iter = formData.getElementIterator();
-            char appendChar;
-            if (urlPath.indexOf("?") == -1)
-              appendChar = '?';
-            else
-              appendChar = '&';
+          StringBuilder psb = new StringBuilder(urlPath);
+          Iterator iter = formData.getElementIterator();
+          char appendChar;
+          if (urlPath.indexOf("?") == -1)
+            appendChar = '?';
+          else
+            appendChar = '&';
+          try
+          {
             while (iter.hasNext())
             {
-              FormDataElement e = (FormDataElement)iter.next();
+              FormDataElement el = (FormDataElement)iter.next();
               psb.append(appendChar);
               appendChar = '&';
-              String param = e.getElementName();
-              String value = e.getElementValue();
+              String param = el.getElementName();
+              String value = el.getElementValue();
               psb.append(java.net.URLEncoder.encode(param,"utf-8"));
               if (value != null)
+              {
                 psb.append('=').append(java.net.URLEncoder.encode(value,"utf-8"));
+              }
             }
-            fullUrlPath = psb.toString();
-          }
-          else
-          {
-            fullUrlPath = urlPath;
           }
-          // Hack; apparently httpclient treats // as a protocol specifier and so it rips off the first section of the path in that case.
-          while (fullUrlPath.startsWith("//"))
-            fullUrlPath = fullUrlPath.substring(1);
-          if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("WEB: Get method for '"+fullUrlPath+"'");
-          fetchMethod = new GetMethod(fullUrlPath);
-          break;
-        case FormData.SUBMITMETHOD_POST:
-          if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("WEB: Post method for '"+urlPath+"'");
-          // MUST be just the path, or apparently we wind up resetting the HostConfiguration
-          PostMethod postMethod = new PostMethod(urlPath);
-          // Add parameters to post variables
-          if (formData != null)
+          catch (java.io.UnsupportedEncodingException e)
           {
-            Iterator iter = formData.getElementIterator();
-            while (iter.hasNext())
-            {
-              FormDataElement e = (FormDataElement)iter.next();
-              String param = e.getElementName();
-              String value = e.getElementValue();
-              if (Logging.connectors.isDebugEnabled())
-                Logging.connectors.debug("WEB: Post parameter name '"+param+"' value '"+value+"' for '"+urlPath+"'");
-              postMethod.addParameter(param,value);
-            }
+            throw new ManifoldCFException("Unsupported encoding: "+e.getMessage(),e);
           }
-          fetchMethod = postMethod;
-          break;
-        default:
-          throw new ManifoldCFException("Illegal method type: "+Integer.toString(pageFetchMethod));
-        }
 
-        // Set all appropriate headers and parameters
-        fetchMethod.setRequestHeader("User-Agent",userAgent);
-        fetchMethod.setRequestHeader("From",from);
-        HttpMethodParams params = fetchMethod.getParams();
-        if (host != null)
+          fullUrlPath = psb.toString();
+        }
+        else
         {
-          if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("WEB: For "+myUrl+", setting virtual host to "+host);
-          params.setVirtualHost(host);
+          fullUrlPath = urlPath;
         }
-        params.setSoTimeout(socketTimeoutMilliseconds);
-        params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY_MEDIUM_SECURITY);
-        params.setParameter(HttpMethodParams.SINGLE_COOKIE_HEADER,new Boolean(true));
-        fetchMethod.setParams(params);
-        fetchMethod.setFollowRedirects(redirectOK);
-
-        // Clear all current cookies
-        HttpState state = client.getState();
-        state.clearCookies();
+        // Hack; apparently httpclient treats // as a protocol specifier and so it rips off the first section of the path in that case.
+        while (fullUrlPath.startsWith("//"))
+          fullUrlPath = fullUrlPath.substring(1);
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("WEB: Get method for '"+fullUrlPath+"'");
+        fetchMethod = new HttpGet(fullUrlPath);
+        break;
+      case FormData.SUBMITMETHOD_POST:
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("WEB: Post method for '"+urlPath+"'");
+        // MUST be just the path, or apparently we wind up resetting the HostConfiguration
+        HttpPost postMethod = new HttpPost(urlPath);
+        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
 
-        // If we have any cookies to set, set them.
-        if (loginCookies != null)
+        // Add parameters to post variables
+        if (formData != null)
         {
-          if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("WEB: Adding "+Integer.toString(loginCookies.getCookieCount())+" cookies for '"+urlPath+"'");
-          int h = 0;
-          while (h < loginCookies.getCookieCount())
+          Iterator iter = formData.getElementIterator();
+          while (iter.hasNext())
           {
-            state.addCookie(loginCookies.getCookie(h++));
+            FormDataElement e = (FormDataElement)iter.next();
+            String param = e.getElementName();
+            String value = e.getElementValue();
+            if (Logging.connectors.isDebugEnabled())
+              Logging.connectors.debug("WEB: Post parameter name '"+param+"' value '"+value+"' for '"+urlPath+"'");
+            nvps.add(new BasicNameValuePair(param,value));
           }
         }
-
-        // Copy out the current cookies, in case the fetch fails
-        lastFetchCookies = loginCookies;
-
-        // Set up authentication to use
-        if (authentication != null)
+        try
+        {
+          postMethod.setEntity(new UrlEncodedFormEntity(nvps,HTTP.UTF_8));
+        }
+        catch (java.io.UnsupportedEncodingException e)
+        {
+          throw new ManifoldCFException("Unsupported UTF-8 encoding: "+e.getMessage(),e);
+        }
+        fetchMethod = postMethod;
+        break;
+      default:
+        throw new ManifoldCFException("Illegal method type: "+Integer.toString(pageFetchMethod));
+      }
+
+      // Set all appropriate headers and parameters
+      fetchMethod.setHeader(new BasicHeader("User-Agent",userAgent));
+      fetchMethod.setHeader(new BasicHeader("From",from));
+        
+      // Use a custom cookie store
+      CookieStore cookieStore = new OurBasicCookieStore();
+      // If we have any cookies to set, set them.
+      if (loginCookies != null)
+      {
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("WEB: Adding "+Integer.toString(loginCookies.getCookieCount())+" cookies for '"+urlPath+"'");
+        int h = 0;
+        while (h < loginCookies.getCookieCount())
         {
           if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("WEB: For "+myUrl+", discovered matching authentication credentials");
-          state.setCredentials(AuthScope.ANY, authentication.makeCredentialsObject(host));
+            Logging.connectors.debug("WEB:  Cookie '"+loginCookies.getCookie(h)+"' added");
+          cookieStore.addCookie(loginCookies.getCookie(h++));
         }
+      }
 
-        // Set the state.  May not be necessary, but I'd rather not depend on undocumented httpclient implementation details.
-        client.setState(state);
 
-        // Fire it off!
+      // Copy out the current cookies, in case the fetch fails
+      lastFetchCookies = loginCookies;
+
+      //httpClient.setCookieStore(cookieStore);
+      
+      // Create the thread
+      methodThread = new ExecuteMethodThread(this, httpClient, fetchMethod, cookieStore);
+      try
+      {
+        methodThread.start();
+        threadStarted = true;
         try
         {
-          ExecuteMethodThread t = new ExecuteMethodThread(client,clientConf,fetchMethod);
-          try
-          {
-            t.start();
-            t.join();
-            Throwable thr = t.getException();
-            if (thr != null)
-            {
-              throw thr;
-            }
-            statusCode = t.getResponse();
-            if (recordEverything)
-              dataSession.setResponseCode(statusCode);
-          }
-          catch (InterruptedException e)
-          {
-            t.interrupt();
-            // We need the caller to abandon any connections left around, so rethrow in a way that forces them to process the event properly.
-            throw e;
-          }
-
-          // At least we didn't get an exception!  Copy out the current cookies for later reference.
-          lastFetchCookies = new CookieSet(client.getState().getCookies());
-
+          statusCode = methodThread.getResponseCode();
+          lastFetchCookies = methodThread.getCookies();
           switch (statusCode)
           {
           case HttpStatus.SC_REQUEST_TIMEOUT:
@@ -1616,112 +1548,85 @@ public class ThrottledFetcher
           default:
             return;
           }
-
-        }
-        catch (java.net.SocketTimeoutException e)
-        {
-          throwable = e;
-          long currentTime = System.currentTimeMillis();
-          throw new ServiceInterruption("Timed out waiting for IO for '"+myUrl+"': "+e.getMessage(), e, currentTime + TIME_5MIN,
-            currentTime + TIME_2HRS,-1,false);
-        }
-        catch (org.apache.commons.httpclient.ConnectTimeoutException e)
-        {
-          throwable = e;
-          long currentTime = System.currentTimeMillis();
-          throw new ServiceInterruption("Timed out waiting for connection for '"+myUrl+"': "+e.getMessage(), e, currentTime + TIME_5MIN,
-            currentTime + TIME_2HRS,-1,false);
-        }
-        catch (InterruptedIOException e)
-        {
-          //Logging.connectors.warn("IO interruption seen",e);
-          throwable = new ManifoldCFException("Interrupted: "+e.getMessage(),e);
-          statusCode = FETCH_INTERRUPTED;
-          throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
-        }
-        catch (org.apache.commons.httpclient.RedirectException e)
-        {
-          throwable = e;
-          statusCode = FETCH_CIRCULAR_REDIRECT;
-          if (recordEverything)
-            dataSession.setResponseCode(statusCode);
-          return;
-        }
-        catch (org.apache.commons.httpclient.NoHttpResponseException e)
-        {
-          throwable = e;
-          long currentTime = System.currentTimeMillis();
-          throw new ServiceInterruption("Timed out waiting for response for '"+myUrl+"': "+e.getMessage(), e, currentTime + TIME_15MIN,
-            currentTime + TIME_2HRS,-1,false);
-        }
-        catch (java.net.ConnectException e)
-        {
-          throwable = e;
-          long currentTime = System.currentTimeMillis();
-          throw new ServiceInterruption("Timed out waiting for a connection for '"+myUrl+"': "+e.getMessage(), e, currentTime + TIME_2HRS,
-            currentTime + TIME_6HRS,-1,false);
-        }
-        catch (javax.net.ssl.SSLException e)
-        {
-          // Probably this is an incorrectly configured trust store
-          throwable = new ManifoldCFException("SSL handshake error: "+e.getMessage()+"; check your connection's Certificate configuration",e);
-          statusCode = FETCH_IO_ERROR;
-          if (recordEverything)
-            dataSession.setResponseCode(statusCode);
-          return;
         }
-        catch (IOException e)
+        catch (InterruptedException e)
         {
-          // Treat this as a bad url.  We don't know what happened, but it isn't something we are going to naively
-          // retry on.
-          throwable = e;
-          statusCode = FETCH_IO_ERROR;
-          if (recordEverything)
-            dataSession.setResponseCode(statusCode);
-          return;
+          methodThread.interrupt();
+          methodThread = null;
+          threadStarted = false;
+          throw e;
         }
 
       }
       catch (InterruptedException e)
       {
-        // Drop the current connection, and in fact the whole pool, on the floor.
+        // Drop the current connection on the floor, so it cannot be reused.
         fetchMethod = null;
-        connManager = null;
         throwable = new ManifoldCFException("Interrupted: "+e.getMessage(),e);
         statusCode = FETCH_INTERRUPTED;
         throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
       }
-      catch (IllegalArgumentException e)
+      catch (java.net.SocketTimeoutException e)
+      {
+        throwable = e;
+        long currentTime = System.currentTimeMillis();
+        throw new ServiceInterruption("Timed out waiting for IO for '"+myUrl+"': "+e.getMessage(), e, currentTime + TIME_5MIN,
+          currentTime + TIME_2HRS,-1,false);
+      }
+      catch (ConnectTimeoutException e)
       {
-        throwable = new ManifoldCFException("Illegal URI: '"+myUrl+"'",e);
-        statusCode = FETCH_BAD_URI;
-        if (recordEverything)
-          dataSession.setResponseCode(statusCode);
-        return;
+        throwable = e;
+        long currentTime = System.currentTimeMillis();
+        throw new ServiceInterruption("Timed out waiting for connection for '"+myUrl+"': "+e.getMessage(), e, currentTime + TIME_5MIN,
+          currentTime + TIME_2HRS,-1,false);
+      }
+      catch (InterruptedIOException e)
+      {
+        //Logging.connectors.warn("IO interruption seen",e);
+        throwable = new ManifoldCFException("Interrupted: "+e.getMessage(),e);
+        statusCode = FETCH_INTERRUPTED;
+        throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
       }
-      catch (IllegalStateException e)
+      catch (RedirectException e)
       {
-        throwable = new ManifoldCFException("Illegal state while fetching URI: '"+myUrl+"'",e);
-        statusCode = FETCH_SEQUENCE_ERROR;
-        if (recordEverything)
-          dataSession.setResponseCode(statusCode);
+        throwable = e;
+        statusCode = FETCH_CIRCULAR_REDIRECT;
         return;
       }
-      catch (ServiceInterruption e)
+      catch (NoHttpResponseException e)
       {
-        throw e;
+        throwable = e;
+        long currentTime = System.currentTimeMillis();
+        throw new ServiceInterruption("Timed out waiting for response for '"+myUrl+"': "+e.getMessage(), e, currentTime + TIME_15MIN,
+          currentTime + TIME_2HRS,-1,false);
       }
-      catch (ManifoldCFException e)
+      catch (java.net.ConnectException e)
       {
-        throw e;
+        throwable = e;
+        long currentTime = System.currentTimeMillis();
+        throw new ServiceInterruption("Timed out waiting for a connection for '"+myUrl+"': "+e.getMessage(), e, currentTime + TIME_2HRS,
+          currentTime + TIME_6HRS,-1,false);
+      }
+      catch (javax.net.ssl.SSLException e)
+      {
+        // Probably this is an incorrectly configured trust store
+        throwable = new ManifoldCFException("SSL handshake error: "+e.getMessage()+"; check your connection's Certificate configuration",e);
+        statusCode = FETCH_IO_ERROR;
+        return;
+      }
+      catch (IOException e)
+      {
+        // Treat this as a bad url.  We don't know what happened, but it isn't something we are going to naively
+        // retry on.
+        throwable = e;
+        statusCode = FETCH_IO_ERROR;
+        return;
       }
       catch (Throwable e)
       {
         Logging.connectors.debug("WEB: Caught an unexpected exception: "+e.getMessage(),e);
         throwable = e;
         statusCode = FETCH_UNKNOWN_ERROR;
-        if (recordEverything)
-          dataSession.setResponseCode(statusCode);
         return;
       }
 
@@ -1744,6 +1649,14 @@ public class ThrottledFetcher
     public LoginCookies getLastFetchCookies()
       throws ManifoldCFException, ServiceInterruption
     {
+      if (Logging.connectors.isDebugEnabled())
+      {
+        Logging.connectors.debug("WEB: Retrieving cookies...");
+        for (int i = 0; i < lastFetchCookies.getCookieCount(); i++)
+        {
+          Logging.connectors.debug("WEB:   Cookie '"+lastFetchCookies.getCookie(i)+"'");
+        }
+      }
       return lastFetchCookies;
     }
 
@@ -1754,23 +1667,28 @@ public class ThrottledFetcher
     public Map<String,List<String>> getResponseHeaders()
       throws ManifoldCFException, ServiceInterruption
     {
-      Header[] headers = fetchMethod.getResponseHeaders();
-      Map<String,List<String>> rval = new HashMap<String,List<String>>();
-      int i = 0;
-      while (i < headers.length)
+      if (fetchMethod == null)
+        throw new ManifoldCFException("Attempt to get headers when there is no method");
+      if (methodThread == null || threadStarted == false)
+        throw new ManifoldCFException("Attempt to get headers when no method thread");
+      try
       {
-        Header h = headers[i++];
-        String name = h.getName();
-        String value = h.getValue();
-        List<String> values = rval.get(name);
-        if (values == null)
-        {
-          values = new ArrayList<String>();
-          rval.put(name,values);
-        }
-        values.add(value);
+        return methodThread.getResponseHeaders();
       }
-      return rval;
+      catch (InterruptedException e)
+      {
+        methodThread.interrupt();
+        throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+      }
+      catch (HttpException e)
+      {
+        handleHTTPException(e,"reading headers");
+      }
+      catch (IOException e)
+      {
+        handleIOException(e,"reading headers");
+      }
+      return null;
     }
 
     /** Get a specified response header, if it exists.
@@ -1781,14 +1699,30 @@ public class ThrottledFetcher
     public String getResponseHeader(String headerName)
       throws ManifoldCFException, ServiceInterruption
     {
-      Header h = fetchMethod.getResponseHeader(headerName);
-      if (h == null)
-        return null;
-      if (recordEverything)
-        dataSession.addHeader(headerName,h.getValue());
-      return h.getValue();
-    }
-
+      if (fetchMethod == null)
+        throw new ManifoldCFException("Attempt to get a header when there is no method");
+      if (methodThread == null || threadStarted == false)
+        throw new ManifoldCFException("Attempt to get a header when no method thread");
+      try
+      {
+        return methodThread.getFirstHeader(headerName);
+      }
+      catch (InterruptedException e)
+      {
+        methodThread.interrupt();
+        throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+      }
+      catch (HttpException e)
+      {
+        handleHTTPException(e,"reading header");
+      }
+      catch (IOException e)
+      {
+        handleIOException(e,"reading header");
+      }
+      return null;
+    }
+
     /** Get the response input stream.  It is the responsibility of the caller
     * to close this stream when done.
     */
@@ -1797,44 +1731,27 @@ public class ThrottledFetcher
       throws ManifoldCFException, ServiceInterruption
     {
       if (fetchMethod == null)
-        throw new ManifoldCFException("Attempt to get a response when there is no method");
+        throw new ManifoldCFException("Attempt to get an input stream when there is no method");
+      if (methodThread == null || threadStarted == false)
+        throw new ManifoldCFException("Attempt to get an input stream when no method thread");
       try
       {
-        if (recordEverything)
-          dataSession.endHeader();
-        InputStream bodyStream = fetchMethod.getResponseBodyAsStream();
-        if (bodyStream == null)
-        {
-          Logging.connectors.debug("Web: Couldn't set up response stream for '"+myUrl+"', retrying");
-          throw new ServiceInterruption("Failed to set up body response stream for "+myUrl,null,TIME_5MIN,-1L,2,false);
-        }
-        return new ThrottledInputstream(this,bodyStream,dataSession);
-      }
-      catch (java.net.SocketTimeoutException e)
-      {
-        Logging.connectors.debug("Web: Socket timeout exception setting up response stream for '"+myUrl+"', retrying");
-        throw new ServiceInterruption("Socket timeout exception setting up response stream: "+e.getMessage(),e,System.currentTimeMillis()+TIME_5MIN,-1L,2,false);
+        return methodThread.getSafeInputStream();
       }
-      catch (org.apache.commons.httpclient.ConnectTimeoutException e)
-      {
-        Logging.connectors.debug("Web: Connect timeout exception setting up response stream for '"+myUrl+"', retrying");
-        throw new ServiceInterruption("Connect timeout exception setting up response stream: "+e.getMessage(),e,System.currentTimeMillis()+TIME_5MIN,-1L,2,false);
-      }
-      catch (InterruptedIOException e)
+      catch (InterruptedException e)
       {
-        //Logging.connectors.warn("IO interruption seen: "+e.getMessage(),e);
+        methodThread.interrupt();
         throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
       }
       catch (IOException e)
       {
-        Logging.connectors.debug("Web: IO exception setting up response stream for '"+myUrl+"', retrying");
-        throw new ServiceInterruption("IO exception setting up response stream: "+e.getMessage(),e,System.currentTimeMillis()+TIME_5MIN,-1L,2,false);
+        handleIOException(e, "reading response stream");
       }
-      catch (IllegalStateException e)
+      catch (HttpException e)
       {
-        Logging.connectors.debug("Web: State error getting response body for '"+myUrl+"', retrying");
-        throw new ServiceInterruption("State error getting response body: "+e.getMessage(),e,TIME_5MIN,-1L,2,false);
+        handleHTTPException(e, "reading response stream");
       }
+      return null;
     }
 
     /** Get limited response as a string.
@@ -1860,31 +1777,11 @@ public class ThrottledFetcher
           is.close();
         }
       }
-      catch (java.net.SocketTimeoutException e)
-      {
-        Logging.connectors.debug("Web: Socket timeout exception reading response stream for '"+myUrl+"', retrying");
-        throw new ServiceInterruption("Socket timeout exception reading response stream: "+e.getMessage(),e,System.currentTimeMillis()+TIME_5MIN,-1L,2,false);
-      }
-      catch (org.apache.commons.httpclient.ConnectTimeoutException e)
-      {
-        Logging.connectors.debug("Web: Connect timeout exception reading response stream for '"+myUrl+"', retrying");
-        throw new ServiceInterruption("Connect timeout exception reading response stream: "+e.getMessage(),e,System.currentTimeMillis()+TIME_5MIN,-1L,2,false);
-      }
-      catch (InterruptedIOException e)
-      {
-        //Logging.connectors.warn("IO interruption seen: "+e.getMessage(),e);
-        throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-      }
       catch (IOException e)
       {
-        Logging.connectors.debug("Web: IO exception reading response stream for '"+myUrl+"', retrying");
-        throw new ServiceInterruption("IO exception reading response stream: "+e.getMessage(),e,System.currentTimeMillis()+TIME_5MIN,-1L,2,false);
-      }
-      catch (IllegalStateException e)
-      {
-        Logging.connectors.debug("Web: State error reading response body for '"+myUrl+"', retrying");
-        throw new ServiceInterruption("State error reading response body: "+e.getMessage(),e,TIME_5MIN,-1L,2,false);
+        handleIOException(e,"reading limited response");
       }
+      return null;
     }
 
     /** Note that the connection fetch was interrupted by something.
@@ -1908,6 +1805,9 @@ public class ThrottledFetcher
     {
       if (fetchType != null)
       {
+        // Abort the connection, if not already complete
+        methodThread.abort();
+
         long endTime = System.currentTimeMillis();
         int i = 0;
         while (i < throttleBinArray.length)
@@ -1932,20 +1832,25 @@ public class ThrottledFetcher
             Logging.connectors.debug("WEB: Fetch exception for '"+myUrl+"'",throwable);
         }
 
-
-        // Clear out all the parameters
-        if (fetchMethod != null)
+        // Shut down (join) the connection thread, if any, and if it started
+        if (methodThread != null)
         {
-          try
-          {
-            fetchMethod.releaseConnection();
-          }
-          catch (IllegalStateException e)
+          if (threadStarted)
           {
-            // looks like the fetch method didn't have one, or it was already released.  Just eat the exception.
+            try
+            {
+              methodThread.finishUp();
+            }
+            catch (InterruptedException e)
+            {
+              throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+            }
+            threadStarted = false;
           }
-          fetchMethod = null;
+          methodThread = null;
         }
+        
+        fetchMethod = null;
         throwable = null;
         startFetchTime = -1L;
         myUrl = null;
@@ -1999,6 +1904,61 @@ public class ThrottledFetcher
         poolLock.notifyAll();
       }
     }
+    
+    protected void handleHTTPException(HttpException e, String activity)
+      throws ServiceInterruption, ManifoldCFException
+    {
+      long currentTime = System.currentTimeMillis();
+      Logging.connectors.debug("Web: HTTP exception "+activity+" for '"+myUrl+"', retrying");
+      throw new ServiceInterruption("HTTP exception "+activity+": "+e.getMessage(),e,currentTime+TIME_5MIN,-1L,2,false);
+    }
+
+    protected void handleIOException(IOException e, String activity)
+      throws ServiceInterruption, ManifoldCFException
+    {
+      if (e instanceof java.net.SocketTimeoutException)
+      {
+        long currentTime = System.currentTimeMillis();
+        Logging.connectors.debug("Web: Socket timeout exception "+activity+" for '"+myUrl+"', retrying");
+        throw new ServiceInterruption("Socket timeout exception "+activity+": "+e.getMessage(),e,currentTime+TIME_5MIN,-1L,2,false);
+      }
+      if (e instanceof ConnectTimeoutException)
+      {
+        long currentTime = System.currentTimeMillis();
+        Logging.connectors.debug("Web: Connect timeout exception "+activity+" for '"+myUrl+"', retrying");
+        throw new ServiceInterruption("Connect timeout exception "+activity+": "+e.getMessage(),e,currentTime+TIME_5MIN,-1L,2,false);
+      }
+      if (e instanceof InterruptedIOException)
+      {
+        methodThread.interrupt();
+        throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
+      }
+      if (e instanceof NoHttpResponseException)
+      {
+        // Give up after 2 hours.
+        long currentTime = System.currentTimeMillis();
+        throw new ServiceInterruption("Timed out "+activity+" for '"+myUrl+"'", e, currentTime + 15L * 60000L,
+          currentTime + 120L * 60000L,-1,false);
+      }
+      if (e instanceof java.net.ConnectException)
+      {
+        // Give up after 12 hours (720 minutes).
+        long currentTime = System.currentTimeMillis();
+        throw new ServiceInterruption("Timed out "+activity+" for '"+myUrl+"'", e, currentTime + 1000000L,
+          currentTime + 720L * 60000L,-1,false);
+      }
+      if (e instanceof java.net.NoRouteToHostException)
+      {
+        // This exception means we know the IP address but can't get there.  That's either a firewall issue, or it's something transient
+        // with the network.  Some degree of retry is probably wise.
+        long currentTime = System.currentTimeMillis();
+        throw new ServiceInterruption("No route to host during "+activity+" for '"+myUrl+"'", e, currentTime + 1000000L,
+          currentTime + 720L * 60000L,-1,false);
+      }
+      long currentTime = System.currentTimeMillis();
+      Logging.connectors.debug("Web: IO exception "+activity+" for '"+myUrl+"', retrying");
+      throw new ServiceInterruption("IO exception "+activity+": "+e.getMessage(),e,currentTime+TIME_5MIN,-1L,2,false);
+    }
 
   }
 
@@ -2015,15 +1975,12 @@ public class ThrottledFetcher
     /** The stream we are wrapping. */
     protected InputStream inputStream;
 
-    protected DataSession dataSession;
-
     /** Constructor.
     */
-    public ThrottledInputstream(ThrottledConnection connection, InputStream is, DataSession dataSession)
+    public ThrottledInputstream(ThrottledConnection connection, InputStream is)
     {
       this.throttledConnection = connection;
       this.inputStream = is;
-      this.dataSession = dataSession;
     }
 
     /** Read a byte.
@@ -2094,8 +2051,6 @@ public class ThrottledFetcher
         try
         {
           amt = inputStream.read(b,off,len);
-          if (recordEverything && amt != -1)
-            dataSession.write(b,off,amt);
           return amt;
         }
         finally
@@ -2175,7 +2130,7 @@ public class ThrottledFetcher
       {
         Logging.connectors.debug("Socket timeout exception trying to close connection: "+e.getMessage(),e);
       }
-      catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+      catch (ConnectTimeoutException e)
       {
         Logging.connectors.debug("Socket connection timeout exception trying to close connection: "+e.getMessage(),e);
       }
@@ -2221,143 +2176,128 @@ public class ThrottledFetcher
     }
   }
 
-  /** HTTPClient secure socket factory, which implements SecureProtocolSocketFactory
+  /** SSL Socket factory which wraps another socket factory but allows timeout on socket
+  * creation.
   */
-  protected static class WebSecureSocketFactory implements SecureProtocolSocketFactory
+  protected static class InterruptibleSocketFactory extends javax.net.ssl.SSLSocketFactory
   {
-    /** This is the javax.net socket factory.
-    */
-    protected javax.net.ssl.SSLSocketFactory socketFactory;
-
-    /** Constructor */
-    public WebSecureSocketFactory(javax.net.ssl.SSLSocketFactory socketFactory)
+    protected final javax.net.ssl.SSLSocketFactory wrappedFactory;
+    protected final long connectTimeoutMilliseconds;
+    
+    public InterruptibleSocketFactory(javax.net.ssl.SSLSocketFactory wrappedFactory, long connectTimeoutMilliseconds)
     {
-      this.socketFactory = socketFactory;
+      this.wrappedFactory = wrappedFactory;
+      this.connectTimeoutMilliseconds = connectTimeoutMilliseconds;
     }
 
-    public Socket createSocket(
-      String host,
-      int port,
-      InetAddress clientHost,
-      int clientPort)
+    @Override
+    public Socket createSocket()
+      throws IOException
+    {
+      // Socket isn't open
+      return wrappedFactory.createSocket();
+    }
+    
+    @Override
+    public Socket createSocket(String host, int port)
       throws IOException, UnknownHostException
     {
-      return socketFactory.createSocket(
-        host,
-        port,
-        clientHost,
-        clientPort
-      );
+      return fireOffThread(InetAddress.getByName(host),port,null,-1);
     }
 
-    public Socket createSocket(
-      final String host,
-      final int port,
-      final InetAddress localAddress,
-      final int localPort,
-      final HttpConnectionParams params
-    ) throws IOException, UnknownHostException, ConnectTimeoutException
+    @Override
+    public Socket createSocket(InetAddress host, int port)
+      throws IOException
     {
-      if (params == null)
-      {
-        throw new IllegalArgumentException("Parameters may not be null");
-      }
-      int timeout = params.getConnectionTimeout();
-      if (timeout == 0)
-      {
-        return createSocket(host, port, localAddress, localPort);
-      }
-      else
-      {
-        // We need to implement a connection timeout somehow - probably with a new thread.
-        SocketCreateThread thread = new SocketCreateThread(socketFactory,host,port,localAddress,localPort);
-        thread.start();
-        try
-        {
-          // Wait for thread to complete for only a certain amount of time!
-          thread.join(timeout);
-          // If join() times out, then the thread is going to still be alive.
-          if (thread.isAlive())
-          {
-            // Kill the thread - not that this will necessarily work, but we need to try
-            thread.interrupt();
-            throw new ConnectTimeoutException("Secure connection timed out");
-          }
-          // The thread terminated.  Throw an error if there is one, otherwise return the result.
-          Throwable t = thread.getException();
-          if (t != null)
-          {
-            if (t instanceof java.net.SocketTimeoutException)
-              throw (java.net.SocketTimeoutException)t;
-            else if (t instanceof org.apache.commons.httpclient.ConnectTimeoutException)
-              throw (org.apache.commons.httpclient.ConnectTimeoutException)t;
-            else if (t instanceof InterruptedIOException)
-              throw (InterruptedIOException)t;
-            else if (t instanceof IOException)
-              throw (IOException)t;
-            else if (t instanceof UnknownHostException)
-              throw (UnknownHostException)t;
-            else if (t instanceof Error)
-              throw (Error)t;
-            else if (t instanceof RuntimeException)
-              throw (RuntimeException)t;
-            throw new Error("Received an unexpected exception: "+t.getMessage(),t);
-          }
-          return thread.getResult();
-        }
-        catch (InterruptedException e)
-        {
-          throw new InterruptedIOException("Interrupted: "+e.getMessage());
-        }
-      }
+      return fireOffThread(host,port,null,-1);
     }
-
-    public Socket createSocket(String host, int port)
+    
+    @Override
+    public Socket createSocket(String host, int port, InetAddress localHost, int localPort)
       throws IOException, UnknownHostException
     {
-      return socketFactory.createSocket(
-        host,
-        port
-      );
+      return fireOffThread(InetAddress.getByName(host),port,localHost,localPort);
     }
-
-    public Socket createSocket(
-      Socket socket,
-      String host,
-      int port,
-      boolean autoClose)
-      throws IOException, UnknownHostException
+    
+    @Override
+    public Socket createSocket(InetAddress address, int port, InetAddress localAddress, int localPort)
+      throws IOException
     {
-      return socketFactory.createSocket(
-        socket,
-        host,
-        port,
-        autoClose
-      );
+      return fireOffThread(address,port,localAddress,localPort);
     }
-
-    public boolean equals(Object obj)
+    
+    @Override
+    public Socket createSocket(Socket s, String host, int port, boolean autoClose)
+      throws IOException
     {
-      if (obj == null || !(obj instanceof WebSecureSocketFactory))
-        return false;
-      // Each object is unique
-      return super.equals(obj);
+      // Socket's already open
+      return wrappedFactory.createSocket(s,host,port,autoClose);
     }
-
-    public int hashCode()
+    
+    @Override
+    public String[] getDefaultCipherSuites()
     {
-      return super.hashCode();
+      return wrappedFactory.getDefaultCipherSuites();
     }
+    
+    @Override
+    public String[] getSupportedCipherSuites()
+    {
+      return wrappedFactory.getSupportedCipherSuites();
+    }
+    
+    protected Socket fireOffThread(InetAddress address, int port, InetAddress localHost, int localPort)
+      throws IOException
+    {
+      SocketCreateThread thread = new SocketCreateThread(wrappedFactory,address,port,localHost,localPort);
+      thread.start();
+      try
+      {
+        // Wait for thread to complete for only a certain amount of time!
+        thread.join(connectTimeoutMilliseconds);
+        // If join() times out, then the thread is going to still be alive.
+        if (thread.isAlive())
+        {
+          // Kill the thread - not that this will necessarily work, but we need to try
+          thread.interrupt();
+          throw new ConnectTimeoutException("Secure connection timed out");
+        }
+        // The thread terminated.  Throw an error if there is one, otherwise return the result.
+        Throwable t = thread.getException();
+        if (t != null)
+        {
+          if (t instanceof java.net.SocketTimeoutException)
+            throw (java.net.SocketTimeoutException)t;
+          else if (t instanceof ConnectTimeoutException)
+            throw (ConnectTimeoutException)t;
+          else if (t instanceof InterruptedIOException)
+            throw (InterruptedIOException)t;
+          else if (t instanceof IOException)
+            throw (IOException)t;
+          else if (t instanceof Error)
+            throw (Error)t;
+          else if (t instanceof RuntimeException)
+            throw (RuntimeException)t;
+          throw new Error("Received an unexpected exception: "+t.getMessage(),t);
+        }
+        return thread.getResult();
+      }
+      catch (InterruptedException e)
+      {
+        throw new InterruptedIOException("Interrupted: "+e.getMessage());
+      }
 
+    }
+    
   }
-
+  
   /** Create a secure socket in a thread, so that we can "give up" after a while if the socket fails to connect.
   */
   protected static class SocketCreateThread extends Thread
   {
     // Socket factory
     protected javax.net.ssl.SSLSocketFactory socketFactory;
-    protected String host;
+    protected InetAddress host;
     protected int port;
     protected InetAddress clientHost;
     protected int clientPort;
@@ -2369,7 +2309,7 @@ public class ThrottledFetcher
 
     /** Create the thread */
     public SocketCreateThread(javax.net.ssl.SSLSocketFactory socketFactory,
-      String host,
+      InetAddress host,
       int port,
       InetAddress clientHost,
       int clientPort)
@@ -2386,7 +2326,10 @@ public class ThrottledFetcher
     {
       try
       {
-        rval = socketFactory.createSocket(host,port,clientHost,clientPort);
+        if (clientHost == null)
+          rval = socketFactory.createSocket(host,port);
+        else
+          rval = socketFactory.createSocket(host,port,clientHost,clientPort);
       }
       catch (Throwable e)
       {
@@ -2405,4 +2348,451 @@ public class ThrottledFetcher
     }
   }
 
+  /** BrowserCompatSpec variant that does not check cookie paths: it registers a path-attribute handler whose validate() does nothing.  See CONNECTORS-97.
+  * Only validate() is overridden; everything else the BasicPathHandler does is inherited unchanged. */
+  protected static class LaxBrowserCompatSpec extends BrowserCompatSpec
+  {
+
+    public LaxBrowserCompatSpec()
+    {
+      super();
+      registerAttribHandler(ClientCookie.PATH_ATTR, new BasicPathHandler()
+        {
+          @Override
+          public void validate(Cookie cookie, CookieOrigin origin) throws MalformedCookieException
+          {
+            // Deliberately empty: no validation, so no MalformedCookieException is ever raised from here
+          }
+              
+        }
+      );
+    }
+    
+  }
+
+  /** This thread does the actual socket communication with the server.
+  * It is marked as a daemon thread and is set up so that it can be abandoned at shutdown time.
+  *
+  * run() works through these steps; the first three hold this thread's monitor and normally end with notifyAll():
+  * - it executes the request, saving the HttpResponse in "response" or the failure in "responseException"
+  * - it captures the cookies from the cookie store in "cookies", or the failure in "cookieException"
+  * - it wraps the response body in the stream handed to the calling class, or saves the failure in "streamException"
+  * - it feeds the body into that stream (outside the monitor), and in all cases finally aborts the request
+  *
+  * Each of the first three steps is skipped once abort() has set abortThread; steps after a recorded exception are skipped too.
+  * NOTE(review): when a step is skipped for abortThread, or an InterruptedIOException is rethrown, neither a result nor an
+  * exception is recorded for it, so an accessor still waiting on that step would not return - confirm callers cannot hit this.
+  *
+  * The calling class basically accepts the sequence above.  It starts the thread, and is expected to ask for the
+  * response code, cookies and stream through the accessors below, which wait() until the matching step has finished.
+  * If instead an exception was recorded for that step, the exception is thrown up the stack. */
+  protected static class ExecuteMethodThread extends Thread
+  {
+    /** The connection; handed to the ThrottledInputstream that wraps the response body */
+    protected final ThrottledConnection theConnection;
+    /** Client and method, all preconfigured, plus the cookie store that is placed in the request context */
+    protected final AbstractHttpClient httpClient;
+    protected final HttpRequestBase executeMethod;
+    protected final CookieStore cookieStore;
+    
+    protected HttpResponse response = null;
+    protected Throwable responseException = null;
+    protected LoginCookies cookies = null;
+    protected Throwable cookieException = null;
+    protected XThreadInputStream threadStream = null;
+    protected boolean streamCreated = false;
+    protected Throwable streamException = null;
+    protected boolean abortThread = false;
+
+    protected Throwable shutdownException = null;
+
+    protected Throwable generalException = null;
+    
+    public ExecuteMethodThread(ThrottledConnection theConnection,
+      AbstractHttpClient httpClient, HttpRequestBase executeMethod, CookieStore cookieStore)
+    {
+      super();
+      setDaemon(true);
+      this.theConnection = theConnection;
+      this.httpClient = httpClient;
+      this.executeMethod = executeMethod;
+      this.cookieStore = cookieStore;
+    }
+
+    public void run()
+    {
+      try
+      {
+        try
+        {
+          // Step 1: execute the request with a fresh context that carries our cookie store; the monitor is held throughout
+          synchronized (this)
+          {
+            if (!abortThread)
+            {
+              try
+              {
+                HttpContext context = new BasicHttpContext();
+                context.setAttribute(ClientContext.COOKIE_STORE,cookieStore);
+                response = httpClient.execute(executeMethod,context);
+              }
+              catch (java.net.SocketTimeoutException e)
+              {
+                responseException = e;
+              }
+              catch (ConnectTimeoutException e)
+              {
+                responseException = e;
+              }
+              catch (InterruptedIOException e)
+              {
+                throw e;
+              }
+              catch (Throwable e)
+              {
+                responseException = e;
+              }
+              this.notifyAll();
+            }
+          }
+          
+          // Step 2: fetch the cookies from the cookie store, but only if the request itself did not fail
+          if (responseException == null)
+          {
+            synchronized (this)
+            {
+              if (!abortThread)
+              {
+                try
+                {
+                  cookies = new CookieSet(cookieStore.getCookies());
+                }
+                catch (Throwable e)
+                {
+                  cookieException = e;
+                }
+                this.notifyAll();
+              }
+            }
+          }
+
+          // Step 3: start the transfer of the content by wrapping the body stream; threadStream stays null if there is no body stream
+          if (cookieException == null && responseException == null)
+          {
+            synchronized (this)
+            {
+              if (!abortThread)
+              {
+                try
+                {
+                  InputStream bodyStream = response.getEntity().getContent();
+                  if (bodyStream != null)
+                  {
+                    bodyStream = new ThrottledInputstream(theConnection,bodyStream);
+                    threadStream = new XThreadInputStream(bodyStream);
+                  }
+                  streamCreated = true;
+                }
+                catch (java.net.SocketTimeoutException e)
+                {
+                  streamException = e;
+                }
+                catch (ConnectTimeoutException e)
+                {
+                  streamException = e;
+                }
+                catch (InterruptedIOException e)
+                {
+                  throw e;
+                }
+                catch (Throwable e)
+                {
+                  streamException = e;
+                }
+                this.notifyAll();
+              }
+            }
+          }
+          
+          if (cookieException == null && responseException == null && streamException == null)
+          {
+            if (threadStream != null)
+            {
+              // Stuff the content until we are done; this runs outside the monitor, so accessors and abort() are not blocked by it
+              threadStream.stuffQueue();
+            }
+          }
+          
+        }
+        finally
+        {
+          synchronized (this)
+          {
+            try
+            {
+              executeMethod.abort();
+            }
+            catch (Throwable e)
+            {
+              shutdownException = e;
+            }
+            this.notifyAll();
+          }
+        }
+      }
+      catch (Throwable e)
+      {
+        // Whatever escaped the steps above lands here (by design ONLY interruption, as a result of the thread being aborted); it is recorded, but nothing in this class reads it.
+        this.generalException = e;
+      }
+    }
+
+    public int getResponseCode()
+      throws InterruptedException, IOException, HttpException
+    {
+      // Must wait until step 1 has either produced the response object or recorded an exception (which is rethrown here)
+      while (true)
+      {
+        synchronized (this)
+        {
+          checkException(responseException);
+          if (response != null)
+            return response.getStatusLine().getStatusCode();
+          wait();
+        }
+      }
+    }
+
+    public Map<String,List<String>> getResponseHeaders()
+      throws InterruptedException, IOException, HttpException
+    {
+      // Must wait for the response object to appear; the headers are then grouped by name, keeping every value in header order
+      while (true)
+      {
+        synchronized (this)
+        {
+          checkException(responseException);
+          if (response != null)
+          {
+            Header[] headers = response.getAllHeaders();
+            Map<String,List<String>> rval = new HashMap<String,List<String>>();
+            int i = 0;
+            while (i < headers.length)
+            {
+              Header h = headers[i++];
+              String name = h.getName();
+              String value = h.getValue();
+              List<String> values = rval.get(name);
+              if (values == null)
+              {
+                values = new ArrayList<String>();
+                rval.put(name,values);
+              }
+              values.add(value);
+            }
+            return rval;
+          }
+          wait();
+        }
+      }
+
+    }
+    
+    public String getFirstHeader(String headerName)
+      throws InterruptedException, IOException, HttpException
+    {
+      // Must wait for the response object to appear; returns null when the response has no header of that name
+      while (true)
+      {
+        synchronized (this)
+        {
+          checkException(responseException);
+          if (response != null)
+          {
+            Header h = response.getFirstHeader(headerName);
+            if (h == null)
+              return null;
+            return h.getValue();
+          }
+          wait();
+        }
+      }
+    }
+
+    public LoginCookies getCookies()
+      throws InterruptedException, IOException, HttpException
+    {
+      while (true)
+      {
+        synchronized (this)
+        {
+          if (responseException != null)
+            throw new IllegalStateException("Check for response before getting cookies");
+          checkException(cookieException);
+          if (cookies != null)
+            return cookies;
+          wait();
+        }
+      }
+    }
+    
+    public InputStream getSafeInputStream()
+      throws InterruptedException, IOException, HttpException
+    {
+      // Must wait until stream is created, or until we note an exception was thrown.  May return null: streamCreated is also set when there was no body stream.
+      while (true)
+      {
+        synchronized (this)
+        {
+          if (responseException != null)
+            throw new IllegalStateException("Check for response before getting stream");
+          if (cookieException != null)
+            throw new IllegalStateException("Check for cookies before getting stream");
+          checkException(streamException);
+          if (streamCreated)
+            return threadStream;
+          wait();
+        }
+      }
+    }
+    
+    public void abort()
+    {
+      // This will be called during the (presumably caller's) finally block, both in the case
+      // where all is well (and the stream completed) and in the case where there were exceptions.
+      // It needs this thread's monitor, which run() holds for the whole of each of its first three
+      // steps, so it waits for a step in progress; it then aborts the stream (if any) and sets abortThread.
+      synchronized (this)
+      {
+        if (streamCreated)
+        {
+          if (threadStream != null)
+            threadStream.abort();
+        }
+        abortThread = true;
+      }
+    }
+    
+    public void finishUp()
+      throws InterruptedException
+    {
+      join();
+    }
+    
+    protected synchronized void checkException(Throwable exception)
+      throws IOException, HttpException
+    {
+      if (exception != null)
+      {
+        // Rethrow the given exception as the most specific type we declare.  NOTE(review): nothing is cleared here, so the same exception is thrown again on every later call.
+        Throwable e = exception;
+        if (e instanceof IOException)
+          throw (IOException)e;
+        else if (e instanceof HttpException)
+          throw (HttpException)e;
+        else if (e instanceof RuntimeException)
+          throw (RuntimeException)e;
+        else if (e instanceof Error)
+          throw (Error)e;
+        else
+          throw new RuntimeException("Unhandled exception of type: "+e.getClass().getName(),e);
+      }
+    }
+
+  }
+
+  protected static class OurBasicCookieStore implements CookieStore, Serializable {
+
+    private static final long serialVersionUID = -7581093305228232025L;
+
+    private final TreeSet<Cookie> cookies;
+
+    public OurBasicCookieStore() {
+      super();
+      this.cookies = new TreeSet<Cookie>(new CookieIdentityComparator());
+    }
+
+    /**
+     * Adds an {@link Cookie HTTP cookie}, replacing any existing cookie that the store's
+     * CookieIdentityComparator treats as equivalent. No expiration check is made here:
+     * the cookie is stored even if it has already expired. A null cookie is ignored.
+     *
+     * @param cookie the {@link Cookie cookie} to be added; may be null
+     *
+     * @see #addCookies(Cookie[])
+     *
+     */
+    public synchronized void addCookie(Cookie cookie) {
+      if (cookie != null) {
+        // first remove any old cookie that is equivalent, then store the new one unconditionally
+        cookies.remove(cookie);
+        cookies.add(cookie);
+      }
+    }
+
+    /**
+     * Adds an array of {@link Cookie HTTP cookies}. Cookies are added individually and
+     * in the given array order, each through {@link #addCookie(Cookie)}, so expired
+     * cookies are stored as well. A null array is ignored.
+     *
+     * @param cookies the {@link Cookie cookies} to be added; may be null
+     *
+     * @see #addCookie(Cookie)
+     *
+     */
+    public synchronized void addCookies(Cookie[] cookies) {
+      if (cookies != null) {
+        for (Cookie cooky : cookies) {
+          this.addCookie(cooky);
+        }
+      }
+    }
+
+    /**
+     * Returns a newly allocated list holding the {@link Cookie cookies} that this
+     * store currently contains; changing the returned list does not affect the store.
+     *
+     * @return a list of {@link Cookie cookies}, in the iteration order of the underlying sorted set.
+     */
+    public synchronized List<Cookie> getCookies() {
+      // create defensive copy so callers never iterate the live set while it is being modified
+      return new ArrayList<Cookie>(cookies);
+    }
+
+    /**
+     * Removes all of {@link Cookie cookies} in this store
+     * that have expired by the specified {@link java.util.Date date}.
+     *
+     * @param date the date to test against; if null, nothing is removed and false is returned
+     * @return true if any cookies were purged.
+     * @see Cookie#isExpired(Date)
+     */
+    public synchronized boolean clearExpired(final Date date) {
+      if (date == null) {
+        return false;
+      }
+      boolean removed = false;
+      for (Iterator<Cookie> it = cookies.iterator(); it.hasNext();) {
+        if (it.next().isExpired(date)) {
+          it.remove();
+            removed = true;
+        }
+      }
+      return removed;
+    }
+
+    /**
+     * Clears all cookies.
+     */
+    public synchronized void clear() {
+      cookies.clear();
+    }
+
+    @Override
+    public synchronized String toString() {
+      return cookies.toString();
+    }
+
+  }
+
 }

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/TrustsDescription.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/TrustsDescription.java?rev=1417060&r1=1417059&r2=1417060&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/TrustsDescription.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/TrustsDescription.java Tue Dec  4 17:47:28 2012
@@ -23,10 +23,6 @@ import org.apache.manifoldcf.crawler.sys
 import java.util.*;
 import java.util.regex.*;
 
-import org.apache.commons.httpclient.*;
-import org.apache.commons.httpclient.methods.*;
-import org.apache.commons.httpclient.params.*;
-
 /** This class describes trust information pulled from a configuration.
 * The data contained is organized by regular expression performed on a url.  What we store
 * for each regular expression is a Pattern, for efficiency.

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1417060&r1=1417059&r2=1417060&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Tue Dec  4 17:47:28 2012
@@ -32,15 +32,17 @@ import org.apache.manifoldcf.agents.comm
 import org.apache.manifoldcf.agents.common.XMLStringContext;
 import org.apache.manifoldcf.agents.common.XMLFileContext;
 
+import org.apache.http.conn.ConnectTimeoutException;
+import org.apache.http.client.RedirectException;
+import org.apache.http.client.CircularRedirectException;
+import org.apache.http.NoHttpResponseException;
+import org.apache.http.HttpException;
+
 import java.io.*;
 import java.util.*;
 import java.net.*;
 import java.util.regex.*;
 
-import org.apache.commons.httpclient.*;
-import org.apache.commons.httpclient.methods.*;
-import org.apache.commons.httpclient.params.*;
-
 /** This is the Web Crawler implementation of the IRepositoryConnector interface.
 * This connector may be superceded by one that calls out to python, or by a entirely
 * python Connector Framework, depending on how the winds blow.
@@ -1412,7 +1414,7 @@ public class WebcrawlerConnector extends
               {
                 throw new ManifoldCFException("Socket timeout error closing stream: "+e.getMessage(),e);
               }
-              catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+              catch (ConnectTimeoutException e)
               {
                 throw new ManifoldCFException("Socket connect timeout error closing stream: "+e.getMessage(),e);
               }
@@ -5174,7 +5176,7 @@ public class WebcrawlerConnector extends
             {
               Logging.connectors.warn("Web: Couldn't clear robots cache: "+e2.getMessage(),e2);
             }
-            catch (org.apache.commons.httpclient.ConnectTimeoutException e2)
+            catch (ConnectTimeoutException e2)
             {
               Logging.connectors.warn("Web: Couldn't clear robots cache: "+e2.getMessage(),e2);
             }
@@ -5188,7 +5190,7 @@ public class WebcrawlerConnector extends
               Logging.connectors.warn("Web: Couldn't clear robots cache: "+e2.getMessage(),e2);
             }
           }
-          catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+          catch (ConnectTimeoutException e)
           {
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("Web: Fetch of robots.txt from "+protocol+"://"+hostIPAddressAndPort+"(host='"+hostName+"') generated Socket Connect Timeout Exception: "+e.getMessage(),e);
@@ -5202,7 +5204,7 @@ public class WebcrawlerConnector extends
             {
               Logging.connectors.warn("Web: Couldn't clear robots cache: "+e2.getMessage(),e2);
             }
-            catch (org.apache.commons.httpclient.ConnectTimeoutException e2)
+            catch (ConnectTimeoutException e2)
             {
               Logging.connectors.warn("Web: Couldn't clear robots cache: "+e2.getMessage(),e2);
             }
@@ -5235,7 +5237,7 @@ public class WebcrawlerConnector extends
             {
               Logging.connectors.warn("Web: Couldn't clear robots cache: "+e2.getMessage(),e2);
             }
-            catch (org.apache.commons.httpclient.ConnectTimeoutException e2)
+            catch (ConnectTimeoutException e2)
             {
               Logging.connectors.warn("Web: Couldn't clear robots cache: "+e2.getMessage(),e2);
             }
@@ -6062,7 +6064,7 @@ public class WebcrawlerConnector extends
     {
       throw new ManifoldCFException("Socket timeout exception: "+e.getMessage(),e);
     }
-    catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+    catch (ConnectTimeoutException e)
     {
       throw new ManifoldCFException("Socket connect timeout exception: "+e.getMessage(),e);
     }
@@ -6801,7 +6803,7 @@ public class WebcrawlerConnector extends
     {
       throw new ManifoldCFException("Socket timeout exception: "+e.getMessage(),e);
     }
-    catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+    catch (ConnectTimeoutException e)
     {
       throw new ManifoldCFException("Socket connect timeout exception: "+e.getMessage(),e);
     }
@@ -6856,7 +6858,7 @@ public class WebcrawlerConnector extends
     {
       throw new ManifoldCFException("Socket timeout exception accessing cached document: "+e.getMessage(),e);
     }
-    catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+    catch (ConnectTimeoutException e)
     {
       throw new ManifoldCFException("Socket timeout exception accessing cached document: "+e.getMessage(),e);
     }

Modified: manifoldcf/trunk/connectors/webcrawler/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/pom.xml?rev=1417060&r1=1417059&r2=1417060&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/pom.xml (original)
+++ manifoldcf/trunk/connectors/webcrawler/pom.xml Tue Dec  4 17:47:28 2012
@@ -89,9 +89,9 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>commons-httpclient</groupId>
-      <artifactId>commons-httpclient</artifactId>
-      <version>${commons-httpclient.version}</version>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient</artifactId>
+      <version>${httpcomponent.version}</version>
     </dependency>
     <dependency>
       <groupId>commons-logging</groupId>



Mime
View raw message