manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1431176 - in /manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler: ThrottledFetcher.java WebcrawlerConnector.java
Date Thu, 10 Jan 2013 01:47:34 GMT
Author: kwright
Date: Thu Jan 10 01:47:34 2013
New Revision: 1431176

URL: http://svn.apache.org/viewvc?rev=1431176&view=rev
Log:
More changes to bring Web connector back into line with MCF 1.0.1.  Part of CONNECTORS-604.
 Note well: This change REQUIRES a new version of httpclient, downloaded via ant make-core-deps!!

Modified:
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1431176&r1=1431175&r2=1431176&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Thu Jan 10 01:47:34 2013
@@ -1270,33 +1270,33 @@ public class ThrottledFetcher
         new AllowAllHostnameVerifier());
       Scheme myHttpsProtocol = new Scheme("https", 443, myFactory);
 
-      int resolvedPort;
+      int hostPort;
       String displayedPort;
       if (port != -1)
       {
         if (!(protocol.equals("http") && port == 80) &&
           !(protocol.equals("https") && port == 443))
+        {
           displayedPort = ":"+Integer.toString(port);
+          hostPort = port;
+        }
         else
+        {
           displayedPort = "";
-        resolvedPort = port;
+          hostPort = -1;
+        }
       }
       else
       {
-        if (protocol.equals("http"))
-          resolvedPort = 80;
-        else if (protocol.equals("https"))
-          resolvedPort = 443;
-        else
-          throw new IllegalArgumentException("Unexpected protocol: "+protocol);
         displayedPort = "";
+        hostPort = -1;
       }
 
       StringBuilder sb = new StringBuilder(protocol);
       sb.append("://").append(server).append(displayedPort).append(urlPath);
       String fetchUrl = sb.toString();
 
-      HttpHost fetchHost = new HttpHost(server,port,protocol);
+      HttpHost fetchHost = new HttpHost(server,hostPort,protocol);
       HttpHost hostHost;
       
       if (host != null)
@@ -1304,7 +1304,7 @@ public class ThrottledFetcher
         sb.setLength(0);
         sb.append(protocol).append("://").append(host).append(displayedPort).append(urlPath);
         myUrl = sb.toString();
-        hostHost = new HttpHost(host,resolvedPort,protocol);
+        hostHost = new HttpHost(host,hostPort,protocol);
       }
       else
       {

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1431176&r1=1431175&r2=1431176&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Thu Jan 10 01:47:34 2013
@@ -630,7 +630,8 @@ public class WebcrawlerConnector extends
 
         if (Logging.connectors.isDebugEnabled())
         {
-          Logging.connectors.debug("Web: For document identifier '"+documentIdentifier+"' found session credential key '"+sessionCredential.getSequenceKey()+"'");
+          if (sessionCredential != null)
+            Logging.connectors.debug("Web: For document identifier '"+documentIdentifier+"' found session credential key '"+sessionCredential.getSequenceKey()+"'");
         }
         
         // Set up the initial state and state variables.



Mime
View raw message