manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1603285 - /manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Date Tue, 17 Jun 2014 20:23:11 GMT
Author: kwright
Date: Tue Jun 17 20:23:11 2014
New Revision: 1603285

URL: http://svn.apache.org/r1603285
Log:
Fix CONNECTORS-966 for web connector

Modified:
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1603285&r1=1603284&r2=1603285&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Tue Jun 17 20:23:11 2014
@@ -1435,30 +1435,24 @@ public class WebcrawlerConnector extends
             try
             {
               rd.setBinary(is,length);
-              activities.ingestDocument(documentIdentifier,version,documentIdentifier,rd);
-            }
-            finally
-            {
               try
               {
-                is.close();
+                activities.ingestDocumentWithException(documentIdentifier,version,documentIdentifier,rd);
               }
-              catch (java.net.SocketException e)
-              {
-                throw new ManifoldCFException("Socket timeout error closing stream: "+e.getMessage(),e);
-              }
-              catch (ConnectTimeoutException e)
+              catch (IOException e)
               {
-                throw new ManifoldCFException("Socket connect timeout error closing stream: "+e.getMessage(),e);
+                handleIOException(e,"reading data");
               }
-              catch (InterruptedIOException e)
+            }
+            finally
+            {
+              try
               {
-                //Logging.connectors.warn("IO interruption seen",e);
-                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+                is.close();
               }
               catch (IOException e)
               {
-                throw new ManifoldCFException("IO error closing stream: "+e.getMessage(),e);
+                handleIOException(e,"closing stream");
               }
             }
           }
@@ -1483,6 +1477,19 @@ public class WebcrawlerConnector extends
     }
   }
 
+  protected static void handleIOException(IOException e, String context)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    if (e instanceof java.net.SocketException)
+      throw new ManifoldCFException("Socket timeout error "+context+": "+e.getMessage(),e);
+    else if (e instanceof ConnectTimeoutException)
+      throw new ManifoldCFException("Socket connect timeout error "+context+": "+e.getMessage(),e);
+    else if (e instanceof InterruptedIOException)
+      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    else
+      throw new ManifoldCFException("IO error "+context+": "+e.getMessage(),e);
+  }
+  
   /** Free a set of documents.  This method is called for all documents whose versions have been fetched using
   * the getDocumentVersions() method, including those that returned null versions.  It may be used to free resources
   * committed during the getDocumentVersions() method.  It is guaranteed to be called AFTER any calls to



Mime
View raw message