manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1435542 - in /manifoldcf/branches/release-1.1-branch: ./ CHANGES.txt connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
Date Sat, 19 Jan 2013 13:30:08 GMT
Author: kwright
Date: Sat Jan 19 13:30:07 2013
New Revision: 1435542

URL: http://svn.apache.org/viewvc?rev=1435542&view=rev
Log:
Pull up fix for CONNECTORS-616 from trunk.

Modified:
    manifoldcf/branches/release-1.1-branch/   (props changed)
    manifoldcf/branches/release-1.1-branch/CHANGES.txt
    manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java

Propchange: manifoldcf/branches/release-1.1-branch/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk:r1435540

Modified: manifoldcf/branches/release-1.1-branch/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.1-branch/CHANGES.txt?rev=1435542&r1=1435541&r2=1435542&view=diff
==============================================================================
--- manifoldcf/branches/release-1.1-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-1.1-branch/CHANGES.txt Sat Jan 19 13:30:07 2013
@@ -3,6 +3,12 @@ $Id$
 
 ======================= Release 1.1 =====================
 
+CONNECTORS-616: Work around Solr 4.0 or Jetty bug where connections
+are dropped randomly under multithreaded load.  Broken pipe exceptions
+are now retried after a minute, for up to three times, before the Solr
+connector concludes that the document cannot be indexed and skips it.
+(Shinichiro Abe, Karl Wright)
+
 CONNECTORS-613: Add a way of getting a document's mime type
 to Solr, since Tika needs mime type in order to extract content
 since Solr 4.0.0.

Modified: manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java?rev=1435542&r1=1435541&r2=1435542&view=diff
==============================================================================
--- manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java (original)
+++ manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java Sat Jan 19 13:30:07 2013
@@ -403,14 +403,24 @@ public class HttpPoster
     
     if (e.getClass().getName().equals("java.net.SocketException"))
     {
-      // Intercept "broken pipe" exception, since that seems to be what we get if the ingestion API kills the socket right after a 400 goes out.
-      // Basically, we have no choice but to interpret that in the same manner as a 400, since no matter how we do it, it's a race and the 'broken pipe'
-      // result is always possible.  So we might as well expect it and treat it properly.
+      // In the past we would have treated this as a straight document rejection, and
+      // treated it in the same manner as a 400.  The reasoning is that the server can
+      // perfectly legally send out a 400 and drop the connection immediately thereafter,
+      // this being a race condition.
+      // However, Solr 4.0 (or the Jetty version that the example runs on) seems
+      // to have a bug where it drops the connection when two simultaneous documents come in
+      // at the same time.  This is the final version of Solr 4.0 so we need to deal with
+      // this.
       if (e.getMessage().toLowerCase().indexOf("broken pipe") != -1)
-        // We've seen what looks like the ingestion interface forcibly closing the socket.
-        // We *choose* to interpret this just like a 400 response.  However, we log in the history using a different code,
-        // since we really don't know what happened for sure.
-        return;
+        // Treat it as a service interruption, but with a limited number of retries.
+        // In that way we won't burden the user with a huge retry interval; it should
+        // give up fairly quickly, and yet NOT give up if the error was merely transient
+        throw new ServiceInterruption("Server dropped connection during "+context+": "+e.getMessage(),
+          e,
+          currentTime + interruptionRetryTime,
+          -1L,
+          3,
+          false);
       
       // Other socket exceptions are service interruptions - but if we keep getting them, it means
       // that a socket timeout is probably set too low to accept this particular document.  So
@@ -418,7 +428,7 @@ public class HttpPoster
       throw new ServiceInterruption("Socket timeout exception during "+context+": "+e.getMessage(),
         e,
         currentTime + interruptionRetryTime,
-        currentTime + 1L * 60L * 60000L,
+        currentTime + 20L * 60000L,
         -1,
         false);
     }
@@ -811,10 +821,17 @@ public class HttpPoster
             // Log what happened to us
             activityStart = new Long(fullStartTime);
             activityBytes = new Long(length);
-            activityCode = "FAILED";
             activityDetails = e.getMessage() +
               ((e.getCause() != null)?": "+e.getCause().getMessage():"");
             
+            // Broken pipe exceptions we log specially because they usually mean
+            // Solr has rejected the document, and the user will want to know that.
+            if (e.getCause() != null && e.getCause().getClass().getName().equals("java.net.SocketException") &&
+              activityDetails.indexOf("broken pipe") != -1)
+              activityCode = "SOLR REJECT";
+            else
+              activityCode = "FAILED";
+
             // Rethrow; will interpret at a higher level
             throw e;
           }



Mime
View raw message