manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1585925 - /manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
Date Wed, 09 Apr 2014 09:57:54 GMT
Author: kwright
Date: Wed Apr  9 09:57:54 2014
New Revision: 1585925

URL: http://svn.apache.org/r1585925
Log:
More exception handling changes

Modified:
    manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java

Modified: manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java?rev=1585925&r1=1585924&r2=1585925&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java Wed Apr  9 09:57:54 2014
@@ -17,23 +17,23 @@
 * limitations under the License.
 */
 package org.apache.manifoldcf.agents.output.amazoncloudsearch;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InterruptedIOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.http.Consts;
 import org.apache.http.HttpEntity;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.ClientProtocolException;
-import org.apache.http.client.HttpClient;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.entity.StringEntity;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
 import org.apache.http.impl.DefaultHttpClientConnection;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.DefaultHttpClient;
@@ -45,18 +45,17 @@ import org.apache.manifoldcf.agents.inte
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
 import org.apache.manifoldcf.agents.output.BaseOutputConnector;
-import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel.Document;
-import org.apache.manifoldcf.core.interfaces.ConfigParams;
-import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParsingReader;
-import org.apache.tika.parser.html.HtmlParser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
+import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel.Document;
+import org.apache.manifoldcf.core.interfaces.ConfigParams;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.crawler.system.Logging;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.html.HtmlParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 import com.fasterxml.jackson.core.JsonFactory;
@@ -154,43 +153,68 @@ public class AmazonCloudSearchConnector 
     throws ManifoldCFException
   {
     try
-    {
-      getSession();
-      String responsbody = postData("[]");
-      
-      String status = "";
-      String message = "";
-      JsonFactory factory = new JsonFactory();
-      JsonParser parser = factory.createJsonParser(responsbody);
-      while (parser.nextToken() != JsonToken.END_OBJECT) {
-        String name = parser.getCurrentName();
-        if("status".equalsIgnoreCase(name)){
-          status = parser.getValueAsString();
-        }else if("errors".equalsIgnoreCase(name)){
-          message = parseMessage(parser);
-        }
-      }
-      if("error".equalsIgnoreCase(status) &&
-          "Encountered unexpected end of file".equals(message)){
-        return "Connection working.";
-      }
-      return "Connection NOT working.";
+    {
+      getSession();
+      String responsbody = postData("[]");
+      String status = getStatusFromJsonResponse(responsbody);
+      
+      //check status message
+      String message = "";
+      if("error".equals(status))
+      {
+        JsonParser parser = new JsonFactory().createJsonParser(responsbody);
+        while (parser.nextToken() != JsonToken.END_OBJECT) {
+          String name = parser.getCurrentName();
+          if("errors".equalsIgnoreCase(name)){
+            message = parseMessage(parser);
+            break;
+          }
+        }
+      }
+      
+      if("error".equalsIgnoreCase(status) &&
+          "batch must contain at least one operation".equals(message)){
+        return "Connection working.";
+      }
+      return "Connection NOT working.";
     }
     catch (ClientProtocolException e) {
-      throw new ManifoldCFException(e);
-    } catch (IOException e) {
-      throw new ManifoldCFException(e);
-    }
-  }
-  
-  private String parseMessage(JsonParser parser) throws JsonParseException, IOException {
-    while(parser.nextToken() != JsonToken.END_ARRAY){
-      String name = parser.getCurrentName();
-      if("message".equalsIgnoreCase(name)){
-        return parser.getValueAsString();
-      }
-    }
-    return null;
+      throw new ManifoldCFException(e);
+    } catch (IOException e) {
+      throw new ManifoldCFException(e);
+    } catch (ServiceInterruption e) {
+      throw new ManifoldCFException(e);
+    }
+  }
+  
+  private String getStatusFromJsonResponse(String responsbody) throws ManifoldCFException {
+    try {
+      JsonParser parser = new JsonFactory().createJsonParser(responsbody);
+      while (parser.nextToken() != JsonToken.END_OBJECT)
+      {
+        String name = parser.getCurrentName();
+        if("status".equalsIgnoreCase(name)){
+          parser.nextToken();
+          return parser.getText();
+        }
+      }
+    } catch (JsonParseException e) {
+      throw new ManifoldCFException(e);
+    } catch (IOException e) {
+      throw new ManifoldCFException(e);
+    }
+    return null;
+  }
+  
+  private String parseMessage(JsonParser parser) throws JsonParseException, IOException {
+    while(parser.nextToken() != JsonToken.END_ARRAY){
+      String name = parser.getCurrentName();
+      if("message".equalsIgnoreCase(name)){
+        parser.nextToken();
+        return parser.getText();
+      }
+    }
+    return null;
   }
 
   /** Get an output version string, given an output specification.  The output version string is used to uniquely describe the pertinent details of
@@ -246,12 +270,13 @@ public class AmazonCloudSearchConnector 
   @Override
   public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption
-  {
-    // Establish a session
-    getSession();
-    try {
-      InputStream is = document.getBinaryStream();
-      Parser parser = new HtmlParser();
+  {
+    // Establish a session
+    getSession();
+    String jsondata = "";
+    try {
+      InputStream is = document.getBinaryStream();
+      Parser parser = new HtmlParser();
       ContentHandler handler = new BodyContentHandler();
       Metadata metadata = new Metadata();
       parser.parse(is, handler, metadata, new ParseContext());
@@ -270,13 +295,13 @@ public class AmazonCloudSearchConnector 
         fields.put("body", bodyStr);
       }
       
-      //mapping metadata to SDF fields.
-      String contenttype = metadata.get("Content-Style-Type");
-      String title = metadata.get("dc:title");
-      String size = metadata.get("Content-Length");
-      String description = metadata.get("description");
-      String keywords = metadata.get("keywords");
-      if(contenttype != null && !"".equals(contenttype)) fields.put("content_type", contenttype);
+      //mapping metadata to SDF fields.
+      String contenttype = metadata.get("Content-Style-Type");
+      String title = metadata.get("dc:title");
+      String size = String.valueOf(bodyStr.length());
+      String description = metadata.get("description");
+      String keywords = metadata.get("keywords");
+      if(contenttype != null && !"".equals(contenttype)) fields.put("content_type", contenttype);
       if(title != null && !"".equals(title)) fields.put("title", title);
       if(size != null && !"".equals(size)) fields.put("size", size);
       if(description != null && !"".equals(description)) fields.put("description", description);
@@ -289,16 +314,10 @@ public class AmazonCloudSearchConnector 
         fields.put("keywords", keywordList);
       }
       doc.setFields(fields);
-      model.addDocument(doc);
-      
-      //generate json data.
-      String jsondata = model.toJSON();
-      
-      //post data..
-      String responsbody = postData(jsondata);
-            
-      activities.recordActivity(null,INGEST_ACTIVITY,new Long(document.getBinaryLength()),documentURI,"OK",null);
-      return DOCUMENTSTATUS_ACCEPTED;
+      model.addDocument(doc);
+      
+      //generate json data.
+      jsondata = model.toJSON();
       
     } 
     catch (SAXException e) {
@@ -314,6 +333,20 @@ public class AmazonCloudSearchConnector 
       // if document data could not be read when the document parsing by tika.
       throw new ManifoldCFException(e);
     }
+    
+    //post data..
+    String responsbody = postData(jsondata);
+    
+    // check status
+    String status = getStatusFromJsonResponse(responsbody);
+    if("success".equals(status))
+    {
+      activities.recordActivity(null,INGEST_ACTIVITY,new Long(document.getBinaryLength()),documentURI,"OK",null);
+      return DOCUMENTSTATUS_ACCEPTED;
+    }
+    else {
+      throw new ManifoldCFException("recieved error status from service after feeding document.");
+    }
   }
 
   /** Remove a document using the connector.
@@ -340,33 +373,54 @@ public class AmazonCloudSearchConnector 
       jsonData = model.toJSON();
     } catch (JsonProcessingException e) {
       throw new ManifoldCFException(e);
-    }
-    String responsbody = postData(jsonData);
-    
-    
-    activities.recordActivity(null,REMOVE_ACTIVITY,null,documentURI,"OK",null);
-  }
-
-  private String postData(String jsonData) throws ManifoldCFException {
-    CloseableHttpClient httpclient = HttpClients.createDefault();
-    try {
-      poster.setEntity(new StringEntity(jsonData, Consts.UTF_8));
+    }
+    String responsbody = postData(jsonData);
+    
+    // check status
+    String status = getStatusFromJsonResponse(responsbody);
+    if("success".equals(status))
+    {
+      activities.recordActivity(null,REMOVE_ACTIVITY,null,documentURI,"OK",null);
+    }
+    else {
+      throw new ManifoldCFException("recieved error status from service after feeding document.");
+    }
+  }
+
+  private String postData(String jsonData) throws ServiceInterruption, ManifoldCFException {
+    CloseableHttpClient httpclient = HttpClients.createDefault();
+    try {
+      poster.setEntity(new StringEntity(jsonData, Consts.UTF_8));
       HttpResponse res = httpclient.execute(poster);
-      
-      HttpEntity resEntity = res.getEntity();
-      return EntityUtils.toString(resEntity);
-      
-    } catch (ClientProtocolException e) {
-      throw new ManifoldCFException(e);
-    } catch (IOException e) {
-      throw new ManifoldCFException(e);
-    } finally {
-      try {
-        httpclient.close();
+      
+      HttpEntity resEntity = res.getEntity();
+      return EntityUtils.toString(resEntity);
+    } catch (ClientProtocolException e) {
+      throw new ManifoldCFException(e);
+    } catch (IOException e) {
+      handleIOException(e);
+    } finally {
+      try {
+        httpclient.close();
       } catch (IOException e) {
-        //do nothing
-      }
-    }
-  }
-  
+        //do nothing
+      }
+    }
+    return null;
+  }
+  
+  private static void handleIOException(IOException e)
+      throws ManifoldCFException, ServiceInterruption {
+    if (!(e instanceof java.net.SocketTimeoutException)
+        && (e instanceof InterruptedIOException)) {
+      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+          ManifoldCFException.INTERRUPTED);
+    }
+    Logging.connectors.warn(
+        "Amazon CloudSearch: IO exception: " + e.getMessage(), e);
+    long currentTime = System.currentTimeMillis();
+    throw new ServiceInterruption("IO exception: " + e.getMessage(), e,
+        currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false);
+  }
+  
 }
\ No newline at end of file



Mime
View raw message