manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1492461 - /manifoldcf/branches/CONNECTORS-714/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
Date Wed, 12 Jun 2013 22:51:07 GMT
Author: kwright
Date: Wed Jun 12 22:51:07 2013
New Revision: 1492461

URL: http://svn.apache.org/r1492461
Log:
Rearrange indexing so that we can handle both types of indexing.

Modified:
    manifoldcf/branches/CONNECTORS-714/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java

Modified: manifoldcf/branches/CONNECTORS-714/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-714/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java?rev=1492461&r1=1492460&r2=1492461&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-714/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-714/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java Wed Jun 12 22:51:07 2013
@@ -4192,13 +4192,17 @@ public class LivelinkConnector extends o
       return;
     }
 
-    RepositoryDocument rd = new RepositoryDocument();
-
-    int colonPos = documentIdentifier.indexOf(":",1);
-    
+    // Fetch logging
+    long startTime = System.currentTimeMillis();
+    String resultCode = "FAILED";
+    String resultDescription = null;
+    Long readSize = null;
+    boolean wasInterrupted = false;
     int objID;
     int vol;
 
+    int colonPos = documentIdentifier.indexOf(":",1);
+        
     if (colonPos == -1)
     {
       objID = new Integer(documentIdentifier.substring(1)).intValue();
@@ -4209,356 +4213,494 @@ public class LivelinkConnector extends o
       objID = new Integer(documentIdentifier.substring(colonPos+1)).intValue();
       vol = new Integer(documentIdentifier.substring(1,colonPos)).intValue();
     }
-
-    // Add general metadata
-    ObjectInformation objInfo = llc.getObjectInformation(vol,objID);
-    VersionInformation versInfo = llc.getVersionInformation(vol,objID,0);
-    if (!objInfo.exists())
-    {
-      Logging.connectors.debug("Livelink: No object "+contextMsg+": not ingesting");
-      return;
-    }
-    if (!versInfo.exists())
-    {
-      Logging.connectors.debug("Livelink: No version data "+contextMsg+": not ingesting");
-      return;
-    }
     
-    // Add general data we need for the output connector
-    String mimeType = versInfo.getMimeType();
-    if (mimeType != null)
-      rd.setMimeType(mimeType);
-    String fileName = versInfo.getFileName();
-    if (fileName != null)
-      rd.setFileName(fileName);
-    Date creationDate = objInfo.getCreationDate();
-    if (creationDate != null)
-      rd.setCreatedDate(creationDate);
-    Date modifyDate = versInfo.getModifyDate();
-    if (modifyDate != null)
-      rd.setModifiedDate(modifyDate);
-    
-    rd.addField(GENERAL_NAME_FIELD,objInfo.getName());
-    rd.addField(GENERAL_DESCRIPTION_FIELD,objInfo.getComments());
-    if (creationDate != null)
-      rd.addField(GENERAL_CREATIONDATE_FIELD,creationDate.toString());
-    if (modifyDate != null)
-      rd.addField(GENERAL_MODIFYDATE_FIELD,modifyDate.toString());
-    UserInformation owner = llc.getUserInformation(objInfo.getOwnerId().intValue());
-    UserInformation creator = llc.getUserInformation(objInfo.getCreatorId().intValue());
-    UserInformation modifier = llc.getUserInformation(versInfo.getOwnerId().intValue());
-    if (owner != null)
-      rd.addField(GENERAL_OWNER,owner.getName());
-    if (creator != null)
-      rd.addField(GENERAL_CREATOR,creator.getName());
-    if (modifier != null)
-      rd.addField(GENERAL_MODIFIER,modifier.getName());
-
-    // Iterate over the metadata items.  These are organized by category
-    // for speed of lookup.
-
-    // Unpack version string
-    int startPos = 0;
-
-    // Metadata items first
-    ArrayList metadataItems = new ArrayList();
-    startPos = unpackList(metadataItems,version,startPos,'+');
-    Iterator catIter = desc.getItems(metadataItems);
-    while (catIter.hasNext())
-    {
-      MetadataItem item = (MetadataItem)catIter.next();
-      MetadataPathItem pathItem = item.getPathItem();
-      if (pathItem != null)
-      {
-        int catID = pathItem.getCatID();
-        // grab the associated catversion
-        LLValue catVersion = getCatVersion(objID,catID);
-        if (catVersion != null)
-        {
-          // Go through attributes now
-          Iterator attrIter = item.getAttributeNames();
-          while (attrIter.hasNext())
-          {
-            String attrName = (String)attrIter.next();
-            // Create a unique metadata name
-            String metadataName = pathItem.getCatName()+":"+attrName;
-            // Fetch the metadata and stuff it into the RepositoryData structure
-            String[] metadataValue = getAttributeValue(catVersion,attrName);
-            if (metadataValue != null)
-              rd.addField(metadataName,metadataValue);
-            else
-              Logging.connectors.warn("Livelink: Metadata attribute '"+metadataName+"' does
not seem to exist; please correct the job");
-          }
-        }
-
-      }
-    }
-
-    // Unpack acls (conditionally)
-    if (startPos < version.length())
+    // Try/finally for fetch logging
+    try
     {
-      char x = version.charAt(startPos++);
-      if (x == '+')
+      // Check URL first
+      if (activities.checkURLIndexable(viewHttpAddress))
       {
-        ArrayList acls = new ArrayList();
-        startPos = unpackList(acls,version,startPos,'+');
-        // Turn into acls and add into description
-        String[] aclArray = new String[acls.size()];
-        int j = 0;
-        while (j < aclArray.length)
+
+        // Add general metadata
+        ObjectInformation objInfo = llc.getObjectInformation(vol,objID);
+        VersionInformation versInfo = llc.getVersionInformation(vol,objID,0);
+        if (!objInfo.exists())
         {
-          aclArray[j] = (String)acls.get(j);
-          j++;
+          resultCode = "OBJECTNOTFOUND";
+          Logging.connectors.debug("Livelink: No object "+contextMsg+": not ingesting");
+          return;
+        }
+        if (!versInfo.exists())
+        {
+          resultCode = "VERSIONNOTFOUND";
+          Logging.connectors.debug("Livelink: No version data "+contextMsg+": not ingesting");
+          return;
         }
-        rd.setACL(aclArray);
 
-        StringBuilder denyBuffer = new StringBuilder();
-        startPos = unpack(denyBuffer,version,startPos,'+');
-        String denyAcl = denyBuffer.toString();
-        String[] denyAclArray = new String[1];
-        denyAclArray[0] = denyAcl;
-        rd.setDenyACL(denyAclArray);
-      }
-    }
+        String mimeType = versInfo.getMimeType();
+        if (activities.checkMimeTypeIndexable(mimeType))
+        {
+          Long dataSize = versInfo.getDataSize();
+          if (dataSize != null && activities.checkLengthIndexable(dataSize.longValue()))
+          {
+            String fileName = versInfo.getFileName();
+            Date creationDate = objInfo.getCreationDate();
+            Date modifyDate = versInfo.getModifyDate();
+            RepositoryDocument rd = new RepositoryDocument();
+
+            
+            // Add general data we need for the output connector
+            if (mimeType != null)
+              rd.setMimeType(mimeType);
+            if (fileName != null)
+              rd.setFileName(fileName);
+            if (creationDate != null)
+              rd.setCreatedDate(creationDate);
+            if (modifyDate != null)
+              rd.setModifiedDate(modifyDate);
+            
+            rd.addField(GENERAL_NAME_FIELD,objInfo.getName());
+            rd.addField(GENERAL_DESCRIPTION_FIELD,objInfo.getComments());
+            if (creationDate != null)
+              rd.addField(GENERAL_CREATIONDATE_FIELD,creationDate.toString());
+            if (modifyDate != null)
+              rd.addField(GENERAL_MODIFYDATE_FIELD,modifyDate.toString());
+            UserInformation owner = llc.getUserInformation(objInfo.getOwnerId().intValue());
+            UserInformation creator = llc.getUserInformation(objInfo.getCreatorId().intValue());
+            UserInformation modifier = llc.getUserInformation(versInfo.getOwnerId().intValue());
+            if (owner != null)
+              rd.addField(GENERAL_OWNER,owner.getName());
+            if (creator != null)
+              rd.addField(GENERAL_CREATOR,creator.getName());
+            if (modifier != null)
+              rd.addField(GENERAL_MODIFIER,modifier.getName());
+
+            // Iterate over the metadata items.  These are organized by category
+            // for speed of lookup.
+
+            // Unpack version string
+            int startPos = 0;
+
+            // Metadata items first
+            ArrayList metadataItems = new ArrayList();
+            startPos = unpackList(metadataItems,version,startPos,'+');
+            Iterator catIter = desc.getItems(metadataItems);
+            while (catIter.hasNext())
+            {
+              MetadataItem item = (MetadataItem)catIter.next();
+              MetadataPathItem pathItem = item.getPathItem();
+              if (pathItem != null)
+              {
+                int catID = pathItem.getCatID();
+                // grab the associated catversion
+                LLValue catVersion = getCatVersion(objID,catID);
+                if (catVersion != null)
+                {
+                  // Go through attributes now
+                  Iterator attrIter = item.getAttributeNames();
+                  while (attrIter.hasNext())
+                  {
+                    String attrName = (String)attrIter.next();
+                    // Create a unique metadata name
+                    String metadataName = pathItem.getCatName()+":"+attrName;
+                    // Fetch the metadata and stuff it into the RepositoryData structure
+                    String[] metadataValue = getAttributeValue(catVersion,attrName);
+                    if (metadataValue != null)
+                      rd.addField(metadataName,metadataValue);
+                    else
+                      Logging.connectors.warn("Livelink: Metadata attribute '"+metadataName+"'
does not seem to exist; please correct the job");
+                  }
+                }
 
-    // Add the path metadata item into the mix, if enabled
-    String pathAttributeName = sDesc.getPathAttributeName();
-    if (pathAttributeName != null && pathAttributeName.length() > 0)
-    {
-      String pathString = sDesc.getPathAttributeValue(documentIdentifier);
-      if (pathString != null)
-      {
-        if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("Livelink: Path attribute name is '"+pathAttributeName+"'"+contextMsg+",
value is '"+pathString+"'");
-        rd.addField(pathAttributeName,pathString);
-      }
-    }
+              }
+            }
 
-    // Set up connection
-    HttpClient client = getInitializedClient(contextMsg);
+            // Unpack acls (conditionally)
+            if (startPos < version.length())
+            {
+              char x = version.charAt(startPos++);
+              if (x == '+')
+              {
+                ArrayList acls = new ArrayList();
+                startPos = unpackList(acls,version,startPos,'+');
+                // Turn into acls and add into description
+                String[] aclArray = new String[acls.size()];
+                int j = 0;
+                while (j < aclArray.length)
+                {
+                  aclArray[j] = (String)acls.get(j);
+                  j++;
+                }
+                rd.setACL(aclArray);
 
-    long currentTime;
+                StringBuilder denyBuffer = new StringBuilder();
+                startPos = unpack(denyBuffer,version,startPos,'+');
+                String denyAcl = denyBuffer.toString();
+                String[] denyAclArray = new String[1];
+                denyAclArray[0] = denyAcl;
+                rd.setDenyACL(denyAclArray);
+              }
+            }
 
-    if (Logging.connectors.isInfoEnabled())
-      Logging.connectors.info("Livelink: " + ingestHttpAddress);
+            // Add the path metadata item into the mix, if enabled
+            String pathAttributeName = sDesc.getPathAttributeName();
+            if (pathAttributeName != null && pathAttributeName.length() > 0)
+            {
+              String pathString = sDesc.getPathAttributeValue(documentIdentifier);
+              if (pathString != null)
+              {
+                if (Logging.connectors.isDebugEnabled())
+                  Logging.connectors.debug("Livelink: Path attribute name is '"+pathAttributeName+"'"+contextMsg+",
value is '"+pathString+"'");
+                rd.addField(pathAttributeName,pathString);
+              }
+            }
 
-    long startTime = System.currentTimeMillis();
-    String resultCode = "OK";
-    String resultDescription = null;
-    Long readSize = null;
+            if (true)
+            {
+              // Use HTTP to fetch document!
 
-    HttpGet method = new HttpGet(getHost().toURI() + ingestHttpAddress);
-    method.setHeader(new BasicHeader("Accept","*/*"));
+              // Set up connection
+              HttpClient client = getInitializedClient(contextMsg);
 
-    ExecuteMethodThread methodThread = new ExecuteMethodThread(client,method);
-    methodThread.start();
-    try
-    {
+              long currentTime;
 
-      int statusCode = methodThread.getResponseCode();
-      switch (statusCode)
-      {
-      case 500:
-      case 502:
-        Logging.connectors.warn("Livelink: Service interruption during fetch "+contextMsg+"
with Livelink HTTP Server, retrying...");
-        throw new ServiceInterruption("Service interruption during fetch",new ManifoldCFException(Integer.toString(statusCode)+"
error while fetching"),System.currentTimeMillis()+60000L,
-          System.currentTimeMillis()+600000L,-1,true);
+              if (Logging.connectors.isInfoEnabled())
+                Logging.connectors.info("Livelink: " + ingestHttpAddress);
 
-      case HttpStatus.SC_UNAUTHORIZED:
-        Logging.connectors.warn("Livelink: Document fetch unauthorized for "+ingestHttpAddress+"
("+contextMsg+")");
-        // Since we logged in, we should fail here if the ingestion user doesn't have access
to the
-        // the document, but if we do, don't fail hard.
-        resultCode = "UNAUTHORIZED";
-        activities.deleteDocument(documentIdentifier,version);
-        return;
 
-      case HttpStatus.SC_OK:
-        if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("Livelink: Created http document connection to Livelink
"+contextMsg);
-        long dataSize = methodThread.getResponseContentLength();
-        // The above replaces this, which required another access:
-        // long dataSize = (long)value.toInteger("DataSize");
-        // A non-existent content length will cause a value of -1 to be returned.  This seems
to indicate that the session login did not work right.
-        if (dataSize >= 0)
-        {
-          if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("Livelink: Content length from livelink server "+contextMsg+"'
= "+new Long(dataSize).toString());
-          if (activities.checkLengthIndexable(dataSize))
-          {
-            try
-            {
-              InputStream is = methodThread.getSafeInputStream();
+              HttpGet method = new HttpGet(getHost().toURI() + ingestHttpAddress);
+              method.setHeader(new BasicHeader("Accept","*/*"));
+
+              ExecuteMethodThread methodThread = new ExecuteMethodThread(client,method);
+              methodThread.start();
               try
               {
-                rd.setBinary(is,dataSize);
-                
-                activities.ingestDocument(documentIdentifier,version,viewHttpAddress,rd);
-
-                if (Logging.connectors.isDebugEnabled())
-                  Logging.connectors.debug("Livelink: Ingesting done "+contextMsg);
 
+                int statusCode = methodThread.getResponseCode();
+                switch (statusCode)
+                {
+                case 500:
+                case 502:
+                  Logging.connectors.warn("Livelink: Service interruption during fetch "+contextMsg+"
with Livelink HTTP Server, retrying...");
+                  throw new ServiceInterruption("Service interruption during fetch",new ManifoldCFException(Integer.toString(statusCode)+"
error while fetching"),System.currentTimeMillis()+60000L,
+                    System.currentTimeMillis()+600000L,-1,true);
+
+                case HttpStatus.SC_UNAUTHORIZED:
+                  Logging.connectors.warn("Livelink: Document fetch unauthorized for "+ingestHttpAddress+"
("+contextMsg+")");
+                  // Since we logged in, we should fail here if the ingestion user doesn't
have access to the
+                  // the document, but if we do, don't fail hard.
+                  resultCode = "UNAUTHORIZED";
+                  activities.deleteDocument(documentIdentifier,version);
+                  return;
+
+                case HttpStatus.SC_OK:
+                  if (Logging.connectors.isDebugEnabled())
+                    Logging.connectors.debug("Livelink: Created http document connection
to Livelink "+contextMsg);
+                  // A non-existent content length will cause a value of -1 to be returned.
 This seems to indicate that the session login did not work right.
+                  if (methodThread.getResponseContentLength() >= 0)
+                  {
+                    try
+                    {
+                      InputStream is = methodThread.getSafeInputStream();
+                      try
+                      {
+                        rd.setBinary(is,dataSize);
+                          
+                        activities.ingestDocument(documentIdentifier,version,viewHttpAddress,rd);
+
+                        if (Logging.connectors.isDebugEnabled())
+                          Logging.connectors.debug("Livelink: Ingesting done "+contextMsg);
+
+                      }
+                      finally
+                      {
+                        // Close stream via thread, since otherwise this can hang
+                        is.close();
+                      }
+                    }
+                    catch (java.net.SocketTimeoutException e)
+                    {
+                      resultCode = "DATATIMEOUT";
+                      resultDescription = e.getMessage();
+                      currentTime = System.currentTimeMillis();
+                      Logging.connectors.warn("Livelink: Livelink socket timed out ingesting
from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                      throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+                    }
+                    catch (java.net.SocketException e)
+                    {
+                      resultCode = "DATASOCKETERROR";
+                      resultDescription = e.getMessage();
+                      currentTime = System.currentTimeMillis();
+                      Logging.connectors.warn("Livelink: Livelink socket error ingesting
from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                      throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+                    }
+                    catch (javax.net.ssl.SSLHandshakeException e)
+                    {
+                      resultCode = "DATASSLHANDSHAKEERROR";
+                      resultDescription = e.getMessage();
+                      currentTime = System.currentTimeMillis();
+                      Logging.connectors.warn("Livelink: SSL handshake failed authenticating
"+contextMsg+": "+e.getMessage(),e);
+                      throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
+                    }
+                    catch (ConnectTimeoutException e)
+                    {
+                      resultCode = "CONNECTTIMEOUT";
+                      resultDescription = e.getMessage();
+                      currentTime = System.currentTimeMillis();
+                      Logging.connectors.warn("Livelink: Livelink socket timed out connecting
to the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                      throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+                    }
+                    catch (InterruptedException e)
+                    {
+                      wasInterrupted = true;
+                      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+                    }
+                    catch (InterruptedIOException e)
+                    {
+                      wasInterrupted = true;
+                      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+                    }
+                    catch (HttpException e)
+                    {
+                      resultCode = "HTTPEXCEPTION";
+                      resultDescription = e.getMessage();
+                      // Treat unknown error ingesting data as a transient condition
+                      currentTime = System.currentTimeMillis();
+                      Logging.connectors.warn("Livelink: HTTP exception ingesting "+contextMsg+":
"+e.getMessage(),e);
+                      throw new ServiceInterruption("HTTP exception ingesting "+contextMsg+":
"+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+                    }
+                    catch (IOException e)
+                    {
+                      resultCode = "DATAEXCEPTION";
+                      resultDescription = e.getMessage();
+                      // Treat unknown error ingesting data as a transient condition
+                      currentTime = System.currentTimeMillis();
+                      Logging.connectors.warn("Livelink: IO exception ingesting "+contextMsg+":
"+e.getMessage(),e);
+                      throw new ServiceInterruption("IO exception ingesting "+contextMsg+":
"+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+                    }
+                    readSize = dataSize;
+                  }
+                  else
+                  {
+                    resultCode = "SESSIONLOGINFAILED";
+                    activities.deleteDocument(documentIdentifier,version);
+                  }
+                  break;
+                case HttpStatus.SC_BAD_REQUEST:
+                case HttpStatus.SC_USE_PROXY:
+                case HttpStatus.SC_GONE:
+                  resultCode = "ERROR "+Integer.toString(statusCode);
+                  throw new ManifoldCFException("Unrecoverable request failure; error = "+Integer.toString(statusCode));
+                default:
+                  resultCode = "UNKNOWN";
+                  Logging.connectors.warn("Livelink: Attempt to retrieve document from '"+ingestHttpAddress+"'
received a response of "+Integer.toString(statusCode)+"; retrying in one minute");
+                  currentTime = System.currentTimeMillis();
+                  throw new ServiceInterruption("Fetch failed; retrying in 1 minute",new
ManifoldCFException("Fetch failed with unknown code "+Integer.toString(statusCode)),
+                    currentTime+60000L,currentTime+600000L,-1,true);
+                }
+              }
+              catch (InterruptedException e)
+              {
+                // Drop the connection on the floor
+                methodThread.interrupt();
+                methodThread = null;
+                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+              }
+              catch (java.net.SocketTimeoutException e)
+              {
+                Logging.connectors.warn("Livelink: Socket timed out reading from the Livelink
HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                resultCode = "TIMEOUT";
+                resultDescription = e.getMessage();
+                currentTime = System.currentTimeMillis();
+                throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+              }
+              catch (java.net.SocketException e)
+              {
+                Logging.connectors.warn("Livelink: Socket error reading from Livelink HTTP
Server "+contextMsg+": "+e.getMessage(), e);
+                resultCode = "SOCKETERROR";
+                resultDescription = e.getMessage();
+                currentTime = System.currentTimeMillis();
+                throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+              }
+              catch (javax.net.ssl.SSLHandshakeException e)
+              {
+                currentTime = System.currentTimeMillis();
+                Logging.connectors.warn("Livelink: SSL handshake failed "+contextMsg+": "+e.getMessage(),e);
+                resultCode = "SSLHANDSHAKEERROR";
+                resultDescription = e.getMessage();
+                throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
+              }
+              catch (ConnectTimeoutException e)
+              {
+                Logging.connectors.warn("Livelink: Connect timed out reading from the Livelink
HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                resultCode = "CONNECTTIMEOUT";
+                resultDescription = e.getMessage();
+                currentTime = System.currentTimeMillis();
+                throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+              }
+              catch (InterruptedIOException e)
+              {
+                methodThread.interrupt();
+                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+              }
+              catch (HttpException e)
+              {
+                resultCode = "EXCEPTION";
+                resultDescription = e.getMessage();
+                throw new ManifoldCFException("Exception getting response "+contextMsg+":
"+e.getMessage(), e);
+              }
+              catch (IOException e)
+              {
+                resultCode = "EXCEPTION";
+                resultDescription = e.getMessage();
+                throw new ManifoldCFException("Exception getting response "+contextMsg+":
"+e.getMessage(), e);
               }
               finally
               {
-                // Close stream via thread, since otherwise this can hang
-                is.close();
+                if (methodThread != null)
+                {
+                  methodThread.abort();
+                  if (!wasInterrupted)
+                  {
+                    try
+                    {
+                     methodThread.finishUp();
+                    }
+                    catch (InterruptedException e)
+                    {
+                      wasInterrupted = true;
+                      throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+                    }
+                  }
+                }
               }
+              
             }
-            catch (java.net.SocketTimeoutException e)
-            {
-              resultCode = "DATATIMEOUT";
-              resultDescription = e.getMessage();
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: Livelink socket timed out ingesting from
the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-              throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-            }
-            catch (java.net.SocketException e)
-            {
-              resultCode = "DATASOCKETERROR";
-              resultDescription = e.getMessage();
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: Livelink socket error ingesting from the
Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-              throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-            }
-            catch (javax.net.ssl.SSLHandshakeException e)
-            {
-              resultCode = "DATASSLHANDSHAKEERROR";
-              resultDescription = e.getMessage();
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: SSL handshake failed authenticating "+contextMsg+":
"+e.getMessage(),e);
-              throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
-            }
-            catch (ConnectTimeoutException e)
-            {
-              resultCode = "CONNECTTIMEOUT";
-              resultDescription = e.getMessage();
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: Livelink socket timed out connecting to
the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-              throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-            }
-            catch (InterruptedException e)
-            {
-              throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-            }
-            catch (InterruptedIOException e)
-            {
-              throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-            }
-            catch (HttpException e)
-            {
-              resultCode = "HTTPEXCEPTION";
-              resultDescription = e.getMessage();
-              // Treat unknown error ingesting data as a transient condition
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: HTTP exception ingesting "+contextMsg+":
"+e.getMessage(),e);
-              throw new ServiceInterruption("HTTP exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-            }
-            catch (IOException e)
+            else
             {
-              resultCode = "DATAEXCEPTION";
-              resultDescription = e.getMessage();
-              // Treat unknown error ingesting data as a transient condition
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: IO exception ingesting "+contextMsg+": "+e.getMessage(),e);
-              throw new ServiceInterruption("IO exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+              // Use FetchVersion instead
+              long currentTime;
+              
+              // Fire up the document reading thread
+              DocumentReadingThread t = new DocumentReadingThread(vol,objID,0);
+              try 
+              {
+                t.start();
+                try
+                {
+                  InputStream is = t.getSafeInputStream();
+                  try 
+                  {
+                    // Can only index while background thread is running!
+                    rd.setBinary(is, dataSize);
+                    activities.ingestDocument(documentIdentifier, version, viewHttpAddress, rd);
+                  }
+                  finally
+                  {
+                    is.close();
+                  }
+                }
+                catch (ManifoldCFException e)
+                {
+                  if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+                    wasInterrupted = true;
+                  throw e;
+                }
+                catch (java.net.SocketTimeoutException e)
+                {
+                  throw e;
+                }
+                catch (InterruptedIOException e)
+                {
+                  wasInterrupted = true;
+                  throw e;
+                }
+                finally
+                {
+                  if (!wasInterrupted)
+                    t.finishUp();
+                }
+
+                // No errors.  Record the fact that we made it.
+                resultCode = "OK";
+                readSize = dataSize;
+              }
+              catch (InterruptedException e) 
+              {
+                t.interrupt();
+                throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+                  ManifoldCFException.INTERRUPTED);
+              }
+              catch (ConnectTimeoutException e)
+              {
+                Logging.connectors.warn("Livelink: Connect timed out "+contextMsg+": "+e.getMessage(), e);
+                resultCode = "CONNECTTIMEOUT";
+                resultDescription = e.getMessage();
+                currentTime = System.currentTimeMillis();
+                throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+              }
+              catch (InterruptedIOException e)
+              {
+                t.interrupt();
+                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+              }
+              catch (IOException e)
+              {
+                resultCode = "EXCEPTION";
+                resultDescription = e.getMessage();
+                throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+              }
+              catch (ManifoldCFException e)
+              {
+                if (e.getErrorCode() != ManifoldCFException.INTERRUPTED)
+                {
+                  resultCode = "EXCEPTION";
+                  resultDescription = e.getMessage();
+                }
+                throw e;
+              }
+              catch (RuntimeException e)
+              {
+                resultCode = "EXCEPTION";
+                resultDescription = e.getMessage();
+                handleLivelinkRuntimeException(e,0,true);
+              }
             }
-            readSize = new Long(dataSize);
           }
           else
           {
+            // Document not indexable because of its length
+            resultDescription = "Document length ("+dataSize+") was rejected by output connector";
+            if (Logging.connectors.isDebugEnabled())
+              Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its length ("+dataSize+") was rejected by output connector");
             resultCode = "DOCUMENTTOOLONG";
             activities.deleteDocument(documentIdentifier,version);
           }
         }
         else
         {
-          resultCode = "SESSIONLOGINFAILED";
+          // Document not indexable because of its mime type
+          resultDescription = "Mime type ("+mimeType+") was rejected by output connector";
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its mime type ("+mimeType+") was rejected by output connector");
+          resultCode = "MIMETYPEEXCLUSION";
           activities.deleteDocument(documentIdentifier,version);
         }
-        break;
-      case HttpStatus.SC_BAD_REQUEST:
-      case HttpStatus.SC_USE_PROXY:
-      case HttpStatus.SC_GONE:
-        resultCode = "ERROR "+Integer.toString(statusCode);
-        throw new ManifoldCFException("Unrecoverable request failure; error = "+Integer.toString(statusCode));
-      default:
-        resultCode = "UNKNOWN";
-        Logging.connectors.warn("Livelink: Attempt to retrieve document from '"+ingestHttpAddress+"' received a response of "+Integer.toString(statusCode)+"; retrying in one minute");
-        currentTime = System.currentTimeMillis();
-        throw new ServiceInterruption("Fetch failed; retrying in 1 minute",new ManifoldCFException("Fetch failed with unknown code "+Integer.toString(statusCode)),
-          currentTime+60000L,currentTime+600000L,-1,true);
       }
-    }
-    catch (InterruptedException e)
-    {
-      // Drop the connection on the floor
-      methodThread.interrupt();
-      methodThread = null;
-      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-    }
-    catch (java.net.SocketTimeoutException e)
-    {
-      Logging.connectors.warn("Livelink: Socket timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-      resultCode = "TIMEOUT";
-      resultDescription = e.getMessage();
-      currentTime = System.currentTimeMillis();
-      throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
-    }
-    catch (java.net.SocketException e)
-    {
-      Logging.connectors.warn("Livelink: Socket error reading from Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-      resultCode = "SOCKETERROR";
-      resultDescription = e.getMessage();
-      currentTime = System.currentTimeMillis();
-      throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
-    }
-    catch (javax.net.ssl.SSLHandshakeException e)
-    {
-      currentTime = System.currentTimeMillis();
-      Logging.connectors.warn("Livelink: SSL handshake failed "+contextMsg+": "+e.getMessage(),e);
-      resultCode = "SSLHANDSHAKEERROR";
-      resultDescription = e.getMessage();
-      throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
-    }
-    catch (ConnectTimeoutException e)
-    {
-      Logging.connectors.warn("Livelink: Connect timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-      resultCode = "CONNECTTIMEOUT";
-      resultDescription = e.getMessage();
-      currentTime = System.currentTimeMillis();
-      throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
-    }
-    catch (InterruptedIOException e)
-    {
-      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-    }
-    catch (HttpException e)
-    {
-      resultCode = "EXCEPTION";
-      resultDescription = e.getMessage();
-      throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
-    }
-    catch (IOException e)
-    {
-      resultCode = "EXCEPTION";
-      resultDescription = e.getMessage();
-      throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+      else
+      {
+        // Document not ingestable due to URL
+        resultDescription = "URL ("+viewHttpAddress+") was rejected by output connector";
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its URL ("+viewHttpAddress+") was rejected by output connector");
+        resultCode = "URLEXCLUSION";
+        activities.deleteDocument(documentIdentifier,version);
+      }
     }
     finally
     {
-      if (methodThread != null)
-      {
-        methodThread.abort();
-        activities.recordActivity(new Long(startTime),ACTIVITY_FETCH,readSize,Integer.toString(objID),resultCode,resultDescription,null);
-        try
-        {
-         methodThread.finishUp();
-        }
-        catch (InterruptedException e)
-        {
-          throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-        }
-      }
+      if (!wasInterrupted)
+        activities.recordActivity(new Long(startTime),ACTIVITY_FETCH,readSize,vol+":"+objID,resultCode,resultDescription,null);
     }
   }
 
@@ -5432,6 +5574,17 @@ public class LivelinkConnector extends o
       return getVersionValue() != null;
     }
     
+    /** Get data size.
+    */
+    public Long getDataSize()
+      throws ServiceInterruption, ManifoldCFException
+    {
+      LLValue elem = getVersionValue();
+      if (elem == null)
+        return null;
+      return new Long(elem.toInteger("DATASIZE"));
+    }
+
     /** Get file name.
     */
     public String getFileName()
@@ -7306,8 +7459,11 @@ public class LivelinkConnector extends o
         XThreadOutputStream outputStream = new XThreadOutputStream(stream);
         try 
         {
-          int rval = LLDocs.FetchVersion(volumeID, docID, versionNumber, outputStream);
-          // MHL to do something with rval
+          int status = LLDocs.FetchVersion(volumeID, docID, versionNumber, outputStream);
+          if (status != 0)
+          {
+            throw new ManifoldCFException("Error retrieving contents of document "+Integer.toString(volumeID)+":"+Integer.toString(docID)+" revision "+versionNumber+" : Status="+Integer.toString(status)+" ("+llServer.getErrors()+")");
+          }
         }
         finally
         {
@@ -7323,7 +7479,7 @@ public class LivelinkConnector extends o
     }
     
     public void finishUp()
-      throws InterruptedException, IOException
+      throws InterruptedException, ManifoldCFException
     {
       // This will be called during the finally
       // block in the case where all is well (and
@@ -7333,8 +7489,8 @@ public class LivelinkConnector extends o
       join();
       Throwable thr = exception;
       if (thr != null) {
-        if (thr instanceof IOException)
-          throw (IOException) thr;
+        if (thr instanceof ManifoldCFException)
+          throw (ManifoldCFException) thr;
         else if (thr instanceof RuntimeException)
           throw (RuntimeException) thr;
         else if (thr instanceof Error)



Mime
View raw message