incubator-connectors-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1140925 - in /incubator/lcf/trunk/connectors: livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/ meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/ sharepoint/connector/src/ma...
Date Wed, 29 Jun 2011 00:56:26 GMT
Author: kwright
Date: Wed Jun 29 00:56:26 2011
New Revision: 1140925

URL: http://svn.apache.org/viewvc?rev=1140925&view=rev
Log:
Hook up length check in the remaining connectors.

Modified:
    incubator/lcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
    incubator/lcf/trunk/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java
    incubator/lcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java

Modified: incubator/lcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java?rev=1140925&r1=1140924&r2=1140925&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
(original)
+++ incubator/lcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
Wed Jun 29 00:56:26 2011
@@ -3868,6 +3868,7 @@ public class LivelinkConnector extends o
           // Since we logged in, we should fail here if the ingestion user doesn't have access to the
           // the document, but if we do, don't fail hard.
           resultCode = "UNAUTHORIZED";
+          activities.deleteDocument(documentIdentifier,version);
           return;
 
         case HttpStatus.SC_OK:
@@ -3881,80 +3882,88 @@ public class LivelinkConnector extends o
           {
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("Livelink: Content length from livelink server "+contextMsg+"'
= "+new Long(dataSize).toString());
-
-            try
+            if (activities.checkLengthIndexable(dataSize))
             {
-              InputStream is = method.getResponseBodyAsStream();
               try
               {
-                rd.setBinary(is,dataSize);
-
-                activities.ingestDocument(documentIdentifier,version,viewHttpAddress,rd);
-
-                if (Logging.connectors.isDebugEnabled())
-                  Logging.connectors.debug("Livelink: Ingesting done "+contextMsg);
-
+                InputStream is = method.getResponseBodyAsStream();
+                try
+                {
+                  rd.setBinary(is,dataSize);
+
+                  activities.ingestDocument(documentIdentifier,version,viewHttpAddress,rd);
+
+                  if (Logging.connectors.isDebugEnabled())
+                    Logging.connectors.debug("Livelink: Ingesting done "+contextMsg);
+
+                }
+                finally
+                {
+                  // Close stream via thread, since otherwise this can hang
+                  closeViaThread(is);
+                }
               }
-              finally
+              catch (java.net.SocketTimeoutException e)
               {
-                // Close stream via thread, since otherwise this can hang
-                closeViaThread(is);
+                resultCode = "DATATIMEOUT";
+                resultDescription = e.getMessage();
+                currentTime = System.currentTimeMillis();
+                Logging.connectors.warn("Livelink: Livelink socket timed out ingesting from
the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
               }
+              catch (java.net.SocketException e)
+              {
+                resultCode = "DATASOCKETERROR";
+                resultDescription = e.getMessage();
+                currentTime = System.currentTimeMillis();
+                Logging.connectors.warn("Livelink: Livelink socket error ingesting from the
Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+              }
+              catch (javax.net.ssl.SSLHandshakeException e)
+              {
+                resultCode = "DATASSLHANDSHAKEERROR";
+                resultDescription = e.getMessage();
+                currentTime = System.currentTimeMillis();
+                Logging.connectors.warn("Livelink: SSL handshake failed authenticating "+contextMsg+":
"+e.getMessage(),e);
+                throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
+              }
+              catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+              {
+                resultCode = "CONNECTTIMEOUT";
+                resultDescription = e.getMessage();
+                currentTime = System.currentTimeMillis();
+                Logging.connectors.warn("Livelink: Livelink socket timed out connecting to
the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+              }
+              catch (InterruptedException e)
+              {
+                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+              }
+              catch (InterruptedIOException e)
+              {
+                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+              }
+              catch (IOException e)
+              {
+                resultCode = "DATAEXCEPTION";
+                resultDescription = e.getMessage();
+                // Treat unknown error ingesting data as a transient condition
+                currentTime = System.currentTimeMillis();
+                Logging.connectors.warn("Livelink: IO exception ingesting "+contextMsg+":
"+e.getMessage(),e);
+                throw new ServiceInterruption("IO exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+              }
+              readSize = new Long(dataSize);
             }
-            catch (java.net.SocketTimeoutException e)
-            {
-              resultCode = "DATATIMEOUT";
-              resultDescription = e.getMessage();
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: Livelink socket timed out ingesting from
the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-              throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-            }
-            catch (java.net.SocketException e)
-            {
-              resultCode = "DATASOCKETERROR";
-              resultDescription = e.getMessage();
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: Livelink socket error ingesting from the
Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-              throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-            }
-            catch (javax.net.ssl.SSLHandshakeException e)
-            {
-              resultCode = "DATASSLHANDSHAKEERROR";
-              resultDescription = e.getMessage();
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: SSL handshake failed authenticating "+contextMsg+":
"+e.getMessage(),e);
-              throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
-            }
-            catch (org.apache.commons.httpclient.ConnectTimeoutException e)
-            {
-              resultCode = "CONNECTTIMEOUT";
-              resultDescription = e.getMessage();
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: Livelink socket timed out connecting to
the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-              throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-            }
-            catch (InterruptedException e)
-            {
-              throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-            }
-            catch (InterruptedIOException e)
-            {
-              throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-            }
-            catch (IOException e)
+            else
             {
-              resultCode = "DATAEXCEPTION";
-              resultDescription = e.getMessage();
-              // Treat unknown error ingesting data as a transient condition
-              currentTime = System.currentTimeMillis();
-              Logging.connectors.warn("Livelink: IO exception ingesting "+contextMsg+": "+e.getMessage(),e);
-              throw new ServiceInterruption("IO exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+              resultCode = "DOCUMENTTOOLONG";
+              activities.deleteDocument(documentIdentifier,version);
             }
-            readSize = new Long(dataSize);
           }
           else
           {
             resultCode = "SESSIONLOGINFAILED";
+            activities.deleteDocument(documentIdentifier,version);
           }
           break;
         case HttpStatus.SC_BAD_REQUEST:

Modified: incubator/lcf/trunk/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java?rev=1140925&r1=1140924&r2=1140925&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java
(original)
+++ incubator/lcf/trunk/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java
Wed Jun 29 00:56:26 2011
@@ -1361,26 +1361,32 @@ public class MeridioConnector extends or
               if (theTempFile.isFile())
               {
                 long fileSize = theTempFile.length();                   // ap.getSize();
-                InputStream is = new FileInputStream(theTempFile);      // ap.getDataHandler().getInputStream();
-                try
+                if (activities.checkLengthIndexable(fileSize))
                 {
-                  repositoryDocument.setBinary(is, fileSize);
+                  InputStream is = new FileInputStream(theTempFile);      // ap.getDataHandler().getInputStream();
+                  try
+                  {
+                    repositoryDocument.setBinary(is, fileSize);
 
-                  if (null != activities)
+                    if (null != activities)
+                    {
+                      activities.ingestDocument(documentIdentifier, docVersion,
+                        fileURL, repositoryDocument);
+                    }
+                  }
+                  finally
                   {
-                    activities.ingestDocument(documentIdentifier, docVersion,
-                      fileURL, repositoryDocument);
+                    is.close();
                   }
                 }
-                finally
-                {
-                  is.close();
-                }
+                else
+                  activities.deleteDocument(documentIdentifier, docVersion);
               }
               else
               {
                 if (Logging.connectors.isDebugEnabled())
                   Logging.connectors.debug("Meridio: Expected temporary file was not present
- skipping document '"+new Long(docId).toString() + "'");
+                activities.deleteDocument(documentIdentifier, docVersion);
               }
             }
             finally

Modified: incubator/lcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1140925&r1=1140924&r2=1140925&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
(original)
+++ incubator/lcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
Wed Jun 29 00:56:26 2011
@@ -1178,129 +1178,137 @@ public class SharePointRepository extend
                 {
                   os.close();
                 }
-                InputStream is = new FileInputStream(tempFile);
-                try
+                
+                long documentLength = tempFile.length();
+                if (activities.checkLengthIndexable(documentLength))
                 {
-                  RepositoryDocument data = new RepositoryDocument();
-                  data.setBinary( is, tempFile.length() );
-
-                  if (acls != null)
+                  InputStream is = new FileInputStream(tempFile);
+                  try
                   {
-                    String[] actualAcls = new String[acls.size()];
-                    int j = 0;
-                    while (j < actualAcls.length)
-                    {
-                      actualAcls[j] = (String)acls.get(j);
-                      j++;
-                    }
+                    RepositoryDocument data = new RepositoryDocument();
+                    data.setBinary( is, documentLength );
 
-                    if (Logging.connectors.isDebugEnabled())
+                    if (acls != null)
                     {
-                      j = 0;
-                      StringBuilder sb = new StringBuilder("SharePoint: Acls: [ ");
+                      String[] actualAcls = new String[acls.size()];
+                      int j = 0;
                       while (j < actualAcls.length)
                       {
-                        sb.append(actualAcls[j++]).append(" ");
+                        actualAcls[j] = (String)acls.get(j);
+                        j++;
                       }
-                      sb.append("]");
-                      Logging.connectors.debug( sb.toString() );
-                    }
-
-                    data.setACL( actualAcls );
-                  }
 
-                  if (denyAcl != null)
-                  {
-                    String[] actualDenyAcls = new String[]{denyAcl};
-                    data.setDenyACL(actualDenyAcls);
-                  }
+                      if (Logging.connectors.isDebugEnabled())
+                      {
+                        j = 0;
+                        StringBuilder sb = new StringBuilder("SharePoint: Acls: [ ");
+                        while (j < actualAcls.length)
+                        {
+                          sb.append(actualAcls[j++]).append(" ");
+                        }
+                        sb.append("]");
+                        Logging.connectors.debug( sb.toString() );
+                      }
 
-                  // Add the path metadata item into the mix, if enabled
-                  String pathAttributeName = sDesc.getPathAttributeName();
-                  if (pathAttributeName != null && pathAttributeName.length() >
0)
-                  {
-                    if (Logging.connectors.isDebugEnabled())
-                      Logging.connectors.debug("SharePoint: Path attribute name is '"+pathAttributeName+"'");
-                    String pathString = sDesc.getPathAttributeValue(documentIdentifier);
-                    if (Logging.connectors.isDebugEnabled())
-                      Logging.connectors.debug("SharePoint: Path attribute value is '"+pathString+"'");
-                    data.addField(pathAttributeName,pathString);
-                  }
-                  else
-                    Logging.connectors.debug("SharePoint: Path attribute name is null");
+                      data.setACL( actualAcls );
+                    }
 
-                  // Retrieve field values from SharePoint
-                  if (metadataDescription.size() > 0)
-                  {
-                    String documentLibID = (String)docLibIDMap.get(decodedLibPath);
-                    if (documentLibID == null)
+                    if (denyAcl != null)
                     {
-                      documentLibID = proxy.getDocLibID( encodePath(site), site, libName);
-                      if (documentLibID == null)
-                        documentLibID = "";
-                      docLibIDMap.put(decodedLibPath,documentLibID);
+                      String[] actualDenyAcls = new String[]{denyAcl};
+                      data.setDenyACL(actualDenyAcls);
                     }
 
-                    if (documentLibID.length() == 0)
+                    // Add the path metadata item into the mix, if enabled
+                    String pathAttributeName = sDesc.getPathAttributeName();
+                    if (pathAttributeName != null && pathAttributeName.length() >
0)
                     {
                       if (Logging.connectors.isDebugEnabled())
-                        Logging.connectors.debug("SharePoint: Library '"+decodedLibPath+"'
no longer exists - deleting document '"+documentIdentifier+"'");
-                      activities.deleteDocument(documentIdentifier,version);
-                      i++;
-                      continue;
+                        Logging.connectors.debug("SharePoint: Path attribute name is '"+pathAttributeName+"'");
+                      String pathString = sDesc.getPathAttributeValue(documentIdentifier);
+                      if (Logging.connectors.isDebugEnabled())
+                        Logging.connectors.debug("SharePoint: Path attribute value is '"+pathString+"'");
+                      data.addField(pathAttributeName,pathString);
                     }
+                    else
+                      Logging.connectors.debug("SharePoint: Path attribute name is null");
 
-                    int cutoff = decodedLibPath.lastIndexOf("/");
-                    Map values = proxy.getFieldValues( metadataDescription, encodePath(site),
documentLibID, decodedDocumentPath.substring(cutoff+1) );
-                    if (values != null)
+                    // Retrieve field values from SharePoint
+                    if (metadataDescription.size() > 0)
                     {
-                      Iterator iter = values.keySet().iterator();
-                      while (iter.hasNext())
+                      String documentLibID = (String)docLibIDMap.get(decodedLibPath);
+                      if (documentLibID == null)
                       {
-                        String fieldName = (String)iter.next();
-                        String fieldData = (String)values.get(fieldName);
-                        data.addField(fieldName,fieldData);
+                        documentLibID = proxy.getDocLibID( encodePath(site), site, libName);
+                        if (documentLibID == null)
+                          documentLibID = "";
+                        docLibIDMap.put(decodedLibPath,documentLibID);
+                      }
+
+                      if (documentLibID.length() == 0)
+                      {
+                        if (Logging.connectors.isDebugEnabled())
+                          Logging.connectors.debug("SharePoint: Library '"+decodedLibPath+"'
no longer exists - deleting document '"+documentIdentifier+"'");
+                        activities.deleteDocument(documentIdentifier,version);
+                        i++;
+                        continue;
+                      }
+
+                      int cutoff = decodedLibPath.lastIndexOf("/");
+                      Map values = proxy.getFieldValues( metadataDescription, encodePath(site),
documentLibID, decodedDocumentPath.substring(cutoff+1) );
+                      if (values != null)
+                      {
+                        Iterator iter = values.keySet().iterator();
+                        while (iter.hasNext())
+                        {
+                          String fieldName = (String)iter.next();
+                          String fieldData = (String)values.get(fieldName);
+                          data.addField(fieldName,fieldData);
+                        }
+                      }
+                      else
+                      {
+                        // Document has vanished
+                        if (Logging.connectors.isDebugEnabled())
+                          Logging.connectors.debug("SharePoint: Document metadata fetch failure
indicated that document is gone: '"+documentIdentifier+"' - removing");
+                        activities.deleteDocument(documentIdentifier,version);
+                        i++;
+                        continue;
                       }
                     }
-                    else
-                    {
-                      // Document has vanished
-                      if (Logging.connectors.isDebugEnabled())
-                        Logging.connectors.debug("SharePoint: Document metadata fetch failure
indicated that document is gone: '"+documentIdentifier+"' - removing");
-                      activities.deleteDocument(documentIdentifier,version);
-                      i++;
-                      continue;
-                    }
-                  }
 
-                  activities.ingestDocument( documentIdentifier, version, fileUrl , data
);
-                }
-                finally
-                {
-                  try
-                  {
-                    is.close();
-                  }
-                  catch (java.net.SocketTimeoutException e)
-                  {
-                    // This is not fatal
-                    Logging.connectors.debug("SharePoint: Timeout before read could finish
for '"+fileUrl+"': "+e.getMessage(),e);
-                  }
-                  catch (org.apache.commons.httpclient.ConnectTimeoutException e)
-                  {
-                    // This is not fatal
-                    Logging.connectors.debug("SharePoint: Connect timeout before read could
finish for '"+fileUrl+"': "+e.getMessage(),e);
-                  }
-                  catch (InterruptedIOException e)
-                  {
-                    throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+                    activities.ingestDocument( documentIdentifier, version, fileUrl , data
);
                   }
-                  catch (IOException e)
+                  finally
                   {
-                    // This is not fatal
-                    Logging.connectors.debug("SharePoint: Server closed connection before
read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+                    try
+                    {
+                      is.close();
+                    }
+                    catch (java.net.SocketTimeoutException e)
+                    {
+                      // This is not fatal
+                      Logging.connectors.debug("SharePoint: Timeout before read could finish
for '"+fileUrl+"': "+e.getMessage(),e);
+                    }
+                    catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+                    {
+                      // This is not fatal
+                      Logging.connectors.debug("SharePoint: Connect timeout before read could
finish for '"+fileUrl+"': "+e.getMessage(),e);
+                    }
+                    catch (InterruptedIOException e)
+                    {
+                      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+                    }
+                    catch (IOException e)
+                    {
+                      // This is not fatal
+                      Logging.connectors.debug("SharePoint: Server closed connection before
read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+                    }
                   }
                 }
+                else
+                  // Document too long
+                  activities.deleteDocument( documentIdentifier, version );
               }
               finally
               {



Mime
View raw message