incubator-connectors-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1140728 - in /incubator/lcf/trunk/connectors: documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/ filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/ filesystem/connector/src/main...
Date Tue, 28 Jun 2011 17:10:05 GMT
Author: kwright
Date: Tue Jun 28 17:10:04 2011
New Revision: 1140728

URL: http://svn.apache.org/viewvc?rev=1140728&view=rev
Log:
Add length restrictions to documentum, filenet, and file connectors.

Modified:
    incubator/lcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
    incubator/lcf/trunk/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java
    incubator/lcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
    incubator/lcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java

Modified: incubator/lcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java?rev=1140728&r1=1140727&r2=1140728&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
(original)
+++ incubator/lcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
Tue Jun 28 17:10:04 2011
@@ -1775,21 +1775,30 @@ public class DCTM extends org.apache.man
                 RepositoryDocument rd = t.getResponse();
                 if (rd != null)
                 {
-                  // Stream the data to the ingestion system
-                  InputStream is = new FileInputStream(objFileTemp);
-                  try
+                  long fileLength = t.getActivityFileLength().longValue();
+                  if (activities.checkLengthIndexable(fileLength))
                   {
-                    rd.setBinary(is, t.getActivityFileLength().longValue());
-                    // Do the ingestion
-                    activities.ingestDocument(documentIdentifier,versionString,
-                      t.getURI(), rd);
-                  }
-                  finally
-                  {
-                    is.close();
+                    // Stream the data to the ingestion system
+                    InputStream is = new FileInputStream(objFileTemp);
+                    try
+                    {
+                      rd.setBinary(is, fileLength);
+                      // Do the ingestion
+                      activities.ingestDocument(documentIdentifier,versionString,
+                        t.getURI(), rd);
+                    }
+                    finally
+                    {
+                      is.close();
+                    }
                   }
+                  else
+                    rd = null;
                 }
-
+                
+                if (rd == null)
+                  activities.deleteDocument(documentIdentifier,versionString);
+                
                 // Abort the retry loop and go on to the next document
                 break;
 

Modified: incubator/lcf/trunk/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java?rev=1140728&r1=1140727&r2=1140728&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java
(original)
+++ incubator/lcf/trunk/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java
Tue Jun 28 17:10:04 2011
@@ -1268,66 +1268,72 @@ public class FilenetConnector extends or
               activities.recordActivity(new Long(startTime),ACTIVITY_FETCH,
                 new Long(fileLength),documentIdentifier,"Success",null,null);
 
-              InputStream is = new FileInputStream(objFileTemp);
-              try
+              if (activities.checkLengthIndexable(fileLength))
               {
-                RepositoryDocument rd = new RepositoryDocument();
-                rd.setBinary(is, fileLength);
 
-                // Apply metadata
-                int j = 0;
-                while (j < metadataNames.size())
+                InputStream is = new FileInputStream(objFileTemp);
+                try
                 {
-                  String metadataName = (String)metadataNames.get(j);
-                  String metadataValue = (String)metadataValues.get(j);
-                  rd.addField(metadataName,metadataValue);
-                  j++;
-                }
+                  RepositoryDocument rd = new RepositoryDocument();
+                  rd.setBinary(is, fileLength);
 
-                // Apply acls
-                if (aclValues != null)
-                {
-                  String[] acls = new String[aclValues.size()];
-                  j = 0;
-                  while (j < aclValues.size())
+                  // Apply metadata
+                  int j = 0;
+                  while (j < metadataNames.size())
                   {
-                    acls[j] = (String)aclValues.get(j);
+                    String metadataName = (String)metadataNames.get(j);
+                    String metadataValue = (String)metadataValues.get(j);
+                    rd.addField(metadataName,metadataValue);
                     j++;
                   }
-                  rd.setACL(acls);
-                }
-                if (denyAclValues != null)
-                {
-                  String[] denyAcls = new String[denyAclValues.size()];
-                  j = 0;
-                  while (j < denyAclValues.size())
+
+                  // Apply acls
+                  if (aclValues != null)
                   {
-                    denyAcls[j] = (String)denyAclValues.get(j);
-                    j++;
+                    String[] acls = new String[aclValues.size()];
+                    j = 0;
+                    while (j < aclValues.size())
+                    {
+                      acls[j] = (String)aclValues.get(j);
+                      j++;
+                    }
+                    rd.setACL(acls);
+                  }
+                  if (denyAclValues != null)
+                  {
+                    String[] denyAcls = new String[denyAclValues.size()];
+                    j = 0;
+                    while (j < denyAclValues.size())
+                    {
+                      denyAcls[j] = (String)denyAclValues.get(j);
+                      j++;
+                    }
+                    rd.setDenyACL(denyAcls);
                   }
-                  rd.setDenyACL(denyAcls);
-                }
 
-                // Ingest
-                activities.ingestDocument(documentIdentifier,documentVersion,
-                  convertToURI(urlBase.toString(),vId,elementNumber,documentClass.toString()),rd);
+                  // Ingest
+                  activities.ingestDocument(documentIdentifier,documentVersion,
+                    convertToURI(urlBase.toString(),vId,elementNumber,documentClass.toString()),rd);
 
-              }
-              finally
-              {
-                try
-                {
-                  is.close();
                 }
-                catch (InterruptedIOException e)
+                finally
                 {
-                  throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-                }
-                catch (IOException e)
-                {
-                  Logging.connectors.warn("FileNet: IOException closing file input stream: "+e.getMessage(),e);
+                  try
+                  {
+                    is.close();
+                  }
+                  catch (InterruptedIOException e)
+                  {
+                    throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+                  }
+                  catch (IOException e)
+                  {
+                    Logging.connectors.warn("FileNet: IOException closing file input stream: "+e.getMessage(),e);
+                  }
                 }
               }
+              else
+                activities.deleteDocument(documentIdentifier,documentVersion);
             }
             finally
             {

Modified: incubator/lcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java?rev=1140728&r1=1140727&r2=1140728&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
(original)
+++ incubator/lcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
Tue Jun 28 17:10:04 2011
@@ -132,15 +132,25 @@ public class FileConnector extends org.a
 
 
   /** Get document versions given an array of document identifiers.
-  * This method is called for EVERY document that is considered. It is
-  * therefore important to perform as little work as possible here.
+  * This method is called for EVERY document that is considered. It is therefore important to perform
+  * as little work as possible here.
+  * The connector will be connected before this method can be called.
   *@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
+  *@param oldVersions is the corresponding array of version strings that have been saved for the document identifiers.
+  *   A null value indicates that this is a first-time fetch, while an empty string indicates that the previous document
+  *   had an empty version string.
+  *@param activities is the interface this method should use to perform whatever framework actions are desired.
+  *@param spec is the current document specification for the current job.  If there is a dependency on this
+  * specification, then the version string should include the pertinent data, so that reingestion will occur
+  * when the specification changes.  This is primarily useful for metadata.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
   *@return the corresponding version strings, with null in the places where the document no longer exists.
   * Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
   * will always be processed.
   */
-  @Override
-  public String[] getDocumentVersions(String[] documentIdentifiers, DocumentSpecification spec)
+  public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activities,
+    DocumentSpecification spec, int jobMode, boolean usesDefaultAuthority)
     throws ManifoldCFException, ServiceInterruption
   {
     String[] rval = new String[documentIdentifiers.length];
@@ -163,12 +173,17 @@ public class FileConnector extends org.a
         else
         {
           // It's a file
-          // Get the file's modified date.
-          long lastModified = file.lastModified();
           long fileLength = file.length();
-          StringBuilder sb = new StringBuilder();
-          sb.append(new Long(lastModified).toString()).append(":").append(new Long(fileLength).toString());
-          rval[i] = sb.toString();
+          if (activities.checkLengthIndexable(fileLength))
+          {
+            // Get the file's modified date.
+            long lastModified = file.lastModified();
+            StringBuilder sb = new StringBuilder();
+            sb.append(new Long(lastModified).toString()).append(":").append(new Long(fileLength).toString());
+            rval[i] = sb.toString();
+          }
+          else
+            rval[i] = null;
         }
       }
       else

Modified: incubator/lcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java?rev=1140728&r1=1140727&r2=1140728&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java
(original)
+++ incubator/lcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java
Tue Jun 28 17:10:04 2011
@@ -518,7 +518,7 @@ public class SharedDriveConnector extend
         // null, it means that the windows permissions are not right and directory/file is not readable!!!
         String newPath = getFileCanonicalPath(file);
         // We MUST check the specification here, otherwise a recrawl may not delete what it's supposed to!
-        if (fileExists(file) && newPath != null && checkInclude(file,newPath,spec))
+        if (fileExists(file) && newPath != null && checkInclude(file,newPath,spec,activities))
         {
           if (fileIsDirectory(file))
           {
@@ -1352,7 +1352,7 @@ public class SharedDriveConnector extend
   *@param documentSpecification is the specification.
   *@return true if it should be included.
   */
-  protected boolean checkInclude(SmbFile file, String fileName, DocumentSpecification documentSpecification)
+  protected boolean checkInclude(SmbFile file, String fileName, DocumentSpecification documentSpecification, IFingerprintActivity activities)
     throws ManifoldCFException, ServiceInterruption
   {
     if (Logging.connectors.isDebugEnabled())
@@ -1393,6 +1393,9 @@ public class SharedDriveConnector extend
       int i;
       if (!isDirectory)
       {
+        long fileLength = fileLength(file);
+        if (!activities.checkLengthIndexable(fileLength))
+          return false;
         long maxFileLength = Long.MAX_VALUE;
         i = 0;
         while (i < documentSpecification.getChildCount())
@@ -1408,11 +1411,11 @@ public class SharedDriveConnector extend
             }
             catch (NumberFormatException e)
             {
-              throw new ManifoldCFException("Bad number",e);
+              throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
             }
           }
         }
-        if (fileLength(file) > maxFileLength)
+        if (fileLength > maxFileLength)
           return false;
       }
 
@@ -4714,7 +4717,7 @@ public class SharedDriveConnector extend
           // documents that we will immediately turn around and remove.  However, if this
           // check was not here, everything should still function, provided the getDocumentVersions()
           // method does the right thing.
-          if (checkInclude(f, newPath, spec))
+          if (checkInclude(f, newPath, spec, activities))
           {
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("JCIFS: Recorded path is '" + newPath + "' and is included.");



Mime
View raw message