manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1610996 - in /manifoldcf/trunk: ./ connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/ connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/ connectors/livelin...
Date Wed, 16 Jul 2014 13:18:32 GMT
Author: kwright
Date: Wed Jul 16 13:18:31 2014
New Revision: 1610996

URL: http://svn.apache.org/r1610996
Log:
Fix for CONNECTORS-995.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
    manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
    manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
    manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1610996&r1=1610995&r2=1610996&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Wed Jul 16 13:18:31 2014
@@ -3,6 +3,11 @@ $Id$
 
 ======================= 1.7-dev =====================
 
+CONNECTORS-995: Connectors dealing with non-indexable documents
+such as directories should call noDocument() on those documents if they
+want incremental behavior.
+(Karl Wright)
+
 CONNECTORS-993: Pipeline code not handling "no document" case properly.
 This problem was an oversight of the new pipeline code.  Essentially,
 transformation connectors could choose not to send a document onward

Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java?rev=1610996&r1=1610995&r2=1610996&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java Wed Jul 16 13:18:31 2014
@@ -289,6 +289,7 @@ public class FileConnector extends org.a
       {
         if (file.isDirectory())
         {
+          activities.noDocument(documentIdentifier,version);
           // Queue up stuff for directory
           long startTime = System.currentTimeMillis();
           String errorCode = "OK";

Modified: manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java?rev=1610996&r1=1610995&r2=1610996&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java (original)
+++ manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java Wed Jul 16 13:18:31 2014
@@ -414,6 +414,7 @@ public class HDFSRepositoryConnector ext
       }
         
       if (fileStatus.isDirectory()) {
+        activities.noDocument(documentIdentifier,version);
         /*
           * Queue up stuff for directory
           */

Modified: manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java?rev=1610996&r1=1610995&r2=1610996&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java (original)
+++ manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java Wed Jul 16 13:18:31 2014
@@ -1450,7 +1450,10 @@ public class LivelinkConnector extends o
     {
       // Since each livelink access is time-consuming, be sure that we abort if the job has gone inactive
       activities.checkJobStillActive();
+      
       String documentIdentifier = documentIdentifiers[i];
+      String version = versions[i];
+      
       boolean doScanOnly = scanOnly[i];
 
       boolean isFolder = documentIdentifier.startsWith("F");
@@ -1470,6 +1473,7 @@ public class LivelinkConnector extends o
 
       if (isFolder)
       {
+        activities.noDocument(documentIdentifier,version);
         if (Logging.connectors.isDebugEnabled())
           Logging.connectors.debug("Livelink: Processing folder "+Integer.toString(vol)+":"+Integer.toString(objID));
 
@@ -1579,7 +1583,7 @@ public class LivelinkConnector extends o
               Logging.connectors.debug("Livelink: Decided to ingest document "+Integer.toString(vol)+":"+Integer.toString(objID));
 
             // Grab the access tokens for this file from the version string, inside ingest method.
-            ingestFromLiveLink(llc,documentIdentifiers[i],versions[i],activities,desc,sDesc);
+            ingestFromLiveLink(llc,documentIdentifiers[i],version,activities,desc,sDesc);
           }
           else
           {

Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1610996&r1=1610995&r2=1610996&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Wed Jul 16 13:18:31 2014
@@ -1266,6 +1266,8 @@ public class SharePointRepository extend
           // === List style identifier ===
           if (dListSeparatorIndex == documentIdentifier.length() - 3)
           {
+            activities.noDocument(documentIdentifier,version);
+
             String siteListPath = documentIdentifier.substring(0,documentIdentifier.length()-3);
             int listCutoff = siteListPath.lastIndexOf( "/" );
             String site = siteListPath.substring(0,listCutoff);
@@ -1608,6 +1610,8 @@ public class SharePointRepository extend
           if (dLibSeparatorIndex == documentIdentifier.length() - 2)
           {
             // It's a library.
+            activities.noDocument(documentIdentifier,version);
+
             String siteLibPath = documentIdentifier.substring(0,documentIdentifier.length()-2);
             int libCutoff = siteLibPath.lastIndexOf( "/" );
             String site = siteLibPath.substring(0,libCutoff);
@@ -1784,6 +1788,8 @@ public class SharePointRepository extend
         else
         {
           // === Site-style identifier ===
+          activities.noDocument(documentIdentifier,version);
+
           // Strip off the trailing "/" to get the site name.
           String decodedSitePath = documentIdentifier.substring(0,documentIdentifier.length()-1);
 



Mime
View raw message