manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1626228 [1/10] - in /manifoldcf/branches/dev_1x: ./ connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/ connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/ conne...
Date Fri, 19 Sep 2014 14:22:28 GMT
Author: kwright
Date: Fri Sep 19 14:22:27 2014
New Revision: 1626228

URL: http://svn.apache.org/r1626228
Log:
Pull up processDocuments changes related to CONNECTORS-977 from trunk.

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
    manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
    manifoldcf/branches/dev_1x/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
    manifoldcf/branches/dev_1x/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java
    manifoldcf/branches/dev_1x/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
    manifoldcf/branches/dev_1x/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java
    manifoldcf/branches/dev_1x/connectors/filenet/implementation/src/main/java/org/apache/manifoldcf/crawler/common/filenet/FilenetImpl.java
    manifoldcf/branches/dev_1x/connectors/filenet/interface/src/main/java/org/apache/manifoldcf/crawler/common/filenet/IFilenet.java
    manifoldcf/branches/dev_1x/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
    manifoldcf/branches/dev_1x/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java
    manifoldcf/branches/dev_1x/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
    manifoldcf/branches/dev_1x/connectors/gridfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/gridfs/GridFSRepositoryConnector.java
    manifoldcf/branches/dev_1x/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
    manifoldcf/branches/dev_1x/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java
    manifoldcf/branches/dev_1x/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java
    manifoldcf/branches/dev_1x/connectors/jira/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jira/JiraRepositoryConnector.java
    manifoldcf/branches/dev_1x/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
    manifoldcf/branches/dev_1x/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java
    manifoldcf/branches/dev_1x/connectors/rss/   (props changed)
    manifoldcf/branches/dev_1x/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
    manifoldcf/branches/dev_1x/connectors/sharepoint/   (props changed)
    manifoldcf/branches/dev_1x/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
    manifoldcf/branches/dev_1x/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
    manifoldcf/branches/dev_1x/connectors/sharepoint/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/sharepoint/XMLGenTest.java
    manifoldcf/branches/dev_1x/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
    manifoldcf/branches/dev_1x/connectors/wiki/   (props changed)
    manifoldcf/branches/dev_1x/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
    manifoldcf/branches/dev_1x/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionRepositoryConnector.java
    manifoldcf/branches/dev_1x/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/SchedulingRepositoryConnector.java
    manifoldcf/branches/dev_1x/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/TestingRepositoryConnector.java

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk:r1623951,1623953-1623954,1623956,1623972,1624058,1624085,1624174,1624236,1624377,1624384,1624399,1624449,1624504,1624729-1624731,1624906,1624909-1624910,1625023,1625095,1625103,1625108

Modified: manifoldcf/branches/dev_1x/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java?rev=1626228&r1=1626227&r2=1626228&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java Fri Sep 19 14:22:27 2014
@@ -56,6 +56,8 @@ import org.apache.manifoldcf.crawler.con
 import org.apache.manifoldcf.crawler.interfaces.DocumentSpecification;
 import org.apache.manifoldcf.crawler.interfaces.IProcessActivity;
 import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity;
+import org.apache.manifoldcf.crawler.interfaces.IExistingVersions;
+import org.apache.manifoldcf.crawler.interfaces.DocumentSpecification;
 import org.apache.manifoldcf.crawler.system.Logging;
 
 public class AlfrescoRepositoryConnector extends BaseRepositoryConnector {
@@ -847,32 +849,28 @@ public class AlfrescoRepositoryConnector
   }
 
   /** Process a set of documents.
-   * This is the method that should cause each document to be fetched, processed, and the
results either added
-   * to the queue of documents for the current job, and/or entered into the incremental ingestion
manager.
-   * The document specification allows this class to filter what is done based on the job.
-   *@param documentIdentifiers is the set of document identifiers to process.
-   *@param versions is the corresponding document versions to process, as returned by getDocumentVersions()
above.
-   *       The implementation may choose to ignore this parameter and always process the
current version.
-   *@param activities is the interface this method should use to queue up new document references
-   * and ingest documents.
-   *@param spec is the document specification.
-   *@param scanOnly is an array corresponding to the document identifiers.  It is set to
true to indicate when the processing
-   * should only find other references, and should not actually call the ingestion methods.
-   *@param jobMode is an integer describing how the job is being run, whether continuous
or once-only.
-   */
+  * This is the method that should cause each document to be fetched, processed, and the
results either added
+  * to the queue of documents for the current job, and/or entered into the incremental ingestion
manager.
+  * The document specification allows this class to filter what is done based on the job.
+  * The connector will be connected before this method can be called.
+  *@param documentIdentifiers is the set of document identifiers to process.
+  *@param statuses are the currently-stored document versions for each document in the set
of document identifiers
+  * passed in above.
+  *@param activities is the interface this method should use to queue up new document references
+  * and ingest documents.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or
once-only.
+  *@param usesDefaultAuthority will be true only if the authority in use for these documents
is the default one.
+  */
   @Override
-  public void processDocuments(String[] documentIdentifiers, String[] versions,
-      IProcessActivity activities, DocumentSpecification spec,
-      boolean[] scanOnly) throws ManifoldCFException, ServiceInterruption {
-
-    Logging.connectors.debug("Alfresco: Inside processDocuments");
-    int i = 0;
-
-    while (i < documentIdentifiers.length) {
-      long startTime = System.currentTimeMillis();
-      String nodeReference = documentIdentifiers[i];
+  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
+    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
+    throws ManifoldCFException, ServiceInterruption {
+      
+    for (String documentIdentifier : documentIdentifiers) {
+      // Prepare to access the document
+      String nodeReference = documentIdentifier;
       String uuid = NodeUtils.getUuidFromNodeReference(nodeReference);
-
+      
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("Alfresco: Processing document identifier '"
             + nodeReference + "'");
@@ -880,35 +878,49 @@ public class AlfrescoRepositoryConnector
       Reference reference = new Reference();
       reference.setStore(SearchUtils.STORE);
       reference.setUuid(uuid);
-
+      
       Predicate predicate = new Predicate();
       predicate.setStore(SearchUtils.STORE);
-      predicate.setNodes(new Reference[] { reference });
-
-      // getting properties
+      predicate.setNodes(new Reference[]{reference});
+      
       Node resultNode = null;
       try {
         resultNode = NodeUtils.get(endpoint, username, password, socketTimeout, session,
predicate);
       } catch (IOException e) {
         Logging.connectors.warn(
-            "Alfresco: IOException closing file input stream: "
+            "Alfresco: IOException getting node: "
                 + e.getMessage(), e);
         handleIOException(e);
       }
       
-      String errorCode = "OK";
-      String errorDesc = StringUtils.EMPTY;
-
       NamedValue[] properties = resultNode.getProperties();
-      boolean isDocument = ContentModelUtils.isDocument(properties);
+      boolean isDocument;
+      String versionString = "";
+      if (properties != null)
+        isDocument = ContentModelUtils.isDocument(properties);
+      else
+        isDocument = false;
+      if (isDocument){
+        boolean isVersioned = NodeUtils.isVersioned(resultNode.getAspects());
+        if(isVersioned){
+          versionString = NodeUtils.getVersionLabel(properties);
+        }
+      }
+
+      if (versionString.length() == 0 || activities.checkDocumentNeedsReindexing(documentIdentifier,versionString)) {
+        // Need to (re)index
       
-      try{    
-        
-        boolean isFolder = ContentModelUtils.isFolder(endpoint, username, password, socketTimeout,
session, reference);
+        String errorCode = "OK";
+        String errorDesc = StringUtils.EMPTY;
+        long startTime = System.currentTimeMillis();
         
-        //a generic node in Alfresco could have child-associations
-        if (isFolder) {
-            // ingest all the children of the folder
+        try{    
+          
+          boolean isFolder = ContentModelUtils.isFolder(endpoint, username, password, socketTimeout,
session, reference);
+          
+          //a generic node in Alfresco could have child-associations
+          if (isFolder) {
+            // queue all the children of the folder
             QueryResult queryResult = SearchUtils.getChildren(endpoint, username, password,
socketTimeout, session, reference);
             ResultSet resultSet = queryResult.getResultSet();
             ResultSetRow[] resultSetRows = resultSet.getRows();
@@ -917,18 +929,17 @@ public class AlfrescoRepositoryConnector
               String childNodeReference = PropertiesUtils.getNodeReference(childProperties);
               activities.addDocumentReference(childNodeReference, nodeReference, RELATIONSHIP_CHILD);
             }
-        } 
+          } 
 
-      }catch(IOException e){
-        Logging.connectors.warn(
-            "Alfresco: IOException closing file input stream: "
-                + e.getMessage(), e);
-        handleIOException(e);
-      }
-      
-      //a generic node in Alfresco could also have binaries content
-      if (isDocument) {
-        if (!scanOnly[i]) {
+        }catch(IOException e){
+          Logging.connectors.warn(
+              "Alfresco: IOException finding children: "
+                  + e.getMessage(), e);
+          handleIOException(e);
+        }
+        
+        //a generic node in Alfresco could also have binaries content
+        if (isDocument) {
           // this is a content to ingest
           InputStream is = null;
           long fileLength = 0;
@@ -945,28 +956,25 @@ public class AlfrescoRepositoryConnector
               fileLength = binary.getLength();
               is = ContentReader.getBinary(endpoint, binary, username, password, socketTimeout,
session);
               rd.setBinary(is, fileLength);
-              
+                
               //id is the node reference only if the node has an unique content stream
               //For a node with a single d:content property: id = node reference
               String id = PropertiesUtils.getNodeReference(properties);
-              
+                
               //For a node with multiple d:content properties: id = node reference;QName
               //The QName of a property of type d:content will be appended to the node reference
               if(contentProperties.size()>1){
                 id = id + INGESTION_SEPARATOR_FOR_MULTI_BINARY + contentProperty.getName();
               }
-              
-              //version label
-              String version = PropertiesUtils.getVersionLabel(properties);
-              
+                
               //the document uri is related to the specific d:content property available
in the node
               //we want to ingest each content stream that are nested in a single node
               String documentURI = binary.getUrl();
-              activities.ingestDocumentWithException(id, version, documentURI, rd);
+              activities.ingestDocumentWithException(documentIdentifier, id, versionString,
documentURI, rd);
             }
-            
+              
             AuthenticationUtils.endSession();
-            
+              
           } catch (ParseException e) {
             errorCode = "IO ERROR";
             errorDesc = e.getMessage();
@@ -997,77 +1005,17 @@ public class AlfrescoRepositoryConnector
                       + e.getMessage(), e);
               handleIOException(e);
             }
-                      
+                        
             session = null;
-            
+              
             activities.recordActivity(new Long(startTime), ACTIVITY_READ,
-                fileLength, nodeReference, errorCode, errorDesc, null);
+              fileLength, nodeReference, errorCode, errorDesc, null);
           }
-        }
-      }
-      i++;
-    }
-  }
 
-  /** The short version of getDocumentVersions.
-   * Get document versions given an array of document identifiers.
-   * This method is called for EVERY document that is considered. It is
-   * therefore important to perform as little work as possible here.
-   *@param documentIdentifiers is the array of local document identifiers, as understood
by this connector.
-   *@param spec is the current document specification for the current job.  If there is a
dependency on this
-   * specification, then the version string should include the pertinent data, so that reingestion
will occur
-   * when the specification changes.  This is primarily useful for metadata.
-   *@return the corresponding version strings, with null in the places where the document
no longer exists.
-   * Empty version strings indicate that there is no versioning ability for the corresponding
document, and the document
-   * will always be processed.
-   */
-  @Override
-  public String[] getDocumentVersions(String[] documentIdentifiers,
-      DocumentSpecification spec) throws ManifoldCFException,
-      ServiceInterruption {
-    String[] rval = new String[documentIdentifiers.length];
-    int i = 0;
-    while (i < rval.length){
-      String nodeReference = documentIdentifiers[i];
-      String uuid = NodeUtils.getUuidFromNodeReference(nodeReference);
-      
-      Reference reference = new Reference();
-      reference.setStore(SearchUtils.STORE);
-      reference.setUuid(uuid);
-      
-      Predicate predicate = new Predicate();
-      predicate.setStore(SearchUtils.STORE);
-      predicate.setNodes(new Reference[]{reference});
-      
-      Node node = null;
-      try {
-        node = NodeUtils.get(endpoint, username, password, socketTimeout, session, predicate);
-      } catch (IOException e) {
-        Logging.connectors.warn(
-            "Alfresco: IOException closing file input stream: "
-                + e.getMessage(), e);
-        handleIOException(e);
-      }
-      
-      if(node.getProperties()!=null){
-        NamedValue[] properties = node.getProperties();
-        boolean isDocument = ContentModelUtils.isDocument(properties);
-        if(isDocument){
-          boolean isVersioned = NodeUtils.isVersioned(node.getAspects());
-          if(isVersioned){
-            rval[i] = NodeUtils.getVersionLabel(properties);
-          } else {
-            //a document that doesn't contain versioning information will always be processed
-            rval[i] = StringUtils.EMPTY;
-          }
-        } else {
-          //a space will always be processed
-          rval[i] = StringUtils.EMPTY;
         }
       }
-      i++;
     }
-    return rval;
+    
   }
   
   private static void handleIOException(IOException e)

Modified: manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java?rev=1626228&r1=1626227&r2=1626228&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java Fri Sep 19 14:22:27 2014
@@ -68,6 +68,7 @@ import org.apache.manifoldcf.crawler.con
 import org.apache.manifoldcf.crawler.interfaces.DocumentSpecification;
 import org.apache.manifoldcf.crawler.interfaces.IProcessActivity;
 import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity;
+import org.apache.manifoldcf.crawler.interfaces.IExistingVersions;
 import org.apache.manifoldcf.crawler.system.Logging;
 
 /**
@@ -1067,28 +1068,24 @@ public class CmisRepositoryConnector ext
   }
 
   /** Process a set of documents.
-   * This is the method that should cause each document to be fetched, processed, and the
results either added
-   * to the queue of documents for the current job, and/or entered into the incremental ingestion
manager.
-   * The document specification allows this class to filter what is done based on the job.
-   *@param documentIdentifiers is the set of document identifiers to process.
-   *@param versions is the corresponding document versions to process, as returned by getDocumentVersions()
above.
-   *       The implementation may choose to ignore this parameter and always process the
current version.
-   *@param activities is the interface this method should use to queue up new document references
-   * and ingest documents.
-   *@param spec is the document specification.
-   *@param scanOnly is an array corresponding to the document identifiers.  It is set to
true to indicate when the processing
-   * should only find other references, and should not actually call the ingestion methods.
-   *@param jobMode is an integer describing how the job is being run, whether continuous
or once-only.
-   */
-  @SuppressWarnings("unchecked")
+  * This is the method that should cause each document to be fetched, processed, and the
results either added
+  * to the queue of documents for the current job, and/or entered into the incremental ingestion
manager.
+  * The document specification allows this class to filter what is done based on the job.
+  * The connector will be connected before this method can be called.
+  *@param documentIdentifiers is the set of document identifiers to process.
+  *@param statuses are the currently-stored document versions for each document in the set
of document identifiers
+  * passed in above.
+  *@param activities is the interface this method should use to queue up new document references
+  * and ingest documents.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or
once-only.
+  *@param usesDefaultAuthority will be true only if the authority in use for these documents
is the default one.
+  */
   @Override
-  public void processDocuments(String[] documentIdentifiers, String[] versions,
-      IProcessActivity activities, DocumentSpecification spec,
-      boolean[] scanOnly) throws ManifoldCFException, ServiceInterruption {
+  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
+    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
+    throws ManifoldCFException, ServiceInterruption {
 
-    getSession();
-    Logging.connectors.debug("CMIS: Inside processDocuments");
-    
+    // Extract what we need from the spec
     String cmisQuery = StringUtils.EMPTY;
     for (int i = 0; i < spec.getChildCount(); i++)
     {
@@ -1099,207 +1096,244 @@ public class CmisRepositoryConnector ext
       }
     }
 
-    for (int i = 0; i < documentIdentifiers.length; i++) {
-      long startTime = System.currentTimeMillis();
-      String nodeId = documentIdentifiers[i];
-      String version = versions[i];
+    getSession();
 
+    for (String documentIdentifier : documentIdentifiers) {
+      
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("CMIS: Processing document identifier '"
-            + nodeId + "'");
+            + documentIdentifier + "'");
 
+      // Load the object.  If this fails, it has been deleted.
       CmisObject cmisObject;
       try {
-        cmisObject = session.getObject(nodeId);
+        cmisObject = session.getObject(documentIdentifier);
       } catch (CmisObjectNotFoundException e) {
-        // Delete it
-        activities.deleteDocument(nodeId);
+        cmisObject = null;
+      }
+
+      if (cmisObject == null) {
+        //System.out.println(" doesn't exist");
+        activities.deleteDocument(documentIdentifier);
         continue;
       }
       
-      String errorCode = "OK";
-      String errorDesc = StringUtils.EMPTY;
-      String baseTypeId = cmisObject.getBaseType().getId();
-
-      if (baseTypeId.equals(CMIS_FOLDER_BASE_TYPE)) {
-
-        // adding all the children for a folder
-
-        Folder folder = (Folder) cmisObject;
-        ItemIterable<CmisObject> children = folder.getChildren();
-        for (CmisObject child : children) {
-          activities.addDocumentReference(child.getId(), nodeId,
-              RELATIONSHIP_CHILD);
+      String versionString;
+      
+      if (cmisObject.getBaseType().getId().equals(CMIS_DOCUMENT_BASE_TYPE)) {
+        Document document = (Document) cmisObject;
+
+        // Since documents that are not current have different node id's, we can return a
constant version,
+        // EXCEPT when the document is not the current one (in which case we delete)
+        boolean isCurrentVersion;
+        try {
+          Document d = document.getObjectOfLatestVersion(false);
+          isCurrentVersion = d.getId().equals(documentIdentifier);
+        } catch (CmisObjectNotFoundException e) {
+          isCurrentVersion = false;
         }
-      } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)){
-        if (!scanOnly[i]) {
-          // content ingestion
+        if (isCurrentVersion) {
+          //System.out.println(" is latest version");
+          versionString = documentIdentifier + ":" + cmisQuery;
+        } else {
+          //System.out.println(" is NOT latest vrersion");
+          activities.deleteDocument(documentIdentifier);
+          continue;
+        }
+      } else {
+        //a CMIS folder will always be processed
+        //System.out.println(" is folder");
+        versionString = StringUtils.EMPTY;
+      }
+      
+      if (versionString.length() == 0 || activities.checkDocumentNeedsReindexing(documentIdentifier,versionString)) {
+        // Index this document
+        String errorCode = "OK";
+        String errorDesc = StringUtils.EMPTY;
+        long startTime = System.currentTimeMillis();
+        
+        String baseTypeId = cmisObject.getBaseType().getId();
 
-          Document document = (Document) cmisObject;
-          long fileLength;
-          InputStream is;
-          try {
-            fileLength = document.getContentStreamLength();
-            if (fileLength > 0)
-              is = document.getContentStream().getStream();
-            else
-              is = null;
-          } catch (CmisObjectNotFoundException e) {
-            // Document gone
-            activities.deleteDocument(nodeId);
-            continue;
+        if (baseTypeId.equals(CMIS_FOLDER_BASE_TYPE)) {
+
+          // adding all the children for a folder
+
+          Folder folder = (Folder) cmisObject;
+          ItemIterable<CmisObject> children = folder.getChildren();
+          for (CmisObject child : children) {
+            activities.addDocumentReference(child.getId(), documentIdentifier,
+                RELATIONSHIP_CHILD);
           }
+      } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)){
+        // content ingestion
+
+        Document document = (Document) cmisObject;
+        long fileLength;
+        InputStream is;
+        try {
+          fileLength = document.getContentStreamLength();
+          if (fileLength > 0)
+            is = document.getContentStream().getStream();
+          else
+            is = null;
+        } catch (CmisObjectNotFoundException e) {
+          // Document gone
+          activities.deleteDocument(documentIdentifier);
+          continue;
+        }
           
-          try {
-            RepositoryDocument rd = new RepositoryDocument();
-            Date createdDate = document.getCreationDate().getTime();
-            Date modifiedDate = document.getLastModificationDate().getTime();
+        try {
+          RepositoryDocument rd = new RepositoryDocument();
+          Date createdDate = document.getCreationDate().getTime();
+          Date modifiedDate = document.getLastModificationDate().getTime();
             
-            rd.setFileName(document.getContentStreamFileName());
-            rd.setMimeType(document.getContentStreamMimeType());
-            rd.setCreatedDate(createdDate);
-            rd.setModifiedDate(modifiedDate);
+          rd.setFileName(document.getContentStreamFileName());
+          rd.setMimeType(document.getContentStreamMimeType());
+          rd.setCreatedDate(createdDate);
+          rd.setModifiedDate(modifiedDate);
             
-            //binary
-            if(is != null) {
-              rd.setBinary(is, fileLength);
-            } else {
-              rd.setBinary(new NullInputStream(0),0);
-            }
+          //binary
+          if(is != null) {
+            rd.setBinary(is, fileLength);
+          } else {
+            rd.setBinary(new NullInputStream(0),0);
+          }
 
-            //properties
-            List<Property<?>> properties = document.getProperties();
-            String id = StringUtils.EMPTY;
-            for (Property<?> property : properties) {
-              String propertyId = property.getId();
+          //properties
+          List<Property<?>> properties = document.getProperties();
+          String id = StringUtils.EMPTY;
+          for (Property<?> property : properties) {
+            String propertyId = property.getId();
               
-              if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+            if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
                 
-                if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
-                  id = (String) property.getValue();
+              if (propertyId.endsWith(Constants.PARAM_OBJECT_ID)) {
+                id = (String) property.getValue();
     
-                  if (property.getValue() !=null 
-                      || property.getValues() != null) {
-                    PropertyType propertyType = property.getType();
+                if (property.getValue() !=null 
+                    || property.getValues() != null) {
+                  PropertyType propertyType = property.getType();
       
-                    switch (propertyType) {
+                  switch (propertyType) {
       
-                    case STRING:
-                    case ID:
-                    case URI:
-                    case HTML:
-                      if(property.isMultiValued()){
-                        List<String> htmlPropertyValues = (List<String>) property.getValues();
-                        for (String htmlPropertyValue : htmlPropertyValues) {
-                          rd.addField(propertyId, htmlPropertyValue);
-                        }
-                      } else {
-                        String stringValue = (String) property.getValue();
-                        if(StringUtils.isNotEmpty(stringValue)){
-                          rd.addField(propertyId, stringValue);
-                        }
+                  case STRING:
+                  case ID:
+                  case URI:
+                  case HTML:
+                    if(property.isMultiValued()){
+                      List<String> htmlPropertyValues = (List<String>) property.getValues();
+                      for (String htmlPropertyValue : htmlPropertyValues) {
+                        rd.addField(propertyId, htmlPropertyValue);
                       }
-                      break;
+                    } else {
+                      String stringValue = (String) property.getValue();
+                      if(StringUtils.isNotEmpty(stringValue)){
+                        rd.addField(propertyId, stringValue);
+                      }
+                    }
+                    break;
            
-                    case BOOLEAN:
-                      if(property.isMultiValued()){
-                        List<Boolean> booleanPropertyValues = (List<Boolean>)
property.getValues();
-                        for (Boolean booleanPropertyValue : booleanPropertyValues) {
-                          rd.addField(propertyId, booleanPropertyValue.toString());
-                        }
-                      } else {
-                        Boolean booleanValue = (Boolean) property.getValue();
-                        if(booleanValue!=null){
-                          rd.addField(propertyId, booleanValue.toString());
-                        }
+                  case BOOLEAN:
+                    if(property.isMultiValued()){
+                      List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
+                      for (Boolean booleanPropertyValue : booleanPropertyValues) {
+                        rd.addField(propertyId, booleanPropertyValue.toString());
                       }
-                      break;
-      
-                    case INTEGER:
-                      if(property.isMultiValued()){
-                        List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
-                        for (BigInteger integerPropertyValue : integerPropertyValues) {
-                          rd.addField(propertyId, integerPropertyValue.toString());
-                        }
-                      } else {
-                        BigInteger integerValue = (BigInteger) property.getValue();
-                        if(integerValue!=null){
-                          rd.addField(propertyId, integerValue.toString());
-                        }
+                    } else {
+                      Boolean booleanValue = (Boolean) property.getValue();
+                      if(booleanValue!=null){
+                        rd.addField(propertyId, booleanValue.toString());
                       }
-                      break;
+                    }
+                    break;
       
-                    case DECIMAL:
-                      if(property.isMultiValued()){
-                        List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
-                        for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
-                          rd.addField(propertyId, decimalPropertyValue.toString());
-                        }
-                      } else {
-                        BigDecimal decimalValue = (BigDecimal) property.getValue();
-                        if(decimalValue!=null){
-                          rd.addField(propertyId, decimalValue.toString());
-                        }
+                  case INTEGER:
+                    if(property.isMultiValued()){
+                      List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
+                      for (BigInteger integerPropertyValue : integerPropertyValues) {
+                        rd.addField(propertyId, integerPropertyValue.toString());
                       }
-                      break;
+                    } else {
+                      BigInteger integerValue = (BigInteger) property.getValue();
+                      if(integerValue!=null){
+                        rd.addField(propertyId, integerValue.toString());
+                      }
+                    }
+                    break;
       
-                    case DATETIME:
-                      if(property.isMultiValued()){
-                        List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
-                        for (GregorianCalendar datePropertyValue : datePropertyValues) {
-                          rd.addField(propertyId,
-                              ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
-                        }
-                      } else {
-                        GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
-                        if(dateValue!=null){
-                          rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
-                        }
+                  case DECIMAL:
+                    if(property.isMultiValued()){
+                      List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
+                      for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
+                        rd.addField(propertyId, decimalPropertyValue.toString());
                       }
-                      break;
+                    } else {
+                      BigDecimal decimalValue = (BigDecimal) property.getValue();
+                      if(decimalValue!=null){
+                        rd.addField(propertyId, decimalValue.toString());
+                      }
+                    }
+                    break;
       
-                    default:
-                      break;
+                  case DATETIME:
+                    if(property.isMultiValued()){
+                      List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
+                      for (GregorianCalendar datePropertyValue : datePropertyValues) {
+                        rd.addField(propertyId,
+                            ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
+                      }
+                    } else {
+                      GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
+                      if(dateValue!=null){
+                        rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+                      }
                     }
+                    break;
+      
+                  default:
+                    break;
                   }
-                  
                 }
+                  
+              }
               
             }
+          }
+          
+          //ingestion
             
-            //ingestion
-            
-            //documentURI
-            String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
+          //documentURI
+          String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
             
-            try {
-              activities.ingestDocumentWithException(nodeId, version, documentURI, rd);
-            } catch (IOException e) {
-              errorCode = "IO ERROR";
-              errorDesc = e.getMessage();
-              handleIOException(e, "reading file input stream");
+          try {
+            activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
+          } catch (IOException e) {
+            errorCode = "IO ERROR";
+            errorDesc = e.getMessage();
+            handleIOException(e, "reading file input stream");
+          }
+        } finally {
+          try {
+            if(is!=null){
+              is.close();
             }
+          } catch (IOException e) {
+            errorCode = "IO ERROR";
+            errorDesc = e.getMessage();
+            handleIOException(e, "closing file input stream");
           } finally {
-            try {
-              if(is!=null){
-                is.close();
-              }
-            } catch (IOException e) {
-              errorCode = "IO ERROR";
-              errorDesc = e.getMessage();
-              handleIOException(e, "closing file input stream");
-            } finally {
-              activities.recordActivity(new Long(startTime), ACTIVITY_READ,
-                fileLength, nodeId, errorCode, errorDesc, null);
-            }
+            activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+              fileLength, documentIdentifier, errorCode, errorDesc, null);
           }
         }
       }
       else
-        activities.deleteDocument(nodeId);
+        activities.deleteDocument(documentIdentifier);
+      }
     }
+    
   }
-  
+
   protected static void handleIOException(IOException e, String context) throws ManifoldCFException, ServiceInterruption {
     if (e instanceof InterruptedIOException) {
       throw new ManifoldCFException(e.getMessage(), e,
@@ -1311,77 +1345,5 @@ public class CmisRepositoryConnector ext
       throw new ManifoldCFException(e.getMessage(), e);
     }
   }
-  
-  /** The short version of getDocumentVersions.
-   * Get document versions given an array of document identifiers.
-   * This method is called for EVERY document that is considered. It is
-   * therefore important to perform as little work as possible here.
-   *@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
-   *@param spec is the current document specification for the current job.  If there is a dependency on this
-   * specification, then the version string should include the pertinent data, so that reingestion will occur
-   * when the specification changes.  This is primarily useful for metadata.
-   *@return the corresponding version strings, with null in the places where the document no longer exists.
-   * Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
-   * will always be processed.
-   */
-  @Override
-  public String[] getDocumentVersions(String[] documentIdentifiers,
-      DocumentSpecification spec) throws ManifoldCFException,
-      ServiceInterruption {
-    
-    String cmisQuery = StringUtils.EMPTY;
-    for (int i = 0; i < spec.getChildCount(); i++)
-    {
-      SpecificationNode sn = spec.getChild(i);
-      if (sn.getType().equals(JOB_STARTPOINT_NODE_TYPE)) {
-        cmisQuery = sn.getAttributeValue(CmisConfig.CMIS_QUERY_PARAM);
-        break;
-      }
-    }
-
-    getSession();
-
-    String[] rval = new String[documentIdentifiers.length];
-    for (int i = 0; i < rval.length; i++) {
-      //System.out.println("Get document versions: "+documentIdentifiers[i]);
-      CmisObject cmisObject;
-      try {
-        cmisObject = session.getObject(documentIdentifiers[i]);
-      } catch (CmisObjectNotFoundException e) {
-        cmisObject = null;
-      }
-
-      if (cmisObject == null) {
-        //System.out.println(" doesn't exist");
-        rval[i] = null;
-        continue;
-      }
-      
-      if (cmisObject.getBaseType().getId().equals(CMIS_DOCUMENT_BASE_TYPE)) {
-        Document document = (Document) cmisObject;
 
-        // Since documents that are not current have different node id's, we can return a constant version,
-        // EXCEPT when the document is not the current one (in which case we delete)
-        boolean isCurrentVersion;
-        try {
-          Document d = document.getObjectOfLatestVersion(false);
-          isCurrentVersion = d.getId().equals(documentIdentifiers[i]);
-        } catch (CmisObjectNotFoundException e) {
-          isCurrentVersion = false;
-        }
-        if (isCurrentVersion) {
-          //System.out.println(" is latest version");
-          rval[i] = documentIdentifiers[i] + ":" + cmisQuery;
-        } else {
-          //System.out.println(" is NOT latest vrersion");
-          rval[i] = null;
-        }
-      } else {
-        //a CMIS folder will always be processed
-        //System.out.println(" is folder");
-        rval[i] = StringUtils.EMPTY;
-      }
-    }
-    return rval;
-  }
 }



Mime
View raw message