manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1526124 - in /manifoldcf/trunk: CHANGES.txt connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
Date Wed, 25 Sep 2013 08:05:09 GMT
Author: kwright
Date: Wed Sep 25 08:05:09 2013
New Revision: 1526124

URL: http://svn.apache.org/r1526124
Log:
Fix for CONNECTORS-782.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1526124&r1=1526123&r2=1526124&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Wed Sep 25 08:05:09 2013
@@ -3,6 +3,9 @@ $Id$
 
 ======================= 1.4-dev =====================
 
+CONNECTORS-782: Add unique-ID metadata in SharePoint connector.
+(Dmitry Goldenberg, Karl Wright)
+
 CONNECTORS-778: Add support for attachments in SharePoint
 connector.
 (Dmitry Goldenberg, Karl Wright)

Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1526124&r1=1526123&r2=1526124&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Wed Sep 25 08:05:09 2013
@@ -765,8 +765,6 @@ public class SharePointRepository extend
                 String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
                 String[] listIDs = activities.retrieveParentData(documentIdentifier, "guids");
                 String[] listFields = activities.retrieveParentData(documentIdentifier, "fields");
-                // Grab the ID from the carrydown data; it's needed to find the attachments.
-                String[] ids = activities.retrieveParentData(documentIdentifier, "ids");
 
                 String listID;
                 if (listIDs.length >= 1)
@@ -774,13 +772,7 @@ public class SharePointRepository extend
                 else
                   listID = null;
 
-                String id;
-                if (ids.length >= 1)
-                  id = ids[0];
-                else
-                  id = null;
-
-                if (listID != null && id != null)
+                if (listID != null)
                 {
                   String[] sortedMetadataFields = getInterestingFieldSetSorted(metadataInfo,listFields);
                   
@@ -792,11 +784,15 @@ public class SharePointRepository extend
                   ArrayList metadataDescription = new ArrayList();
                   metadataDescription.add("Modified");
                   metadataDescription.add("Created");
+                  metadataDescription.add("ID");
+                  metadataDescription.add("GUID");
                   // The document path includes the library, with no leading slash, and is decoded.
                   String decodedItemPathWithoutSite = decodedItemPath.substring(cutoff+1);
                   Map<String,String> values = proxy.getFieldValues( metadataDescription,
encodedSitePath, listID, "/Lists/" + decodedItemPathWithoutSite, dspStsWorks );
                   String modifiedDate = values.get("Modified");
                   String createdDate = values.get("Created");
+                  String id = values.get("ID");
+                  String guid = values.get("GUID");
                   if (modifiedDate != null)
                   {
                     // Item has a modified date so we presume it exists.
@@ -817,6 +813,7 @@ public class SharePointRepository extend
                     packDate(sb,modifiedDateValue);
                     packDate(sb,createdDateValue);
                     pack(sb,id,'+');
+                    pack(sb,guid,'+');
                     // The rest of this is unparseable
                     sb.append(versionToken);
                     sb.append(pathNameAttributeVersion);
@@ -999,6 +996,7 @@ public class SharePointRepository extend
                 metadataDescription.add("Last_x0020_Modified");
                 metadataDescription.add("Modified");
                 metadataDescription.add("Created");
+                metadataDescription.add("GUID");
                 // The document path includes the library, with no leading slash, and is decoded.
                 int cutoff = decodedLibPath.lastIndexOf("/");
                 String decodedDocumentPathWithoutSite = decodedDocumentPath.substring(cutoff);
@@ -1006,8 +1004,9 @@ public class SharePointRepository extend
 
                 String modifiedDate = values.get("Modified");
                 String createdDate = values.get("Created");
-                  
+                String guid = values.get("GUID");
                 String modifyDate = values.get("Last_x0020_Modified");
+
                 if (modifyDate != null)
                 {
                   // Item has a modified date, so we presume it exists
@@ -1051,6 +1050,7 @@ public class SharePointRepository extend
                     packList(sb,denyTokens,'+');
                     packDate(sb,modifiedDateValue);
                     packDate(sb,createdDateValue);
+                    pack(sb,guid,'+');
                     // The rest of this is unparseable
                     sb.append(versionToken);
                     sb.append(pathNameAttributeVersion);
@@ -1175,6 +1175,8 @@ public class SharePointRepository extend
     return sortedMetadataFields;
   }
 
+  protected static final String[] attachmentDataNames = new String[]{"createdDate","modifiedDate","accessTokens","denyTokens","url","guids"};
+
   /** Process a set of documents.
   * This is the method that should cause each document to be fetched, processed, and the results either added
   * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
@@ -1346,6 +1348,11 @@ public class SharePointRepository extend
               StringBuilder idBuffer = new StringBuilder();
               startPosition = unpack(idBuffer,version,startPosition,'+');
 
+              // List item GUID (for metadata)
+              StringBuilder guidBuffer = new StringBuilder();
+              startPosition = unpack(guidBuffer,version,startPosition,'+');
+              String guid = guidBuffer.toString();
+              
               // We need the list ID, which we've already fetched, so grab that from the parent data.
               String[] listIDs = activities.retrieveParentData(documentIdentifier, "guids");
 
@@ -1378,8 +1385,7 @@ public class SharePointRepository extend
                   // we unpacked the version information early above.
                   
                   // No check for inclusion; if the list item is included, so is this
-                  String[] dataNames = new String[]{"createdDate","modifiedDate","accessTokens","denyTokens","url"};
-                  String[][] dataValues = new String[5][];
+                  String[][] dataValues = new String[attachmentDataNames.length][];
                   if (createdDate == null)
                     dataValues[0] = new String[0];
                   else
@@ -1397,9 +1403,10 @@ public class SharePointRepository extend
                   else
                     dataValues[3] = (String[])denyAcls.toArray(new String[0]);
                   dataValues[4] = new String[]{attachmentName.getPrettyName()};
+                  dataValues[5] = new String[]{guid};
 
                   activities.addDocumentReference(documentIdentifier + "/" + attachmentName.getValue(),
-                    documentIdentifier, null, dataNames, dataValues);
+                    documentIdentifier, null, attachmentDataNames, dataValues);
                   
                 }
               }
@@ -1458,7 +1465,8 @@ public class SharePointRepository extend
                         data.addField(fieldName,fieldData);
                       }
                     }
-
+                    data.addField("GUID",guid);
+                    
                     activities.ingestDocument( documentIdentifier, version, itemUrl , data
);
                   }
                   finally
@@ -1506,18 +1514,41 @@ public class SharePointRepository extend
                 if (createdDate.getTime() == 0L)
                   createdDate = null;
 
-                // Fetch and index.  This also filters documents based on output connector
restrictions.
-                String fileUrl = serverUrl + encodePath(urlBuffer.toString());
-                String fetchUrl = fileUrl;
-                if (!fetchAndIndexFile(activities, documentIdentifier, version, fileUrl,
fetchUrl,
-                  accessTokens, denyTokens, createdDate, modifiedDate, null, sDesc))
+                // We need the list ID, which we've already fetched, so grab that from the
parent data.
+                String[] guids = activities.retrieveParentData(documentIdentifier, "guids");
+                String guid;
+                if (guids.length >= 1)
+                  guid = guids[0];
+                else
+                  guid = null;
+                
+                if (guid != null)
+                {
+                  String url = urlBuffer.toString();
+                  int lastIndex = url.lastIndexOf("/");
+                  guid = guid + ":" + url.substring(lastIndex+1);
+                  
+                  // Fetch and index.  This also filters documents based on output connector
restrictions.
+                  String fileUrl = serverUrl + encodePath(url);
+                  String fetchUrl = fileUrl;
+                  if (!fetchAndIndexFile(activities, documentIdentifier, version, fileUrl,
fetchUrl,
+                    accessTokens, denyTokens, createdDate, modifiedDate, null, guid, sDesc))
+                  {
+                    // Document not indexed for whatever reason
+                    activities.deleteDocument(documentIdentifier,version);
+                    i++;
+                    continue;
+                  }
+                }
+                else
                 {
-                  // Document not indexed for whatever reason
+                  if (Logging.connectors.isDebugEnabled())
+                    Logging.connectors.debug("SharePoint: Skipping attachment '"+documentIdentifier+"'
because no parent guid found");
                   activities.deleteDocument(documentIdentifier,version);
                   i++;
                   continue;
                 }
-
+                
               }
             }
           }
@@ -1644,6 +1675,11 @@ public class SharePointRepository extend
               if (createdDate.getTime() == 0L)
                 createdDate = null;
               
+              // Document GUID (for metadata)
+              StringBuilder guidBuffer = new StringBuilder();
+              startPosition = unpack(guidBuffer,version,startPosition,'+');
+              String guid = guidBuffer.toString();
+
               // Generate the URL we are going to use
               String fileUrl = fileBaseUrl + encodedDocumentPath;
               if (Logging.connectors.isDebugEnabled())
@@ -1686,7 +1722,7 @@ public class SharePointRepository extend
 
               // Fetch and index.  This also filters documents based on output connector
restrictions.
               if (!fetchAndIndexFile(activities, documentIdentifier, version, fileUrl, serverUrl
+ encodedServerLocation + encodedDocumentPath,
-                acls, denyAcls, createdDate, modifiedDate, metadataValues, sDesc))
+                acls, denyAcls, createdDate, modifiedDate, metadataValues, guid, sDesc))
               {
                 // Document not indexed for whatever reason
                 activities.deleteDocument(documentIdentifier,version);
@@ -1782,7 +1818,7 @@ public class SharePointRepository extend
   */
   protected boolean fetchAndIndexFile(IProcessActivity activities, String documentIdentifier,
String version,
     String fileUrl, String fetchUrl, ArrayList acls, ArrayList denyAcls, Date createdDate,
Date modifiedDate,
-    Map<String,String> metadataValues, SystemMetadataDescription sDesc)
+    Map<String,String> metadataValues, String guid, SystemMetadataDescription sDesc)
     throws ManifoldCFException, ServiceInterruption
   {
     // Before we fetch, confirm that the output connector will accept the document
@@ -1923,6 +1959,8 @@ public class SharePointRepository extend
                     data.addField(fieldName,fieldData);
                   }
                 }
+                data.addField("GUID",guid);
+                
                 activities.ingestDocument( documentIdentifier, version, fileUrl , data );
                 return true;
               }
@@ -2154,7 +2192,7 @@ public class SharePointRepository extend
     }
   }
   
-  protected final static String[] listItemStreamDataNames = new String[]{"accessTokens",
"denyTokens", "guids", "fields", "ids"};
+  protected final static String[] listItemStreamDataNames = new String[]{"accessTokens",
"denyTokens", "guids", "fields"};
 
   protected class ListItemStream implements IFileStream
   {
@@ -2214,26 +2252,6 @@ public class SharePointRepository extend
               // The way I've chosen to do this is to use a triple slash at that point, as a separator.
               String modifiedPath = relPath.substring(0,siteListPath.length()) + "//" + relPath.substring(siteListPath.length());
               
-              // Evil hack!!!
-              // Come up with the ID based on the URL.  This SHOULD come from SharePoint
via addFile, above, but
-              // this requires a new release of the plugin for SharePoint 2010, and SPSProxyHelper
revision and testing on SharePoint 2007.
-              String itemRef = relPath.substring(siteListPath.length());
-              String itemID;
-              if (itemRef.length() > 1)
-              {
-                int undIndex = itemRef.indexOf("_",1);
-                if (undIndex != -1)
-                  itemID = itemRef.substring(1,undIndex);
-                else
-                  itemID = itemRef.substring(1);
-              }
-              else
-                itemID = null;
-
-              if (itemID == null)
-                dataValues[4] = new String[0];
-              else
-                dataValues[4] = new String[]{itemID};
               activities.addDocumentReference( modifiedPath, documentIdentifier, null, listItemStreamDataNames,
dataValues );
             }
             else



Mime
View raw message